Commit 2122ff5eab8faec853e43f6de886e8dc8f31e317

Authored by Avi Kivity
1 parent 1683b2416e

KVM: move vcpu locking to dispatcher for generic vcpu ioctls

All vcpu ioctls need to be locked, so instead of locking each one individually,
we take the lock once in the generic dispatcher.

This patch only updates the generic ioctls and leaves the arch-specific ioctls alone.

Signed-off-by: Avi Kivity <avi@redhat.com>

Showing 7 changed files with 17 additions and 95 deletions
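
A minimal, self-contained userspace sketch of the pattern described above: the
per-vcpu lock is taken once in the generic ioctl dispatcher rather than inside
each handler. The names vcpu_load(), vcpu_put() and kvm_vcpu_ioctl() mirror the
kernel's, but the mutex, the types and the stand-in ioctl number here are
illustrative assumptions, not the real implementation (in the kernel,
vcpu_load() does more than just take the lock).

    #include <pthread.h>
    #include <stdio.h>

    /* Stand-in for the kernel's struct kvm_vcpu; illustration only. */
    struct kvm_vcpu {
        pthread_mutex_t mutex;  /* plays the role of the per-vcpu lock */
        int mp_state;
    };

    /* Acquire/release the per-vcpu lock. */
    static void vcpu_load(struct kvm_vcpu *vcpu) { pthread_mutex_lock(&vcpu->mutex); }
    static void vcpu_put(struct kvm_vcpu *vcpu)  { pthread_mutex_unlock(&vcpu->mutex); }

    enum { VCPU_IOCTL_SET_MP_STATE = 1 };  /* hypothetical ioctl number */

    /* After the patch, individual handlers no longer lock; they run with
     * the vcpu lock already held by the dispatcher. */
    static long ioctl_set_mp_state(struct kvm_vcpu *vcpu, int state)
    {
        vcpu->mp_state = state;
        return 0;
    }

    /* The generic dispatcher takes the lock once, around every vcpu ioctl. */
    static long kvm_vcpu_ioctl(struct kvm_vcpu *vcpu, unsigned int ioctl, int arg)
    {
        long r;

        vcpu_load(vcpu);
        switch (ioctl) {
        case VCPU_IOCTL_SET_MP_STATE:
            r = ioctl_set_mp_state(vcpu, arg);
            break;
        default:
            r = -1;
        }
        vcpu_put(vcpu);
        return r;
    }

    int main(void)
    {
        static struct kvm_vcpu vcpu = {
            .mutex = PTHREAD_MUTEX_INITIALIZER,
            .mp_state = 0,
        };

        kvm_vcpu_ioctl(&vcpu, VCPU_IOCTL_SET_MP_STATE, 3);
        printf("mp_state = %d\n", vcpu.mp_state);
        return 0;
    }

With the lock held at the dispatcher, per-handler calls such as the
vcpu_load()/vcpu_put() pair in kvm_arch_vcpu_ioctl_run() and
kvm_arch_vcpu_ioctl_set_regs() become redundant, which is what the diff
below deletes.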

arch/ia64/kvm/kvm-ia64.c
1 /* 1 /*
2 * kvm_ia64.c: Basic KVM suppport On Itanium series processors 2 * kvm_ia64.c: Basic KVM suppport On Itanium series processors
3 * 3 *
4 * 4 *
5 * Copyright (C) 2007, Intel Corporation. 5 * Copyright (C) 2007, Intel Corporation.
6 * Xiantao Zhang (xiantao.zhang@intel.com) 6 * Xiantao Zhang (xiantao.zhang@intel.com)
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify it 8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License, 9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation. 10 * version 2, as published by the Free Software Foundation.
11 * 11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT 12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details. 15 * more details.
16 * 16 *
17 * You should have received a copy of the GNU General Public License along with 17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 * Place - Suite 330, Boston, MA 02111-1307 USA. 19 * Place - Suite 330, Boston, MA 02111-1307 USA.
20 * 20 *
21 */ 21 */
22 22
23 #include <linux/module.h> 23 #include <linux/module.h>
24 #include <linux/errno.h> 24 #include <linux/errno.h>
25 #include <linux/percpu.h> 25 #include <linux/percpu.h>
26 #include <linux/fs.h> 26 #include <linux/fs.h>
27 #include <linux/slab.h> 27 #include <linux/slab.h>
28 #include <linux/smp.h> 28 #include <linux/smp.h>
29 #include <linux/kvm_host.h> 29 #include <linux/kvm_host.h>
30 #include <linux/kvm.h> 30 #include <linux/kvm.h>
31 #include <linux/bitops.h> 31 #include <linux/bitops.h>
32 #include <linux/hrtimer.h> 32 #include <linux/hrtimer.h>
33 #include <linux/uaccess.h> 33 #include <linux/uaccess.h>
34 #include <linux/iommu.h> 34 #include <linux/iommu.h>
35 #include <linux/intel-iommu.h> 35 #include <linux/intel-iommu.h>
36 36
37 #include <asm/pgtable.h> 37 #include <asm/pgtable.h>
38 #include <asm/gcc_intrin.h> 38 #include <asm/gcc_intrin.h>
39 #include <asm/pal.h> 39 #include <asm/pal.h>
40 #include <asm/cacheflush.h> 40 #include <asm/cacheflush.h>
41 #include <asm/div64.h> 41 #include <asm/div64.h>
42 #include <asm/tlb.h> 42 #include <asm/tlb.h>
43 #include <asm/elf.h> 43 #include <asm/elf.h>
44 #include <asm/sn/addrs.h> 44 #include <asm/sn/addrs.h>
45 #include <asm/sn/clksupport.h> 45 #include <asm/sn/clksupport.h>
46 #include <asm/sn/shub_mmr.h> 46 #include <asm/sn/shub_mmr.h>
47 47
48 #include "misc.h" 48 #include "misc.h"
49 #include "vti.h" 49 #include "vti.h"
50 #include "iodev.h" 50 #include "iodev.h"
51 #include "ioapic.h" 51 #include "ioapic.h"
52 #include "lapic.h" 52 #include "lapic.h"
53 #include "irq.h" 53 #include "irq.h"
54 54
55 static unsigned long kvm_vmm_base; 55 static unsigned long kvm_vmm_base;
56 static unsigned long kvm_vsa_base; 56 static unsigned long kvm_vsa_base;
57 static unsigned long kvm_vm_buffer; 57 static unsigned long kvm_vm_buffer;
58 static unsigned long kvm_vm_buffer_size; 58 static unsigned long kvm_vm_buffer_size;
59 unsigned long kvm_vmm_gp; 59 unsigned long kvm_vmm_gp;
60 60
61 static long vp_env_info; 61 static long vp_env_info;
62 62
63 static struct kvm_vmm_info *kvm_vmm_info; 63 static struct kvm_vmm_info *kvm_vmm_info;
64 64
65 static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu); 65 static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu);
66 66
67 struct kvm_stats_debugfs_item debugfs_entries[] = { 67 struct kvm_stats_debugfs_item debugfs_entries[] = {
68 { NULL } 68 { NULL }
69 }; 69 };
70 70
71 static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu) 71 static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu)
72 { 72 {
73 #if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) 73 #if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
74 if (vcpu->kvm->arch.is_sn2) 74 if (vcpu->kvm->arch.is_sn2)
75 return rtc_time(); 75 return rtc_time();
76 else 76 else
77 #endif 77 #endif
78 return ia64_getreg(_IA64_REG_AR_ITC); 78 return ia64_getreg(_IA64_REG_AR_ITC);
79 } 79 }
80 80
81 static void kvm_flush_icache(unsigned long start, unsigned long len) 81 static void kvm_flush_icache(unsigned long start, unsigned long len)
82 { 82 {
83 int l; 83 int l;
84 84
85 for (l = 0; l < (len + 32); l += 32) 85 for (l = 0; l < (len + 32); l += 32)
86 ia64_fc((void *)(start + l)); 86 ia64_fc((void *)(start + l));
87 87
88 ia64_sync_i(); 88 ia64_sync_i();
89 ia64_srlz_i(); 89 ia64_srlz_i();
90 } 90 }
91 91
92 static void kvm_flush_tlb_all(void) 92 static void kvm_flush_tlb_all(void)
93 { 93 {
94 unsigned long i, j, count0, count1, stride0, stride1, addr; 94 unsigned long i, j, count0, count1, stride0, stride1, addr;
95 long flags; 95 long flags;
96 96
97 addr = local_cpu_data->ptce_base; 97 addr = local_cpu_data->ptce_base;
98 count0 = local_cpu_data->ptce_count[0]; 98 count0 = local_cpu_data->ptce_count[0];
99 count1 = local_cpu_data->ptce_count[1]; 99 count1 = local_cpu_data->ptce_count[1];
100 stride0 = local_cpu_data->ptce_stride[0]; 100 stride0 = local_cpu_data->ptce_stride[0];
101 stride1 = local_cpu_data->ptce_stride[1]; 101 stride1 = local_cpu_data->ptce_stride[1];
102 102
103 local_irq_save(flags); 103 local_irq_save(flags);
104 for (i = 0; i < count0; ++i) { 104 for (i = 0; i < count0; ++i) {
105 for (j = 0; j < count1; ++j) { 105 for (j = 0; j < count1; ++j) {
106 ia64_ptce(addr); 106 ia64_ptce(addr);
107 addr += stride1; 107 addr += stride1;
108 } 108 }
109 addr += stride0; 109 addr += stride0;
110 } 110 }
111 local_irq_restore(flags); 111 local_irq_restore(flags);
112 ia64_srlz_i(); /* srlz.i implies srlz.d */ 112 ia64_srlz_i(); /* srlz.i implies srlz.d */
113 } 113 }
114 114
115 long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) 115 long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
116 { 116 {
117 struct ia64_pal_retval iprv; 117 struct ia64_pal_retval iprv;
118 118
119 PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva, 119 PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva,
120 (u64)opt_handler); 120 (u64)opt_handler);
121 121
122 return iprv.status; 122 return iprv.status;
123 } 123 }
124 124
125 static DEFINE_SPINLOCK(vp_lock); 125 static DEFINE_SPINLOCK(vp_lock);
126 126
127 int kvm_arch_hardware_enable(void *garbage) 127 int kvm_arch_hardware_enable(void *garbage)
128 { 128 {
129 long status; 129 long status;
130 long tmp_base; 130 long tmp_base;
131 unsigned long pte; 131 unsigned long pte;
132 unsigned long saved_psr; 132 unsigned long saved_psr;
133 int slot; 133 int slot;
134 134
135 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL)); 135 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
136 local_irq_save(saved_psr); 136 local_irq_save(saved_psr);
137 slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); 137 slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
138 local_irq_restore(saved_psr); 138 local_irq_restore(saved_psr);
139 if (slot < 0) 139 if (slot < 0)
140 return -EINVAL; 140 return -EINVAL;
141 141
142 spin_lock(&vp_lock); 142 spin_lock(&vp_lock);
143 status = ia64_pal_vp_init_env(kvm_vsa_base ? 143 status = ia64_pal_vp_init_env(kvm_vsa_base ?
144 VP_INIT_ENV : VP_INIT_ENV_INITALIZE, 144 VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
145 __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base); 145 __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
146 if (status != 0) { 146 if (status != 0) {
147 spin_unlock(&vp_lock); 147 spin_unlock(&vp_lock);
148 printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n"); 148 printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
149 return -EINVAL; 149 return -EINVAL;
150 } 150 }
151 151
152 if (!kvm_vsa_base) { 152 if (!kvm_vsa_base) {
153 kvm_vsa_base = tmp_base; 153 kvm_vsa_base = tmp_base;
154 printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base); 154 printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base);
155 } 155 }
156 spin_unlock(&vp_lock); 156 spin_unlock(&vp_lock);
157 ia64_ptr_entry(0x3, slot); 157 ia64_ptr_entry(0x3, slot);
158 158
159 return 0; 159 return 0;
160 } 160 }
161 161
162 void kvm_arch_hardware_disable(void *garbage) 162 void kvm_arch_hardware_disable(void *garbage)
163 { 163 {
164 164
165 long status; 165 long status;
166 int slot; 166 int slot;
167 unsigned long pte; 167 unsigned long pte;
168 unsigned long saved_psr; 168 unsigned long saved_psr;
169 unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA); 169 unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA);
170 170
171 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), 171 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
172 PAGE_KERNEL)); 172 PAGE_KERNEL));
173 173
174 local_irq_save(saved_psr); 174 local_irq_save(saved_psr);
175 slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); 175 slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
176 local_irq_restore(saved_psr); 176 local_irq_restore(saved_psr);
177 if (slot < 0) 177 if (slot < 0)
178 return; 178 return;
179 179
180 status = ia64_pal_vp_exit_env(host_iva); 180 status = ia64_pal_vp_exit_env(host_iva);
181 if (status) 181 if (status)
182 printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n", 182 printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n",
183 status); 183 status);
184 ia64_ptr_entry(0x3, slot); 184 ia64_ptr_entry(0x3, slot);
185 } 185 }
186 186
187 void kvm_arch_check_processor_compat(void *rtn) 187 void kvm_arch_check_processor_compat(void *rtn)
188 { 188 {
189 *(int *)rtn = 0; 189 *(int *)rtn = 0;
190 } 190 }
191 191
192 int kvm_dev_ioctl_check_extension(long ext) 192 int kvm_dev_ioctl_check_extension(long ext)
193 { 193 {
194 194
195 int r; 195 int r;
196 196
197 switch (ext) { 197 switch (ext) {
198 case KVM_CAP_IRQCHIP: 198 case KVM_CAP_IRQCHIP:
199 case KVM_CAP_MP_STATE: 199 case KVM_CAP_MP_STATE:
200 case KVM_CAP_IRQ_INJECT_STATUS: 200 case KVM_CAP_IRQ_INJECT_STATUS:
201 r = 1; 201 r = 1;
202 break; 202 break;
203 case KVM_CAP_COALESCED_MMIO: 203 case KVM_CAP_COALESCED_MMIO:
204 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 204 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
205 break; 205 break;
206 case KVM_CAP_IOMMU: 206 case KVM_CAP_IOMMU:
207 r = iommu_found(); 207 r = iommu_found();
208 break; 208 break;
209 default: 209 default:
210 r = 0; 210 r = 0;
211 } 211 }
212 return r; 212 return r;
213 213
214 } 214 }
215 215
216 static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 216 static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
217 { 217 {
218 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 218 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
219 kvm_run->hw.hardware_exit_reason = 1; 219 kvm_run->hw.hardware_exit_reason = 1;
220 return 0; 220 return 0;
221 } 221 }
222 222
223 static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 223 static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
224 { 224 {
225 struct kvm_mmio_req *p; 225 struct kvm_mmio_req *p;
226 struct kvm_io_device *mmio_dev; 226 struct kvm_io_device *mmio_dev;
227 int r; 227 int r;
228 228
229 p = kvm_get_vcpu_ioreq(vcpu); 229 p = kvm_get_vcpu_ioreq(vcpu);
230 230
231 if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS) 231 if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
232 goto mmio; 232 goto mmio;
233 vcpu->mmio_needed = 1; 233 vcpu->mmio_needed = 1;
234 vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr; 234 vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr;
235 vcpu->mmio_size = kvm_run->mmio.len = p->size; 235 vcpu->mmio_size = kvm_run->mmio.len = p->size;
236 vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir; 236 vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
237 237
238 if (vcpu->mmio_is_write) 238 if (vcpu->mmio_is_write)
239 memcpy(vcpu->mmio_data, &p->data, p->size); 239 memcpy(vcpu->mmio_data, &p->data, p->size);
240 memcpy(kvm_run->mmio.data, &p->data, p->size); 240 memcpy(kvm_run->mmio.data, &p->data, p->size);
241 kvm_run->exit_reason = KVM_EXIT_MMIO; 241 kvm_run->exit_reason = KVM_EXIT_MMIO;
242 return 0; 242 return 0;
243 mmio: 243 mmio:
244 if (p->dir) 244 if (p->dir)
245 r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr, 245 r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
246 p->size, &p->data); 246 p->size, &p->data);
247 else 247 else
248 r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr, 248 r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
249 p->size, &p->data); 249 p->size, &p->data);
250 if (r) 250 if (r)
251 printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr); 251 printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
252 p->state = STATE_IORESP_READY; 252 p->state = STATE_IORESP_READY;
253 253
254 return 1; 254 return 1;
255 } 255 }
256 256
257 static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 257 static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
258 { 258 {
259 struct exit_ctl_data *p; 259 struct exit_ctl_data *p;
260 260
261 p = kvm_get_exit_data(vcpu); 261 p = kvm_get_exit_data(vcpu);
262 262
263 if (p->exit_reason == EXIT_REASON_PAL_CALL) 263 if (p->exit_reason == EXIT_REASON_PAL_CALL)
264 return kvm_pal_emul(vcpu, kvm_run); 264 return kvm_pal_emul(vcpu, kvm_run);
265 else { 265 else {
266 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 266 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
267 kvm_run->hw.hardware_exit_reason = 2; 267 kvm_run->hw.hardware_exit_reason = 2;
268 return 0; 268 return 0;
269 } 269 }
270 } 270 }
271 271
272 static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 272 static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
273 { 273 {
274 struct exit_ctl_data *p; 274 struct exit_ctl_data *p;
275 275
276 p = kvm_get_exit_data(vcpu); 276 p = kvm_get_exit_data(vcpu);
277 277
278 if (p->exit_reason == EXIT_REASON_SAL_CALL) { 278 if (p->exit_reason == EXIT_REASON_SAL_CALL) {
279 kvm_sal_emul(vcpu); 279 kvm_sal_emul(vcpu);
280 return 1; 280 return 1;
281 } else { 281 } else {
282 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 282 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
283 kvm_run->hw.hardware_exit_reason = 3; 283 kvm_run->hw.hardware_exit_reason = 3;
284 return 0; 284 return 0;
285 } 285 }
286 286
287 } 287 }
288 288
289 static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector) 289 static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector)
290 { 290 {
291 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 291 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
292 292
293 if (!test_and_set_bit(vector, &vpd->irr[0])) { 293 if (!test_and_set_bit(vector, &vpd->irr[0])) {
294 vcpu->arch.irq_new_pending = 1; 294 vcpu->arch.irq_new_pending = 1;
295 kvm_vcpu_kick(vcpu); 295 kvm_vcpu_kick(vcpu);
296 return 1; 296 return 1;
297 } 297 }
298 return 0; 298 return 0;
299 } 299 }
300 300
301 /* 301 /*
302 * offset: address offset to IPI space. 302 * offset: address offset to IPI space.
303 * value: deliver value. 303 * value: deliver value.
304 */ 304 */
305 static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm, 305 static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
306 uint64_t vector) 306 uint64_t vector)
307 { 307 {
308 switch (dm) { 308 switch (dm) {
309 case SAPIC_FIXED: 309 case SAPIC_FIXED:
310 break; 310 break;
311 case SAPIC_NMI: 311 case SAPIC_NMI:
312 vector = 2; 312 vector = 2;
313 break; 313 break;
314 case SAPIC_EXTINT: 314 case SAPIC_EXTINT:
315 vector = 0; 315 vector = 0;
316 break; 316 break;
317 case SAPIC_INIT: 317 case SAPIC_INIT:
318 case SAPIC_PMI: 318 case SAPIC_PMI:
319 default: 319 default:
320 printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n"); 320 printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
321 return; 321 return;
322 } 322 }
323 __apic_accept_irq(vcpu, vector); 323 __apic_accept_irq(vcpu, vector);
324 } 324 }
325 325
326 static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, 326 static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
327 unsigned long eid) 327 unsigned long eid)
328 { 328 {
329 union ia64_lid lid; 329 union ia64_lid lid;
330 int i; 330 int i;
331 struct kvm_vcpu *vcpu; 331 struct kvm_vcpu *vcpu;
332 332
333 kvm_for_each_vcpu(i, vcpu, kvm) { 333 kvm_for_each_vcpu(i, vcpu, kvm) {
334 lid.val = VCPU_LID(vcpu); 334 lid.val = VCPU_LID(vcpu);
335 if (lid.id == id && lid.eid == eid) 335 if (lid.id == id && lid.eid == eid)
336 return vcpu; 336 return vcpu;
337 } 337 }
338 338
339 return NULL; 339 return NULL;
340 } 340 }
341 341
342 static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 342 static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
343 { 343 {
344 struct exit_ctl_data *p = kvm_get_exit_data(vcpu); 344 struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
345 struct kvm_vcpu *target_vcpu; 345 struct kvm_vcpu *target_vcpu;
346 struct kvm_pt_regs *regs; 346 struct kvm_pt_regs *regs;
347 union ia64_ipi_a addr = p->u.ipi_data.addr; 347 union ia64_ipi_a addr = p->u.ipi_data.addr;
348 union ia64_ipi_d data = p->u.ipi_data.data; 348 union ia64_ipi_d data = p->u.ipi_data.data;
349 349
350 target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid); 350 target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid);
351 if (!target_vcpu) 351 if (!target_vcpu)
352 return handle_vm_error(vcpu, kvm_run); 352 return handle_vm_error(vcpu, kvm_run);
353 353
354 if (!target_vcpu->arch.launched) { 354 if (!target_vcpu->arch.launched) {
355 regs = vcpu_regs(target_vcpu); 355 regs = vcpu_regs(target_vcpu);
356 356
357 regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip; 357 regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip;
358 regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp; 358 regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp;
359 359
360 target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 360 target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
361 if (waitqueue_active(&target_vcpu->wq)) 361 if (waitqueue_active(&target_vcpu->wq))
362 wake_up_interruptible(&target_vcpu->wq); 362 wake_up_interruptible(&target_vcpu->wq);
363 } else { 363 } else {
364 vcpu_deliver_ipi(target_vcpu, data.dm, data.vector); 364 vcpu_deliver_ipi(target_vcpu, data.dm, data.vector);
365 if (target_vcpu != vcpu) 365 if (target_vcpu != vcpu)
366 kvm_vcpu_kick(target_vcpu); 366 kvm_vcpu_kick(target_vcpu);
367 } 367 }
368 368
369 return 1; 369 return 1;
370 } 370 }
371 371
372 struct call_data { 372 struct call_data {
373 struct kvm_ptc_g ptc_g_data; 373 struct kvm_ptc_g ptc_g_data;
374 struct kvm_vcpu *vcpu; 374 struct kvm_vcpu *vcpu;
375 }; 375 };
376 376
377 static void vcpu_global_purge(void *info) 377 static void vcpu_global_purge(void *info)
378 { 378 {
379 struct call_data *p = (struct call_data *)info; 379 struct call_data *p = (struct call_data *)info;
380 struct kvm_vcpu *vcpu = p->vcpu; 380 struct kvm_vcpu *vcpu = p->vcpu;
381 381
382 if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) 382 if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
383 return; 383 return;
384 384
385 set_bit(KVM_REQ_PTC_G, &vcpu->requests); 385 set_bit(KVM_REQ_PTC_G, &vcpu->requests);
386 if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) { 386 if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) {
387 vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] = 387 vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] =
388 p->ptc_g_data; 388 p->ptc_g_data;
389 } else { 389 } else {
390 clear_bit(KVM_REQ_PTC_G, &vcpu->requests); 390 clear_bit(KVM_REQ_PTC_G, &vcpu->requests);
391 vcpu->arch.ptc_g_count = 0; 391 vcpu->arch.ptc_g_count = 0;
392 set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); 392 set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
393 } 393 }
394 } 394 }
395 395
396 static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 396 static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
397 { 397 {
398 struct exit_ctl_data *p = kvm_get_exit_data(vcpu); 398 struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
399 struct kvm *kvm = vcpu->kvm; 399 struct kvm *kvm = vcpu->kvm;
400 struct call_data call_data; 400 struct call_data call_data;
401 int i; 401 int i;
402 struct kvm_vcpu *vcpui; 402 struct kvm_vcpu *vcpui;
403 403
404 call_data.ptc_g_data = p->u.ptc_g_data; 404 call_data.ptc_g_data = p->u.ptc_g_data;
405 405
406 kvm_for_each_vcpu(i, vcpui, kvm) { 406 kvm_for_each_vcpu(i, vcpui, kvm) {
407 if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED || 407 if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED ||
408 vcpu == vcpui) 408 vcpu == vcpui)
409 continue; 409 continue;
410 410
411 if (waitqueue_active(&vcpui->wq)) 411 if (waitqueue_active(&vcpui->wq))
412 wake_up_interruptible(&vcpui->wq); 412 wake_up_interruptible(&vcpui->wq);
413 413
414 if (vcpui->cpu != -1) { 414 if (vcpui->cpu != -1) {
415 call_data.vcpu = vcpui; 415 call_data.vcpu = vcpui;
416 smp_call_function_single(vcpui->cpu, 416 smp_call_function_single(vcpui->cpu,
417 vcpu_global_purge, &call_data, 1); 417 vcpu_global_purge, &call_data, 1);
418 } else 418 } else
419 printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n"); 419 printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
420 420
421 } 421 }
422 return 1; 422 return 1;
423 } 423 }
424 424
425 static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 425 static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
426 { 426 {
427 return 1; 427 return 1;
428 } 428 }
429 429
430 static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu) 430 static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu)
431 { 431 {
432 unsigned long pte, rtc_phys_addr, map_addr; 432 unsigned long pte, rtc_phys_addr, map_addr;
433 int slot; 433 int slot;
434 434
435 map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT); 435 map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT);
436 rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC; 436 rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC;
437 pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC)); 437 pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC));
438 slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT); 438 slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT);
439 vcpu->arch.sn_rtc_tr_slot = slot; 439 vcpu->arch.sn_rtc_tr_slot = slot;
440 if (slot < 0) { 440 if (slot < 0) {
441 printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n"); 441 printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n");
442 slot = 0; 442 slot = 0;
443 } 443 }
444 return slot; 444 return slot;
445 } 445 }
446 446
447 int kvm_emulate_halt(struct kvm_vcpu *vcpu) 447 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
448 { 448 {
449 449
450 ktime_t kt; 450 ktime_t kt;
451 long itc_diff; 451 long itc_diff;
452 unsigned long vcpu_now_itc; 452 unsigned long vcpu_now_itc;
453 unsigned long expires; 453 unsigned long expires;
454 struct hrtimer *p_ht = &vcpu->arch.hlt_timer; 454 struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
455 unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec; 455 unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec;
456 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 456 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
457 457
458 if (irqchip_in_kernel(vcpu->kvm)) { 458 if (irqchip_in_kernel(vcpu->kvm)) {
459 459
460 vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset; 460 vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset;
461 461
462 if (time_after(vcpu_now_itc, vpd->itm)) { 462 if (time_after(vcpu_now_itc, vpd->itm)) {
463 vcpu->arch.timer_check = 1; 463 vcpu->arch.timer_check = 1;
464 return 1; 464 return 1;
465 } 465 }
466 itc_diff = vpd->itm - vcpu_now_itc; 466 itc_diff = vpd->itm - vcpu_now_itc;
467 if (itc_diff < 0) 467 if (itc_diff < 0)
468 itc_diff = -itc_diff; 468 itc_diff = -itc_diff;
469 469
470 expires = div64_u64(itc_diff, cyc_per_usec); 470 expires = div64_u64(itc_diff, cyc_per_usec);
471 kt = ktime_set(0, 1000 * expires); 471 kt = ktime_set(0, 1000 * expires);
472 472
473 vcpu->arch.ht_active = 1; 473 vcpu->arch.ht_active = 1;
474 hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); 474 hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
475 475
476 vcpu->arch.mp_state = KVM_MP_STATE_HALTED; 476 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
477 kvm_vcpu_block(vcpu); 477 kvm_vcpu_block(vcpu);
478 hrtimer_cancel(p_ht); 478 hrtimer_cancel(p_ht);
479 vcpu->arch.ht_active = 0; 479 vcpu->arch.ht_active = 0;
480 480
481 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) || 481 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) ||
482 kvm_cpu_has_pending_timer(vcpu)) 482 kvm_cpu_has_pending_timer(vcpu))
483 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) 483 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
484 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 484 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
485 485
486 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) 486 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
487 return -EINTR; 487 return -EINTR;
488 return 1; 488 return 1;
489 } else { 489 } else {
490 printk(KERN_ERR"kvm: Unsupported userspace halt!"); 490 printk(KERN_ERR"kvm: Unsupported userspace halt!");
491 return 0; 491 return 0;
492 } 492 }
493 } 493 }
494 494
495 static int handle_vm_shutdown(struct kvm_vcpu *vcpu, 495 static int handle_vm_shutdown(struct kvm_vcpu *vcpu,
496 struct kvm_run *kvm_run) 496 struct kvm_run *kvm_run)
497 { 497 {
498 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; 498 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
499 return 0; 499 return 0;
500 } 500 }
501 501
502 static int handle_external_interrupt(struct kvm_vcpu *vcpu, 502 static int handle_external_interrupt(struct kvm_vcpu *vcpu,
503 struct kvm_run *kvm_run) 503 struct kvm_run *kvm_run)
504 { 504 {
505 return 1; 505 return 1;
506 } 506 }
507 507
508 static int handle_vcpu_debug(struct kvm_vcpu *vcpu, 508 static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
509 struct kvm_run *kvm_run) 509 struct kvm_run *kvm_run)
510 { 510 {
511 printk("VMM: %s", vcpu->arch.log_buf); 511 printk("VMM: %s", vcpu->arch.log_buf);
512 return 1; 512 return 1;
513 } 513 }
514 514
515 static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, 515 static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
516 struct kvm_run *kvm_run) = { 516 struct kvm_run *kvm_run) = {
517 [EXIT_REASON_VM_PANIC] = handle_vm_error, 517 [EXIT_REASON_VM_PANIC] = handle_vm_error,
518 [EXIT_REASON_MMIO_INSTRUCTION] = handle_mmio, 518 [EXIT_REASON_MMIO_INSTRUCTION] = handle_mmio,
519 [EXIT_REASON_PAL_CALL] = handle_pal_call, 519 [EXIT_REASON_PAL_CALL] = handle_pal_call,
520 [EXIT_REASON_SAL_CALL] = handle_sal_call, 520 [EXIT_REASON_SAL_CALL] = handle_sal_call,
521 [EXIT_REASON_SWITCH_RR6] = handle_switch_rr6, 521 [EXIT_REASON_SWITCH_RR6] = handle_switch_rr6,
522 [EXIT_REASON_VM_DESTROY] = handle_vm_shutdown, 522 [EXIT_REASON_VM_DESTROY] = handle_vm_shutdown,
523 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, 523 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
524 [EXIT_REASON_IPI] = handle_ipi, 524 [EXIT_REASON_IPI] = handle_ipi,
525 [EXIT_REASON_PTC_G] = handle_global_purge, 525 [EXIT_REASON_PTC_G] = handle_global_purge,
526 [EXIT_REASON_DEBUG] = handle_vcpu_debug, 526 [EXIT_REASON_DEBUG] = handle_vcpu_debug,
527 527
528 }; 528 };
529 529
530 static const int kvm_vti_max_exit_handlers = 530 static const int kvm_vti_max_exit_handlers =
531 sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers); 531 sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers);
532 532
533 static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu) 533 static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu)
534 { 534 {
535 struct exit_ctl_data *p_exit_data; 535 struct exit_ctl_data *p_exit_data;
536 536
537 p_exit_data = kvm_get_exit_data(vcpu); 537 p_exit_data = kvm_get_exit_data(vcpu);
538 return p_exit_data->exit_reason; 538 return p_exit_data->exit_reason;
539 } 539 }
540 540
541 /* 541 /*
542 * The guest has exited. See if we can fix it or if we need userspace 542 * The guest has exited. See if we can fix it or if we need userspace
543 * assistance. 543 * assistance.
544 */ 544 */
545 static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 545 static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
546 { 546 {
547 u32 exit_reason = kvm_get_exit_reason(vcpu); 547 u32 exit_reason = kvm_get_exit_reason(vcpu);
548 vcpu->arch.last_exit = exit_reason; 548 vcpu->arch.last_exit = exit_reason;
549 549
550 if (exit_reason < kvm_vti_max_exit_handlers 550 if (exit_reason < kvm_vti_max_exit_handlers
551 && kvm_vti_exit_handlers[exit_reason]) 551 && kvm_vti_exit_handlers[exit_reason])
552 return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run); 552 return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run);
553 else { 553 else {
554 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 554 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
555 kvm_run->hw.hardware_exit_reason = exit_reason; 555 kvm_run->hw.hardware_exit_reason = exit_reason;
556 } 556 }
557 return 0; 557 return 0;
558 } 558 }
559 559
560 static inline void vti_set_rr6(unsigned long rr6) 560 static inline void vti_set_rr6(unsigned long rr6)
561 { 561 {
562 ia64_set_rr(RR6, rr6); 562 ia64_set_rr(RR6, rr6);
563 ia64_srlz_i(); 563 ia64_srlz_i();
564 } 564 }
565 565
566 static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu) 566 static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
567 { 567 {
568 unsigned long pte; 568 unsigned long pte;
569 struct kvm *kvm = vcpu->kvm; 569 struct kvm *kvm = vcpu->kvm;
570 int r; 570 int r;
571 571
572 /*Insert a pair of tr to map vmm*/ 572 /*Insert a pair of tr to map vmm*/
573 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL)); 573 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
574 r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); 574 r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
575 if (r < 0) 575 if (r < 0)
576 goto out; 576 goto out;
577 vcpu->arch.vmm_tr_slot = r; 577 vcpu->arch.vmm_tr_slot = r;
578 /*Insert a pairt of tr to map data of vm*/ 578 /*Insert a pairt of tr to map data of vm*/
579 pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL)); 579 pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL));
580 r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE, 580 r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE,
581 pte, KVM_VM_DATA_SHIFT); 581 pte, KVM_VM_DATA_SHIFT);
582 if (r < 0) 582 if (r < 0)
583 goto out; 583 goto out;
584 vcpu->arch.vm_tr_slot = r; 584 vcpu->arch.vm_tr_slot = r;
585 585
586 #if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) 586 #if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
587 if (kvm->arch.is_sn2) { 587 if (kvm->arch.is_sn2) {
588 r = kvm_sn2_setup_mappings(vcpu); 588 r = kvm_sn2_setup_mappings(vcpu);
589 if (r < 0) 589 if (r < 0)
590 goto out; 590 goto out;
591 } 591 }
592 #endif 592 #endif
593 593
594 r = 0; 594 r = 0;
595 out: 595 out:
596 return r; 596 return r;
597 } 597 }
598 598
599 static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu) 599 static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
600 { 600 {
601 struct kvm *kvm = vcpu->kvm; 601 struct kvm *kvm = vcpu->kvm;
602 ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot); 602 ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
603 ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot); 603 ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
604 #if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) 604 #if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
605 if (kvm->arch.is_sn2) 605 if (kvm->arch.is_sn2)
606 ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot); 606 ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot);
607 #endif 607 #endif
608 } 608 }
609 609
610 static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu) 610 static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
611 { 611 {
612 unsigned long psr; 612 unsigned long psr;
613 int r; 613 int r;
614 int cpu = smp_processor_id(); 614 int cpu = smp_processor_id();
615 615
616 if (vcpu->arch.last_run_cpu != cpu || 616 if (vcpu->arch.last_run_cpu != cpu ||
617 per_cpu(last_vcpu, cpu) != vcpu) { 617 per_cpu(last_vcpu, cpu) != vcpu) {
618 per_cpu(last_vcpu, cpu) = vcpu; 618 per_cpu(last_vcpu, cpu) = vcpu;
619 vcpu->arch.last_run_cpu = cpu; 619 vcpu->arch.last_run_cpu = cpu;
620 kvm_flush_tlb_all(); 620 kvm_flush_tlb_all();
621 } 621 }
622 622
623 vcpu->arch.host_rr6 = ia64_get_rr(RR6); 623 vcpu->arch.host_rr6 = ia64_get_rr(RR6);
624 vti_set_rr6(vcpu->arch.vmm_rr); 624 vti_set_rr6(vcpu->arch.vmm_rr);
625 local_irq_save(psr); 625 local_irq_save(psr);
626 r = kvm_insert_vmm_mapping(vcpu); 626 r = kvm_insert_vmm_mapping(vcpu);
627 local_irq_restore(psr); 627 local_irq_restore(psr);
628 return r; 628 return r;
629 } 629 }
630 630
631 static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu) 631 static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
632 { 632 {
633 kvm_purge_vmm_mapping(vcpu); 633 kvm_purge_vmm_mapping(vcpu);
634 vti_set_rr6(vcpu->arch.host_rr6); 634 vti_set_rr6(vcpu->arch.host_rr6);
635 } 635 }
636 636
637 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 637 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
638 { 638 {
639 union context *host_ctx, *guest_ctx; 639 union context *host_ctx, *guest_ctx;
640 int r, idx; 640 int r, idx;
641 641
642 idx = srcu_read_lock(&vcpu->kvm->srcu); 642 idx = srcu_read_lock(&vcpu->kvm->srcu);
643 643
644 again: 644 again:
645 if (signal_pending(current)) { 645 if (signal_pending(current)) {
646 r = -EINTR; 646 r = -EINTR;
647 kvm_run->exit_reason = KVM_EXIT_INTR; 647 kvm_run->exit_reason = KVM_EXIT_INTR;
648 goto out; 648 goto out;
649 } 649 }
650 650
651 preempt_disable(); 651 preempt_disable();
652 local_irq_disable(); 652 local_irq_disable();
653 653
654 /*Get host and guest context with guest address space.*/ 654 /*Get host and guest context with guest address space.*/
655 host_ctx = kvm_get_host_context(vcpu); 655 host_ctx = kvm_get_host_context(vcpu);
656 guest_ctx = kvm_get_guest_context(vcpu); 656 guest_ctx = kvm_get_guest_context(vcpu);
657 657
658 clear_bit(KVM_REQ_KICK, &vcpu->requests); 658 clear_bit(KVM_REQ_KICK, &vcpu->requests);
659 659
660 r = kvm_vcpu_pre_transition(vcpu); 660 r = kvm_vcpu_pre_transition(vcpu);
661 if (r < 0) 661 if (r < 0)
662 goto vcpu_run_fail; 662 goto vcpu_run_fail;
663 663
664 srcu_read_unlock(&vcpu->kvm->srcu, idx); 664 srcu_read_unlock(&vcpu->kvm->srcu, idx);
665 kvm_guest_enter(); 665 kvm_guest_enter();
666 666
667 /* 667 /*
668 * Transition to the guest 668 * Transition to the guest
669 */ 669 */
670 kvm_vmm_info->tramp_entry(host_ctx, guest_ctx); 670 kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
671 671
672 kvm_vcpu_post_transition(vcpu); 672 kvm_vcpu_post_transition(vcpu);
673 673
674 vcpu->arch.launched = 1; 674 vcpu->arch.launched = 1;
675 set_bit(KVM_REQ_KICK, &vcpu->requests); 675 set_bit(KVM_REQ_KICK, &vcpu->requests);
676 local_irq_enable(); 676 local_irq_enable();
677 677
678 /* 678 /*
679 * We must have an instruction between local_irq_enable() and 679 * We must have an instruction between local_irq_enable() and
680 * kvm_guest_exit(), so the timer interrupt isn't delayed by 680 * kvm_guest_exit(), so the timer interrupt isn't delayed by
681 * the interrupt shadow. The stat.exits increment will do nicely. 681 * the interrupt shadow. The stat.exits increment will do nicely.
682 * But we need to prevent reordering, hence this barrier(): 682 * But we need to prevent reordering, hence this barrier():
683 */ 683 */
684 barrier(); 684 barrier();
685 kvm_guest_exit(); 685 kvm_guest_exit();
686 preempt_enable(); 686 preempt_enable();
687 687
688 idx = srcu_read_lock(&vcpu->kvm->srcu); 688 idx = srcu_read_lock(&vcpu->kvm->srcu);
689 689
690 r = kvm_handle_exit(kvm_run, vcpu); 690 r = kvm_handle_exit(kvm_run, vcpu);
691 691
692 if (r > 0) { 692 if (r > 0) {
693 if (!need_resched()) 693 if (!need_resched())
694 goto again; 694 goto again;
695 } 695 }
696 696
697 out: 697 out:
698 srcu_read_unlock(&vcpu->kvm->srcu, idx); 698 srcu_read_unlock(&vcpu->kvm->srcu, idx);
699 if (r > 0) { 699 if (r > 0) {
700 kvm_resched(vcpu); 700 kvm_resched(vcpu);
701 idx = srcu_read_lock(&vcpu->kvm->srcu); 701 idx = srcu_read_lock(&vcpu->kvm->srcu);
702 goto again; 702 goto again;
703 } 703 }
704 704
705 return r; 705 return r;
706 706
707 vcpu_run_fail: 707 vcpu_run_fail:
708 local_irq_enable(); 708 local_irq_enable();
709 preempt_enable(); 709 preempt_enable();
710 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 710 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
711 goto out; 711 goto out;
712 } 712 }
713 713
714 static void kvm_set_mmio_data(struct kvm_vcpu *vcpu) 714 static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
715 { 715 {
716 struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu); 716 struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
717 717
718 if (!vcpu->mmio_is_write) 718 if (!vcpu->mmio_is_write)
719 memcpy(&p->data, vcpu->mmio_data, 8); 719 memcpy(&p->data, vcpu->mmio_data, 8);
720 p->state = STATE_IORESP_READY; 720 p->state = STATE_IORESP_READY;
721 } 721 }
722 722
723 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 723 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
724 { 724 {
725 int r; 725 int r;
726 sigset_t sigsaved; 726 sigset_t sigsaved;
727 727
728 vcpu_load(vcpu);
729
730 if (vcpu->sigset_active) 728 if (vcpu->sigset_active)
731 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 729 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
732 730
733 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { 731 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
734 kvm_vcpu_block(vcpu); 732 kvm_vcpu_block(vcpu);
735 clear_bit(KVM_REQ_UNHALT, &vcpu->requests); 733 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
736 r = -EAGAIN; 734 r = -EAGAIN;
737 goto out; 735 goto out;
738 } 736 }
739 737
740 if (vcpu->mmio_needed) { 738 if (vcpu->mmio_needed) {
741 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); 739 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
742 kvm_set_mmio_data(vcpu); 740 kvm_set_mmio_data(vcpu);
743 vcpu->mmio_read_completed = 1; 741 vcpu->mmio_read_completed = 1;
744 vcpu->mmio_needed = 0; 742 vcpu->mmio_needed = 0;
745 } 743 }
746 r = __vcpu_run(vcpu, kvm_run); 744 r = __vcpu_run(vcpu, kvm_run);
747 out: 745 out:
748 if (vcpu->sigset_active) 746 if (vcpu->sigset_active)
749 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 747 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
750 748
751 vcpu_put(vcpu);
752 return r; 749 return r;
753 } 750 }
754 751
755 static struct kvm *kvm_alloc_kvm(void) 752 static struct kvm *kvm_alloc_kvm(void)
756 { 753 {
757 754
758 struct kvm *kvm; 755 struct kvm *kvm;
759 uint64_t vm_base; 756 uint64_t vm_base;
760 757
761 BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE); 758 BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
762 759
763 vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE)); 760 vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
764 761
765 if (!vm_base) 762 if (!vm_base)
766 return ERR_PTR(-ENOMEM); 763 return ERR_PTR(-ENOMEM);
767 764
768 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); 765 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
769 kvm = (struct kvm *)(vm_base + 766 kvm = (struct kvm *)(vm_base +
770 offsetof(struct kvm_vm_data, kvm_vm_struct)); 767 offsetof(struct kvm_vm_data, kvm_vm_struct));
771 kvm->arch.vm_base = vm_base; 768 kvm->arch.vm_base = vm_base;
772 printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base); 769 printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
773 770
774 return kvm; 771 return kvm;
775 } 772 }
776 773
777 struct kvm_io_range { 774 struct kvm_io_range {
778 unsigned long start; 775 unsigned long start;
779 unsigned long size; 776 unsigned long size;
780 unsigned long type; 777 unsigned long type;
781 }; 778 };
782 779
783 static const struct kvm_io_range io_ranges[] = { 780 static const struct kvm_io_range io_ranges[] = {
784 {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER}, 781 {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER},
785 {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO}, 782 {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO},
786 {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO}, 783 {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO},
787 {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC}, 784 {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC},
788 {PIB_START, PIB_SIZE, GPFN_PIB}, 785 {PIB_START, PIB_SIZE, GPFN_PIB},
789 }; 786 };
790 787
791 static void kvm_build_io_pmt(struct kvm *kvm) 788 static void kvm_build_io_pmt(struct kvm *kvm)
792 { 789 {
793 unsigned long i, j; 790 unsigned long i, j;
794 791
795 /* Mark I/O ranges */ 792 /* Mark I/O ranges */
796 for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range)); 793 for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range));
797 i++) { 794 i++) {
798 for (j = io_ranges[i].start; 795 for (j = io_ranges[i].start;
799 j < io_ranges[i].start + io_ranges[i].size; 796 j < io_ranges[i].start + io_ranges[i].size;
800 j += PAGE_SIZE) 797 j += PAGE_SIZE)
801 kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT, 798 kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT,
802 io_ranges[i].type, 0); 799 io_ranges[i].type, 0);
803 } 800 }
804 801
805 } 802 }
806 803
807 /*Use unused rids to virtualize guest rid.*/ 804 /*Use unused rids to virtualize guest rid.*/
808 #define GUEST_PHYSICAL_RR0 0x1739 805 #define GUEST_PHYSICAL_RR0 0x1739
809 #define GUEST_PHYSICAL_RR4 0x2739 806 #define GUEST_PHYSICAL_RR4 0x2739
810 #define VMM_INIT_RR 0x1660 807 #define VMM_INIT_RR 0x1660
811 808
812 static void kvm_init_vm(struct kvm *kvm) 809 static void kvm_init_vm(struct kvm *kvm)
813 { 810 {
814 BUG_ON(!kvm); 811 BUG_ON(!kvm);
815 812
816 kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; 813 kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
817 kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4; 814 kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
818 kvm->arch.vmm_init_rr = VMM_INIT_RR; 815 kvm->arch.vmm_init_rr = VMM_INIT_RR;
819 816
820 /* 817 /*
821 *Fill P2M entries for MMIO/IO ranges 818 *Fill P2M entries for MMIO/IO ranges
822 */ 819 */
823 kvm_build_io_pmt(kvm); 820 kvm_build_io_pmt(kvm);
824 821
825 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 822 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
826 823
827 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ 824 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
828 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); 825 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
829 } 826 }
830 827
831 struct kvm *kvm_arch_create_vm(void) 828 struct kvm *kvm_arch_create_vm(void)
832 { 829 {
833 struct kvm *kvm = kvm_alloc_kvm(); 830 struct kvm *kvm = kvm_alloc_kvm();
834 831
835 if (IS_ERR(kvm)) 832 if (IS_ERR(kvm))
836 return ERR_PTR(-ENOMEM); 833 return ERR_PTR(-ENOMEM);
837 834
838 kvm->arch.is_sn2 = ia64_platform_is("sn2"); 835 kvm->arch.is_sn2 = ia64_platform_is("sn2");
839 836
840 kvm_init_vm(kvm); 837 kvm_init_vm(kvm);
841 838
842 return kvm; 839 return kvm;
843 840
844 } 841 }
845 842
846 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, 843 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
847 struct kvm_irqchip *chip) 844 struct kvm_irqchip *chip)
848 { 845 {
849 int r; 846 int r;
850 847
851 r = 0; 848 r = 0;
852 switch (chip->chip_id) { 849 switch (chip->chip_id) {
853 case KVM_IRQCHIP_IOAPIC: 850 case KVM_IRQCHIP_IOAPIC:
854 r = kvm_get_ioapic(kvm, &chip->chip.ioapic); 851 r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
855 break; 852 break;
856 default: 853 default:
857 r = -EINVAL; 854 r = -EINVAL;
858 break; 855 break;
859 } 856 }
860 return r; 857 return r;
861 } 858 }
862 859
863 static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) 860 static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
864 { 861 {
865 int r; 862 int r;
866 863
867 r = 0; 864 r = 0;
868 switch (chip->chip_id) { 865 switch (chip->chip_id) {
869 case KVM_IRQCHIP_IOAPIC: 866 case KVM_IRQCHIP_IOAPIC:
870 r = kvm_set_ioapic(kvm, &chip->chip.ioapic); 867 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
871 break; 868 break;
872 default: 869 default:
873 r = -EINVAL; 870 r = -EINVAL;
874 break; 871 break;
875 } 872 }
876 return r; 873 return r;
877 } 874 }
878 875
879 #define RESTORE_REGS(_x) vcpu->arch._x = regs->_x 876 #define RESTORE_REGS(_x) vcpu->arch._x = regs->_x
880 877
881 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 878 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
882 { 879 {
883 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 880 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
884 int i; 881 int i;
885 882
886 vcpu_load(vcpu);
887
888 for (i = 0; i < 16; i++) { 883 for (i = 0; i < 16; i++) {
889 vpd->vgr[i] = regs->vpd.vgr[i]; 884 vpd->vgr[i] = regs->vpd.vgr[i];
890 vpd->vbgr[i] = regs->vpd.vbgr[i]; 885 vpd->vbgr[i] = regs->vpd.vbgr[i];
891 } 886 }
892 for (i = 0; i < 128; i++) 887 for (i = 0; i < 128; i++)
893 vpd->vcr[i] = regs->vpd.vcr[i]; 888 vpd->vcr[i] = regs->vpd.vcr[i];
894 vpd->vhpi = regs->vpd.vhpi; 889 vpd->vhpi = regs->vpd.vhpi;
895 vpd->vnat = regs->vpd.vnat; 890 vpd->vnat = regs->vpd.vnat;
896 vpd->vbnat = regs->vpd.vbnat; 891 vpd->vbnat = regs->vpd.vbnat;
897 vpd->vpsr = regs->vpd.vpsr; 892 vpd->vpsr = regs->vpd.vpsr;
898 893
899 vpd->vpr = regs->vpd.vpr; 894 vpd->vpr = regs->vpd.vpr;
900 895
901 memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context)); 896 memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
902 897
903 RESTORE_REGS(mp_state); 898 RESTORE_REGS(mp_state);
904 RESTORE_REGS(vmm_rr); 899 RESTORE_REGS(vmm_rr);
905 memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS); 900 memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS);
906 memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS); 901 memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS);
907 RESTORE_REGS(itr_regions); 902 RESTORE_REGS(itr_regions);
908 RESTORE_REGS(dtr_regions); 903 RESTORE_REGS(dtr_regions);
909 RESTORE_REGS(tc_regions); 904 RESTORE_REGS(tc_regions);
910 RESTORE_REGS(irq_check); 905 RESTORE_REGS(irq_check);
911 RESTORE_REGS(itc_check); 906 RESTORE_REGS(itc_check);
912 RESTORE_REGS(timer_check); 907 RESTORE_REGS(timer_check);
913 RESTORE_REGS(timer_pending); 908 RESTORE_REGS(timer_pending);
914 RESTORE_REGS(last_itc); 909 RESTORE_REGS(last_itc);
915 for (i = 0; i < 8; i++) { 910 for (i = 0; i < 8; i++) {
916 vcpu->arch.vrr[i] = regs->vrr[i]; 911 vcpu->arch.vrr[i] = regs->vrr[i];
917 vcpu->arch.ibr[i] = regs->ibr[i]; 912 vcpu->arch.ibr[i] = regs->ibr[i];
918 vcpu->arch.dbr[i] = regs->dbr[i]; 913 vcpu->arch.dbr[i] = regs->dbr[i];
919 } 914 }
920 for (i = 0; i < 4; i++) 915 for (i = 0; i < 4; i++)
921 vcpu->arch.insvc[i] = regs->insvc[i]; 916 vcpu->arch.insvc[i] = regs->insvc[i];
922 RESTORE_REGS(xtp); 917 RESTORE_REGS(xtp);
923 RESTORE_REGS(metaphysical_rr0); 918 RESTORE_REGS(metaphysical_rr0);
924 RESTORE_REGS(metaphysical_rr4); 919 RESTORE_REGS(metaphysical_rr4);
925 RESTORE_REGS(metaphysical_saved_rr0); 920 RESTORE_REGS(metaphysical_saved_rr0);
926 RESTORE_REGS(metaphysical_saved_rr4); 921 RESTORE_REGS(metaphysical_saved_rr4);
927 RESTORE_REGS(fp_psr); 922 RESTORE_REGS(fp_psr);
928 RESTORE_REGS(saved_gp); 923 RESTORE_REGS(saved_gp);
929 924
930 vcpu->arch.irq_new_pending = 1; 925 vcpu->arch.irq_new_pending = 1;
931 vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu); 926 vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu);
932 set_bit(KVM_REQ_RESUME, &vcpu->requests); 927 set_bit(KVM_REQ_RESUME, &vcpu->requests);
933 928
934 vcpu_put(vcpu);
935
936 return 0; 929 return 0;
937 } 930 }
938 931
939 long kvm_arch_vm_ioctl(struct file *filp, 932 long kvm_arch_vm_ioctl(struct file *filp,
940 unsigned int ioctl, unsigned long arg) 933 unsigned int ioctl, unsigned long arg)
941 { 934 {
942 struct kvm *kvm = filp->private_data; 935 struct kvm *kvm = filp->private_data;
943 void __user *argp = (void __user *)arg; 936 void __user *argp = (void __user *)arg;
944 int r = -ENOTTY; 937 int r = -ENOTTY;
945 938
946 switch (ioctl) { 939 switch (ioctl) {
947 case KVM_SET_MEMORY_REGION: { 940 case KVM_SET_MEMORY_REGION: {
948 struct kvm_memory_region kvm_mem; 941 struct kvm_memory_region kvm_mem;
949 struct kvm_userspace_memory_region kvm_userspace_mem; 942 struct kvm_userspace_memory_region kvm_userspace_mem;
950 943
951 r = -EFAULT; 944 r = -EFAULT;
952 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) 945 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
953 goto out; 946 goto out;
954 kvm_userspace_mem.slot = kvm_mem.slot; 947 kvm_userspace_mem.slot = kvm_mem.slot;
955 kvm_userspace_mem.flags = kvm_mem.flags; 948 kvm_userspace_mem.flags = kvm_mem.flags;
956 kvm_userspace_mem.guest_phys_addr = 949 kvm_userspace_mem.guest_phys_addr =
957 kvm_mem.guest_phys_addr; 950 kvm_mem.guest_phys_addr;
958 kvm_userspace_mem.memory_size = kvm_mem.memory_size; 951 kvm_userspace_mem.memory_size = kvm_mem.memory_size;
959 r = kvm_vm_ioctl_set_memory_region(kvm, 952 r = kvm_vm_ioctl_set_memory_region(kvm,
960 &kvm_userspace_mem, 0); 953 &kvm_userspace_mem, 0);
961 if (r) 954 if (r)
962 goto out; 955 goto out;
963 break; 956 break;
964 } 957 }
965 case KVM_CREATE_IRQCHIP: 958 case KVM_CREATE_IRQCHIP:
966 r = -EFAULT; 959 r = -EFAULT;
967 r = kvm_ioapic_init(kvm); 960 r = kvm_ioapic_init(kvm);
968 if (r) 961 if (r)
969 goto out; 962 goto out;
970 r = kvm_setup_default_irq_routing(kvm); 963 r = kvm_setup_default_irq_routing(kvm);
971 if (r) { 964 if (r) {
972 kvm_ioapic_destroy(kvm); 965 kvm_ioapic_destroy(kvm);
973 goto out; 966 goto out;
974 } 967 }
975 break; 968 break;
976 case KVM_IRQ_LINE_STATUS: 969 case KVM_IRQ_LINE_STATUS:
977 case KVM_IRQ_LINE: { 970 case KVM_IRQ_LINE: {
978 struct kvm_irq_level irq_event; 971 struct kvm_irq_level irq_event;
979 972
980 r = -EFAULT; 973 r = -EFAULT;
981 if (copy_from_user(&irq_event, argp, sizeof irq_event)) 974 if (copy_from_user(&irq_event, argp, sizeof irq_event))
982 goto out; 975 goto out;
983 r = -ENXIO; 976 r = -ENXIO;
984 if (irqchip_in_kernel(kvm)) { 977 if (irqchip_in_kernel(kvm)) {
985 __s32 status; 978 __s32 status;
986 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 979 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
987 irq_event.irq, irq_event.level); 980 irq_event.irq, irq_event.level);
988 if (ioctl == KVM_IRQ_LINE_STATUS) { 981 if (ioctl == KVM_IRQ_LINE_STATUS) {
989 r = -EFAULT; 982 r = -EFAULT;
990 irq_event.status = status; 983 irq_event.status = status;
991 if (copy_to_user(argp, &irq_event, 984 if (copy_to_user(argp, &irq_event,
992 sizeof irq_event)) 985 sizeof irq_event))
993 goto out; 986 goto out;
994 } 987 }
995 r = 0; 988 r = 0;
996 } 989 }
997 break; 990 break;
998 } 991 }
999 case KVM_GET_IRQCHIP: { 992 case KVM_GET_IRQCHIP: {
1000 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ 993 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
1001 struct kvm_irqchip chip; 994 struct kvm_irqchip chip;
1002 995
1003 r = -EFAULT; 996 r = -EFAULT;
1004 if (copy_from_user(&chip, argp, sizeof chip)) 997 if (copy_from_user(&chip, argp, sizeof chip))
1005 goto out; 998 goto out;
1006 r = -ENXIO; 999 r = -ENXIO;
1007 if (!irqchip_in_kernel(kvm)) 1000 if (!irqchip_in_kernel(kvm))
1008 goto out; 1001 goto out;
1009 r = kvm_vm_ioctl_get_irqchip(kvm, &chip); 1002 r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
1010 if (r) 1003 if (r)
1011 goto out; 1004 goto out;
1012 r = -EFAULT; 1005 r = -EFAULT;
1013 if (copy_to_user(argp, &chip, sizeof chip)) 1006 if (copy_to_user(argp, &chip, sizeof chip))
1014 goto out; 1007 goto out;
1015 r = 0; 1008 r = 0;
1016 break; 1009 break;
1017 } 1010 }
1018 case KVM_SET_IRQCHIP: { 1011 case KVM_SET_IRQCHIP: {
1019 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ 1012 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
1020 struct kvm_irqchip chip; 1013 struct kvm_irqchip chip;
1021 1014
1022 r = -EFAULT; 1015 r = -EFAULT;
1023 if (copy_from_user(&chip, argp, sizeof chip)) 1016 if (copy_from_user(&chip, argp, sizeof chip))
1024 goto out; 1017 goto out;
1025 r = -ENXIO; 1018 r = -ENXIO;
1026 if (!irqchip_in_kernel(kvm)) 1019 if (!irqchip_in_kernel(kvm))
1027 goto out; 1020 goto out;
1028 r = kvm_vm_ioctl_set_irqchip(kvm, &chip); 1021 r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
1029 if (r) 1022 if (r)
1030 goto out; 1023 goto out;
1031 r = 0; 1024 r = 0;
1032 break; 1025 break;
1033 } 1026 }
1034 default: 1027 default:
1035 ; 1028 ;
1036 } 1029 }
1037 out: 1030 out:
1038 return r; 1031 return r;
1039 } 1032 }
1040 1033
1041 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 1034 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1042 struct kvm_sregs *sregs) 1035 struct kvm_sregs *sregs)
1043 { 1036 {
1044 return -EINVAL; 1037 return -EINVAL;
1045 } 1038 }
1046 1039
1047 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 1040 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1048 struct kvm_sregs *sregs) 1041 struct kvm_sregs *sregs)
1049 { 1042 {
1050 return -EINVAL; 1043 return -EINVAL;
1051 1044
1052 } 1045 }
1053 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 1046 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1054 struct kvm_translation *tr) 1047 struct kvm_translation *tr)
1055 { 1048 {
1056 1049
1057 return -EINVAL; 1050 return -EINVAL;
1058 } 1051 }
1059 1052
1060 static int kvm_alloc_vmm_area(void) 1053 static int kvm_alloc_vmm_area(void)
1061 { 1054 {
1062 if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) { 1055 if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) {
1063 kvm_vmm_base = __get_free_pages(GFP_KERNEL, 1056 kvm_vmm_base = __get_free_pages(GFP_KERNEL,
1064 get_order(KVM_VMM_SIZE)); 1057 get_order(KVM_VMM_SIZE));
1065 if (!kvm_vmm_base) 1058 if (!kvm_vmm_base)
1066 return -ENOMEM; 1059 return -ENOMEM;
1067 1060
1068 memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); 1061 memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
1069 kvm_vm_buffer = kvm_vmm_base + VMM_SIZE; 1062 kvm_vm_buffer = kvm_vmm_base + VMM_SIZE;
1070 1063
1071 printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n", 1064 printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n",
1072 kvm_vmm_base, kvm_vm_buffer); 1065 kvm_vmm_base, kvm_vm_buffer);
1073 } 1066 }
1074 1067
1075 return 0; 1068 return 0;
1076 } 1069 }
1077 1070
1078 static void kvm_free_vmm_area(void) 1071 static void kvm_free_vmm_area(void)
1079 { 1072 {
1080 if (kvm_vmm_base) { 1073 if (kvm_vmm_base) {
1081 /*Zero this area before free to avoid bits leak!!*/ 1074 /*Zero this area before free to avoid bits leak!!*/
1082 memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); 1075 memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
1083 free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE)); 1076 free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE));
1084 kvm_vmm_base = 0; 1077 kvm_vmm_base = 0;
1085 kvm_vm_buffer = 0; 1078 kvm_vm_buffer = 0;
1086 kvm_vsa_base = 0; 1079 kvm_vsa_base = 0;
1087 } 1080 }
1088 } 1081 }
1089 1082
1090 static int vti_init_vpd(struct kvm_vcpu *vcpu) 1083 static int vti_init_vpd(struct kvm_vcpu *vcpu)
1091 { 1084 {
1092 int i; 1085 int i;
1093 union cpuid3_t cpuid3; 1086 union cpuid3_t cpuid3;
1094 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 1087 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1095 1088
1096 if (IS_ERR(vpd)) 1089 if (IS_ERR(vpd))
1097 return PTR_ERR(vpd); 1090 return PTR_ERR(vpd);
1098 1091
1099 /* CPUID init */ 1092 /* CPUID init */
1100 for (i = 0; i < 5; i++) 1093 for (i = 0; i < 5; i++)
1101 vpd->vcpuid[i] = ia64_get_cpuid(i); 1094 vpd->vcpuid[i] = ia64_get_cpuid(i);
1102 1095
1103 /* Limit the CPUID number to 5 */ 1096 /* Limit the CPUID number to 5 */
1104 cpuid3.value = vpd->vcpuid[3]; 1097 cpuid3.value = vpd->vcpuid[3];
1105 cpuid3.number = 4; /* 5 - 1 */ 1098 cpuid3.number = 4; /* 5 - 1 */
1106 vpd->vcpuid[3] = cpuid3.value; 1099 vpd->vcpuid[3] = cpuid3.value;
1107 1100
1108 /*Set vac and vdc fields*/ 1101 /*Set vac and vdc fields*/
1109 vpd->vac.a_from_int_cr = 1; 1102 vpd->vac.a_from_int_cr = 1;
1110 vpd->vac.a_to_int_cr = 1; 1103 vpd->vac.a_to_int_cr = 1;
1111 vpd->vac.a_from_psr = 1; 1104 vpd->vac.a_from_psr = 1;
1112 vpd->vac.a_from_cpuid = 1; 1105 vpd->vac.a_from_cpuid = 1;
1113 vpd->vac.a_cover = 1; 1106 vpd->vac.a_cover = 1;
1114 vpd->vac.a_bsw = 1; 1107 vpd->vac.a_bsw = 1;
1115 vpd->vac.a_int = 1; 1108 vpd->vac.a_int = 1;
1116 vpd->vdc.d_vmsw = 1; 1109 vpd->vdc.d_vmsw = 1;
1117 1110
1118 /*Set virtual buffer*/ 1111 /*Set virtual buffer*/
1119 vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE; 1112 vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE;
1120 1113
1121 return 0; 1114 return 0;
1122 } 1115 }
1123 1116
1124 static int vti_create_vp(struct kvm_vcpu *vcpu) 1117 static int vti_create_vp(struct kvm_vcpu *vcpu)
1125 { 1118 {
1126 long ret; 1119 long ret;
1127 struct vpd *vpd = vcpu->arch.vpd; 1120 struct vpd *vpd = vcpu->arch.vpd;
1128 unsigned long vmm_ivt; 1121 unsigned long vmm_ivt;
1129 1122
1130 vmm_ivt = kvm_vmm_info->vmm_ivt; 1123 vmm_ivt = kvm_vmm_info->vmm_ivt;
1131 1124
1132 printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt); 1125 printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt);
1133 1126
1134 ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0); 1127 ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0);
1135 1128
1136 if (ret) { 1129 if (ret) {
1137 printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n"); 1130 printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n");
1138 return -EINVAL; 1131 return -EINVAL;
1139 } 1132 }
1140 return 0; 1133 return 0;
1141 } 1134 }
1142 1135
1143 static void init_ptce_info(struct kvm_vcpu *vcpu) 1136 static void init_ptce_info(struct kvm_vcpu *vcpu)
1144 { 1137 {
1145 ia64_ptce_info_t ptce = {0}; 1138 ia64_ptce_info_t ptce = {0};
1146 1139
1147 ia64_get_ptce(&ptce); 1140 ia64_get_ptce(&ptce);
1148 vcpu->arch.ptce_base = ptce.base; 1141 vcpu->arch.ptce_base = ptce.base;
1149 vcpu->arch.ptce_count[0] = ptce.count[0]; 1142 vcpu->arch.ptce_count[0] = ptce.count[0];
1150 vcpu->arch.ptce_count[1] = ptce.count[1]; 1143 vcpu->arch.ptce_count[1] = ptce.count[1];
1151 vcpu->arch.ptce_stride[0] = ptce.stride[0]; 1144 vcpu->arch.ptce_stride[0] = ptce.stride[0];
1152 vcpu->arch.ptce_stride[1] = ptce.stride[1]; 1145 vcpu->arch.ptce_stride[1] = ptce.stride[1];
1153 } 1146 }
1154 1147
1155 static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu) 1148 static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu)
1156 { 1149 {
1157 struct hrtimer *p_ht = &vcpu->arch.hlt_timer; 1150 struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
1158 1151
1159 if (hrtimer_cancel(p_ht)) 1152 if (hrtimer_cancel(p_ht))
1160 hrtimer_start_expires(p_ht, HRTIMER_MODE_ABS); 1153 hrtimer_start_expires(p_ht, HRTIMER_MODE_ABS);
1161 } 1154 }
1162 1155
1163 static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data) 1156 static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
1164 { 1157 {
1165 struct kvm_vcpu *vcpu; 1158 struct kvm_vcpu *vcpu;
1166 wait_queue_head_t *q; 1159 wait_queue_head_t *q;
1167 1160
1168 vcpu = container_of(data, struct kvm_vcpu, arch.hlt_timer); 1161 vcpu = container_of(data, struct kvm_vcpu, arch.hlt_timer);
1169 q = &vcpu->wq; 1162 q = &vcpu->wq;
1170 1163
1171 if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED) 1164 if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
1172 goto out; 1165 goto out;
1173 1166
1174 if (waitqueue_active(q)) 1167 if (waitqueue_active(q))
1175 wake_up_interruptible(q); 1168 wake_up_interruptible(q);
1176 1169
1177 out: 1170 out:
1178 vcpu->arch.timer_fired = 1; 1171 vcpu->arch.timer_fired = 1;
1179 vcpu->arch.timer_check = 1; 1172 vcpu->arch.timer_check = 1;
1180 return HRTIMER_NORESTART; 1173 return HRTIMER_NORESTART;
1181 } 1174 }
1182 1175
1183 #define PALE_RESET_ENTRY 0x80000000ffffffb0UL 1176 #define PALE_RESET_ENTRY 0x80000000ffffffb0UL
1184 1177
1185 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 1178 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1186 { 1179 {
1187 struct kvm_vcpu *v; 1180 struct kvm_vcpu *v;
1188 int r; 1181 int r;
1189 int i; 1182 int i;
1190 long itc_offset; 1183 long itc_offset;
1191 struct kvm *kvm = vcpu->kvm; 1184 struct kvm *kvm = vcpu->kvm;
1192 struct kvm_pt_regs *regs = vcpu_regs(vcpu); 1185 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1193 1186
1194 union context *p_ctx = &vcpu->arch.guest; 1187 union context *p_ctx = &vcpu->arch.guest;
1195 struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu); 1188 struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu);
1196 1189
1197 /*Init vcpu context for first run.*/ 1190 /*Init vcpu context for first run.*/
1198 if (IS_ERR(vmm_vcpu)) 1191 if (IS_ERR(vmm_vcpu))
1199 return PTR_ERR(vmm_vcpu); 1192 return PTR_ERR(vmm_vcpu);
1200 1193
1201 if (kvm_vcpu_is_bsp(vcpu)) { 1194 if (kvm_vcpu_is_bsp(vcpu)) {
1202 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 1195 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
1203 1196
1204 /*Set entry address for first run.*/ 1197 /*Set entry address for first run.*/
1205 regs->cr_iip = PALE_RESET_ENTRY; 1198 regs->cr_iip = PALE_RESET_ENTRY;
1206 1199
1207 /*Initialize itc offset for vcpus*/ 1200 /*Initialize itc offset for vcpus*/
1208 itc_offset = 0UL - kvm_get_itc(vcpu); 1201 itc_offset = 0UL - kvm_get_itc(vcpu);
1209 for (i = 0; i < KVM_MAX_VCPUS; i++) { 1202 for (i = 0; i < KVM_MAX_VCPUS; i++) {
1210 v = (struct kvm_vcpu *)((char *)vcpu + 1203 v = (struct kvm_vcpu *)((char *)vcpu +
1211 sizeof(struct kvm_vcpu_data) * i); 1204 sizeof(struct kvm_vcpu_data) * i);
1212 v->arch.itc_offset = itc_offset; 1205 v->arch.itc_offset = itc_offset;
1213 v->arch.last_itc = 0; 1206 v->arch.last_itc = 0;
1214 } 1207 }
1215 } else 1208 } else
1216 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; 1209 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
1217 1210
1218 r = -ENOMEM; 1211 r = -ENOMEM;
1219 vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL); 1212 vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL);
1220 if (!vcpu->arch.apic) 1213 if (!vcpu->arch.apic)
1221 goto out; 1214 goto out;
1222 vcpu->arch.apic->vcpu = vcpu; 1215 vcpu->arch.apic->vcpu = vcpu;
1223 1216
1224 p_ctx->gr[1] = 0; 1217 p_ctx->gr[1] = 0;
1225 p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET); 1218 p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
1226 p_ctx->gr[13] = (unsigned long)vmm_vcpu; 1219 p_ctx->gr[13] = (unsigned long)vmm_vcpu;
1227 p_ctx->psr = 0x1008522000UL; 1220 p_ctx->psr = 0x1008522000UL;
1228 p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/ 1221 p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
1229 p_ctx->caller_unat = 0; 1222 p_ctx->caller_unat = 0;
1230 p_ctx->pr = 0x0; 1223 p_ctx->pr = 0x0;
1231 p_ctx->ar[36] = 0x0; /*unat*/ 1224 p_ctx->ar[36] = 0x0; /*unat*/
1232 p_ctx->ar[19] = 0x0; /*rnat*/ 1225 p_ctx->ar[19] = 0x0; /*rnat*/
1233 p_ctx->ar[18] = (unsigned long)vmm_vcpu + 1226 p_ctx->ar[18] = (unsigned long)vmm_vcpu +
1234 ((sizeof(struct kvm_vcpu)+15) & ~15); 1227 ((sizeof(struct kvm_vcpu)+15) & ~15);
1235 p_ctx->ar[64] = 0x0; /*pfs*/ 1228 p_ctx->ar[64] = 0x0; /*pfs*/
1236 p_ctx->cr[0] = 0x7e04UL; 1229 p_ctx->cr[0] = 0x7e04UL;
1237 p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt; 1230 p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt;
1238 p_ctx->cr[8] = 0x3c; 1231 p_ctx->cr[8] = 0x3c;
1239 1232
1240 	/*Initialize region register*/ 1233 	/*Initialize region register*/
1241 p_ctx->rr[0] = 0x30; 1234 p_ctx->rr[0] = 0x30;
1242 p_ctx->rr[1] = 0x30; 1235 p_ctx->rr[1] = 0x30;
1243 p_ctx->rr[2] = 0x30; 1236 p_ctx->rr[2] = 0x30;
1244 p_ctx->rr[3] = 0x30; 1237 p_ctx->rr[3] = 0x30;
1245 p_ctx->rr[4] = 0x30; 1238 p_ctx->rr[4] = 0x30;
1246 p_ctx->rr[5] = 0x30; 1239 p_ctx->rr[5] = 0x30;
1247 p_ctx->rr[7] = 0x30; 1240 p_ctx->rr[7] = 0x30;
1248 1241
1249 	/*Initialize branch register 0*/ 1242 	/*Initialize branch register 0*/
1250 p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry; 1243 p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry;
1251 1244
1252 vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr; 1245 vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr;
1253 vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0; 1246 vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0;
1254 vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4; 1247 vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4;
1255 1248
1256 hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 1249 hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1257 vcpu->arch.hlt_timer.function = hlt_timer_fn; 1250 vcpu->arch.hlt_timer.function = hlt_timer_fn;
1258 1251
1259 vcpu->arch.last_run_cpu = -1; 1252 vcpu->arch.last_run_cpu = -1;
1260 vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id); 1253 vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
1261 vcpu->arch.vsa_base = kvm_vsa_base; 1254 vcpu->arch.vsa_base = kvm_vsa_base;
1262 vcpu->arch.__gp = kvm_vmm_gp; 1255 vcpu->arch.__gp = kvm_vmm_gp;
1263 vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock); 1256 vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
1264 vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id); 1257 vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
1265 vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id); 1258 vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
1266 init_ptce_info(vcpu); 1259 init_ptce_info(vcpu);
1267 1260
1268 r = 0; 1261 r = 0;
1269 out: 1262 out:
1270 return r; 1263 return r;
1271 } 1264 }
1272 1265
1273 static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id) 1266 static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
1274 { 1267 {
1275 unsigned long psr; 1268 unsigned long psr;
1276 int r; 1269 int r;
1277 1270
1278 local_irq_save(psr); 1271 local_irq_save(psr);
1279 r = kvm_insert_vmm_mapping(vcpu); 1272 r = kvm_insert_vmm_mapping(vcpu);
1280 local_irq_restore(psr); 1273 local_irq_restore(psr);
1281 if (r) 1274 if (r)
1282 goto fail; 1275 goto fail;
1283 r = kvm_vcpu_init(vcpu, vcpu->kvm, id); 1276 r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
1284 if (r) 1277 if (r)
1285 goto fail; 1278 goto fail;
1286 1279
1287 r = vti_init_vpd(vcpu); 1280 r = vti_init_vpd(vcpu);
1288 if (r) { 1281 if (r) {
1289 printk(KERN_DEBUG"kvm: vpd init error!!\n"); 1282 printk(KERN_DEBUG"kvm: vpd init error!!\n");
1290 goto uninit; 1283 goto uninit;
1291 } 1284 }
1292 1285
1293 r = vti_create_vp(vcpu); 1286 r = vti_create_vp(vcpu);
1294 if (r) 1287 if (r)
1295 goto uninit; 1288 goto uninit;
1296 1289
1297 kvm_purge_vmm_mapping(vcpu); 1290 kvm_purge_vmm_mapping(vcpu);
1298 1291
1299 return 0; 1292 return 0;
1300 uninit: 1293 uninit:
1301 kvm_vcpu_uninit(vcpu); 1294 kvm_vcpu_uninit(vcpu);
1302 fail: 1295 fail:
1303 return r; 1296 return r;
1304 } 1297 }
1305 1298
1306 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, 1299 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1307 unsigned int id) 1300 unsigned int id)
1308 { 1301 {
1309 struct kvm_vcpu *vcpu; 1302 struct kvm_vcpu *vcpu;
1310 unsigned long vm_base = kvm->arch.vm_base; 1303 unsigned long vm_base = kvm->arch.vm_base;
1311 int r; 1304 int r;
1312 int cpu; 1305 int cpu;
1313 1306
1314 BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2); 1307 BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
1315 1308
1316 r = -EINVAL; 1309 r = -EINVAL;
1317 if (id >= KVM_MAX_VCPUS) { 1310 if (id >= KVM_MAX_VCPUS) {
1318 printk(KERN_ERR"kvm: Can't configure vcpus > %ld", 1311 printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
1319 KVM_MAX_VCPUS); 1312 KVM_MAX_VCPUS);
1320 goto fail; 1313 goto fail;
1321 } 1314 }
1322 1315
1323 r = -ENOMEM; 1316 r = -ENOMEM;
1324 if (!vm_base) { 1317 if (!vm_base) {
1325 printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id); 1318 printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
1326 goto fail; 1319 goto fail;
1327 } 1320 }
1328 vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data, 1321 vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
1329 vcpu_data[id].vcpu_struct)); 1322 vcpu_data[id].vcpu_struct));
1330 vcpu->kvm = kvm; 1323 vcpu->kvm = kvm;
1331 1324
1332 cpu = get_cpu(); 1325 cpu = get_cpu();
1333 r = vti_vcpu_setup(vcpu, id); 1326 r = vti_vcpu_setup(vcpu, id);
1334 put_cpu(); 1327 put_cpu();
1335 1328
1336 if (r) { 1329 if (r) {
1337 printk(KERN_DEBUG"kvm: vcpu_setup error!!\n"); 1330 printk(KERN_DEBUG"kvm: vcpu_setup error!!\n");
1338 goto fail; 1331 goto fail;
1339 } 1332 }
1340 1333
1341 return vcpu; 1334 return vcpu;
1342 fail: 1335 fail:
1343 return ERR_PTR(r); 1336 return ERR_PTR(r);
1344 } 1337 }
1345 1338
1346 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 1339 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1347 { 1340 {
1348 return 0; 1341 return 0;
1349 } 1342 }
1350 1343
1351 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 1344 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1352 { 1345 {
1353 return -EINVAL; 1346 return -EINVAL;
1354 } 1347 }
1355 1348
1356 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 1349 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1357 { 1350 {
1358 return -EINVAL; 1351 return -EINVAL;
1359 } 1352 }
1360 1353
1361 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 1354 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1362 struct kvm_guest_debug *dbg) 1355 struct kvm_guest_debug *dbg)
1363 { 1356 {
1364 return -EINVAL; 1357 return -EINVAL;
1365 } 1358 }
1366 1359
1367 static void free_kvm(struct kvm *kvm) 1360 static void free_kvm(struct kvm *kvm)
1368 { 1361 {
1369 unsigned long vm_base = kvm->arch.vm_base; 1362 unsigned long vm_base = kvm->arch.vm_base;
1370 1363
1371 if (vm_base) { 1364 if (vm_base) {
1372 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); 1365 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
1373 free_pages(vm_base, get_order(KVM_VM_DATA_SIZE)); 1366 free_pages(vm_base, get_order(KVM_VM_DATA_SIZE));
1374 } 1367 }
1375 1368
1376 } 1369 }
1377 1370
1378 static void kvm_release_vm_pages(struct kvm *kvm) 1371 static void kvm_release_vm_pages(struct kvm *kvm)
1379 { 1372 {
1380 struct kvm_memslots *slots; 1373 struct kvm_memslots *slots;
1381 struct kvm_memory_slot *memslot; 1374 struct kvm_memory_slot *memslot;
1382 int i, j; 1375 int i, j;
1383 unsigned long base_gfn; 1376 unsigned long base_gfn;
1384 1377
1385 slots = kvm_memslots(kvm); 1378 slots = kvm_memslots(kvm);
1386 for (i = 0; i < slots->nmemslots; i++) { 1379 for (i = 0; i < slots->nmemslots; i++) {
1387 memslot = &slots->memslots[i]; 1380 memslot = &slots->memslots[i];
1388 base_gfn = memslot->base_gfn; 1381 base_gfn = memslot->base_gfn;
1389 1382
1390 for (j = 0; j < memslot->npages; j++) { 1383 for (j = 0; j < memslot->npages; j++) {
1391 if (memslot->rmap[j]) 1384 if (memslot->rmap[j])
1392 put_page((struct page *)memslot->rmap[j]); 1385 put_page((struct page *)memslot->rmap[j]);
1393 } 1386 }
1394 } 1387 }
1395 } 1388 }
1396 1389
1397 void kvm_arch_sync_events(struct kvm *kvm) 1390 void kvm_arch_sync_events(struct kvm *kvm)
1398 { 1391 {
1399 } 1392 }
1400 1393
1401 void kvm_arch_destroy_vm(struct kvm *kvm) 1394 void kvm_arch_destroy_vm(struct kvm *kvm)
1402 { 1395 {
1403 kvm_iommu_unmap_guest(kvm); 1396 kvm_iommu_unmap_guest(kvm);
1404 #ifdef KVM_CAP_DEVICE_ASSIGNMENT 1397 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
1405 kvm_free_all_assigned_devices(kvm); 1398 kvm_free_all_assigned_devices(kvm);
1406 #endif 1399 #endif
1407 kfree(kvm->arch.vioapic); 1400 kfree(kvm->arch.vioapic);
1408 kvm_release_vm_pages(kvm); 1401 kvm_release_vm_pages(kvm);
1409 kvm_free_physmem(kvm); 1402 kvm_free_physmem(kvm);
1410 cleanup_srcu_struct(&kvm->srcu); 1403 cleanup_srcu_struct(&kvm->srcu);
1411 free_kvm(kvm); 1404 free_kvm(kvm);
1412 } 1405 }
1413 1406
1414 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 1407 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1415 { 1408 {
1416 } 1409 }
1417 1410
1418 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1411 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1419 { 1412 {
1420 if (cpu != vcpu->cpu) { 1413 if (cpu != vcpu->cpu) {
1421 vcpu->cpu = cpu; 1414 vcpu->cpu = cpu;
1422 if (vcpu->arch.ht_active) 1415 if (vcpu->arch.ht_active)
1423 kvm_migrate_hlt_timer(vcpu); 1416 kvm_migrate_hlt_timer(vcpu);
1424 } 1417 }
1425 } 1418 }
1426 1419
1427 #define SAVE_REGS(_x) regs->_x = vcpu->arch._x 1420 #define SAVE_REGS(_x) regs->_x = vcpu->arch._x
1428 1421
1429 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 1422 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1430 { 1423 {
1431 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 1424 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1432 int i; 1425 int i;
1433 1426
1434 vcpu_load(vcpu); 1427 vcpu_load(vcpu);
1435 1428
1436 for (i = 0; i < 16; i++) { 1429 for (i = 0; i < 16; i++) {
1437 regs->vpd.vgr[i] = vpd->vgr[i]; 1430 regs->vpd.vgr[i] = vpd->vgr[i];
1438 regs->vpd.vbgr[i] = vpd->vbgr[i]; 1431 regs->vpd.vbgr[i] = vpd->vbgr[i];
1439 } 1432 }
1440 for (i = 0; i < 128; i++) 1433 for (i = 0; i < 128; i++)
1441 regs->vpd.vcr[i] = vpd->vcr[i]; 1434 regs->vpd.vcr[i] = vpd->vcr[i];
1442 regs->vpd.vhpi = vpd->vhpi; 1435 regs->vpd.vhpi = vpd->vhpi;
1443 regs->vpd.vnat = vpd->vnat; 1436 regs->vpd.vnat = vpd->vnat;
1444 regs->vpd.vbnat = vpd->vbnat; 1437 regs->vpd.vbnat = vpd->vbnat;
1445 regs->vpd.vpsr = vpd->vpsr; 1438 regs->vpd.vpsr = vpd->vpsr;
1446 regs->vpd.vpr = vpd->vpr; 1439 regs->vpd.vpr = vpd->vpr;
1447 1440
1448 memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context)); 1441 memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
1449 1442
1450 SAVE_REGS(mp_state); 1443 SAVE_REGS(mp_state);
1451 SAVE_REGS(vmm_rr); 1444 SAVE_REGS(vmm_rr);
1452 memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS); 1445 memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
1453 memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS); 1446 memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS);
1454 SAVE_REGS(itr_regions); 1447 SAVE_REGS(itr_regions);
1455 SAVE_REGS(dtr_regions); 1448 SAVE_REGS(dtr_regions);
1456 SAVE_REGS(tc_regions); 1449 SAVE_REGS(tc_regions);
1457 SAVE_REGS(irq_check); 1450 SAVE_REGS(irq_check);
1458 SAVE_REGS(itc_check); 1451 SAVE_REGS(itc_check);
1459 SAVE_REGS(timer_check); 1452 SAVE_REGS(timer_check);
1460 SAVE_REGS(timer_pending); 1453 SAVE_REGS(timer_pending);
1461 SAVE_REGS(last_itc); 1454 SAVE_REGS(last_itc);
1462 for (i = 0; i < 8; i++) { 1455 for (i = 0; i < 8; i++) {
1463 regs->vrr[i] = vcpu->arch.vrr[i]; 1456 regs->vrr[i] = vcpu->arch.vrr[i];
1464 regs->ibr[i] = vcpu->arch.ibr[i]; 1457 regs->ibr[i] = vcpu->arch.ibr[i];
1465 regs->dbr[i] = vcpu->arch.dbr[i]; 1458 regs->dbr[i] = vcpu->arch.dbr[i];
1466 } 1459 }
1467 for (i = 0; i < 4; i++) 1460 for (i = 0; i < 4; i++)
1468 regs->insvc[i] = vcpu->arch.insvc[i]; 1461 regs->insvc[i] = vcpu->arch.insvc[i];
1469 regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu); 1462 regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu);
1470 SAVE_REGS(xtp); 1463 SAVE_REGS(xtp);
1471 SAVE_REGS(metaphysical_rr0); 1464 SAVE_REGS(metaphysical_rr0);
1472 SAVE_REGS(metaphysical_rr4); 1465 SAVE_REGS(metaphysical_rr4);
1473 SAVE_REGS(metaphysical_saved_rr0); 1466 SAVE_REGS(metaphysical_saved_rr0);
1474 SAVE_REGS(metaphysical_saved_rr4); 1467 SAVE_REGS(metaphysical_saved_rr4);
1475 SAVE_REGS(fp_psr); 1468 SAVE_REGS(fp_psr);
1476 SAVE_REGS(saved_gp); 1469 SAVE_REGS(saved_gp);
1477 1470
1478 vcpu_put(vcpu); 1471 vcpu_put(vcpu);
1479 return 0; 1472 return 0;
1480 } 1473 }
1481 1474
1482 int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu, 1475 int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu,
1483 struct kvm_ia64_vcpu_stack *stack) 1476 struct kvm_ia64_vcpu_stack *stack)
1484 { 1477 {
1485 memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack)); 1478 memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack));
1486 return 0; 1479 return 0;
1487 } 1480 }
1488 1481
1489 int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu, 1482 int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu,
1490 struct kvm_ia64_vcpu_stack *stack) 1483 struct kvm_ia64_vcpu_stack *stack)
1491 { 1484 {
1492 memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu), 1485 memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu),
1493 sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu)); 1486 sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu));
1494 1487
1495 vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data; 1488 vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data;
1496 return 0; 1489 return 0;
1497 } 1490 }
1498 1491
1499 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 1492 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
1500 { 1493 {
1501 1494
1502 hrtimer_cancel(&vcpu->arch.hlt_timer); 1495 hrtimer_cancel(&vcpu->arch.hlt_timer);
1503 kfree(vcpu->arch.apic); 1496 kfree(vcpu->arch.apic);
1504 } 1497 }
1505 1498
1506 1499
1507 long kvm_arch_vcpu_ioctl(struct file *filp, 1500 long kvm_arch_vcpu_ioctl(struct file *filp,
1508 unsigned int ioctl, unsigned long arg) 1501 unsigned int ioctl, unsigned long arg)
1509 { 1502 {
1510 struct kvm_vcpu *vcpu = filp->private_data; 1503 struct kvm_vcpu *vcpu = filp->private_data;
1511 void __user *argp = (void __user *)arg; 1504 void __user *argp = (void __user *)arg;
1512 struct kvm_ia64_vcpu_stack *stack = NULL; 1505 struct kvm_ia64_vcpu_stack *stack = NULL;
1513 long r; 1506 long r;
1514 1507
1515 switch (ioctl) { 1508 switch (ioctl) {
1516 case KVM_IA64_VCPU_GET_STACK: { 1509 case KVM_IA64_VCPU_GET_STACK: {
1517 struct kvm_ia64_vcpu_stack __user *user_stack; 1510 struct kvm_ia64_vcpu_stack __user *user_stack;
1518 void __user *first_p = argp; 1511 void __user *first_p = argp;
1519 1512
1520 r = -EFAULT; 1513 r = -EFAULT;
1521 if (copy_from_user(&user_stack, first_p, sizeof(void *))) 1514 if (copy_from_user(&user_stack, first_p, sizeof(void *)))
1522 goto out; 1515 goto out;
1523 1516
1524 if (!access_ok(VERIFY_WRITE, user_stack, 1517 if (!access_ok(VERIFY_WRITE, user_stack,
1525 sizeof(struct kvm_ia64_vcpu_stack))) { 1518 sizeof(struct kvm_ia64_vcpu_stack))) {
1526 printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: " 1519 printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: "
1527 "Illegal user destination address for stack\n"); 1520 "Illegal user destination address for stack\n");
1528 goto out; 1521 goto out;
1529 } 1522 }
1530 stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL); 1523 stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
1531 if (!stack) { 1524 if (!stack) {
1532 r = -ENOMEM; 1525 r = -ENOMEM;
1533 goto out; 1526 goto out;
1534 } 1527 }
1535 1528
1536 r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack); 1529 r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack);
1537 if (r) 1530 if (r)
1538 goto out; 1531 goto out;
1539 1532
1540 if (copy_to_user(user_stack, stack, 1533 if (copy_to_user(user_stack, stack,
1541 sizeof(struct kvm_ia64_vcpu_stack))) { 1534 sizeof(struct kvm_ia64_vcpu_stack))) {
1542 r = -EFAULT; 1535 r = -EFAULT;
1543 goto out; 1536 goto out;
1544 } 1537 }
1545 1538
1546 break; 1539 break;
1547 } 1540 }
1548 case KVM_IA64_VCPU_SET_STACK: { 1541 case KVM_IA64_VCPU_SET_STACK: {
1549 struct kvm_ia64_vcpu_stack __user *user_stack; 1542 struct kvm_ia64_vcpu_stack __user *user_stack;
1550 void __user *first_p = argp; 1543 void __user *first_p = argp;
1551 1544
1552 r = -EFAULT; 1545 r = -EFAULT;
1553 if (copy_from_user(&user_stack, first_p, sizeof(void *))) 1546 if (copy_from_user(&user_stack, first_p, sizeof(void *)))
1554 goto out; 1547 goto out;
1555 1548
1556 if (!access_ok(VERIFY_READ, user_stack, 1549 if (!access_ok(VERIFY_READ, user_stack,
1557 sizeof(struct kvm_ia64_vcpu_stack))) { 1550 sizeof(struct kvm_ia64_vcpu_stack))) {
1558 printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: " 1551 printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: "
1559 "Illegal user address for stack\n"); 1552 "Illegal user address for stack\n");
1560 goto out; 1553 goto out;
1561 } 1554 }
1562 stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL); 1555 stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
1563 if (!stack) { 1556 if (!stack) {
1564 r = -ENOMEM; 1557 r = -ENOMEM;
1565 goto out; 1558 goto out;
1566 } 1559 }
1567 if (copy_from_user(stack, user_stack, 1560 if (copy_from_user(stack, user_stack,
1568 sizeof(struct kvm_ia64_vcpu_stack))) 1561 sizeof(struct kvm_ia64_vcpu_stack)))
1569 goto out; 1562 goto out;
1570 1563
1571 r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack); 1564 r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack);
1572 break; 1565 break;
1573 } 1566 }
1574 1567
1575 default: 1568 default:
1576 r = -EINVAL; 1569 r = -EINVAL;
1577 } 1570 }
1578 1571
1579 out: 1572 out:
1580 kfree(stack); 1573 kfree(stack);
1581 return r; 1574 return r;
1582 } 1575 }
1583 1576
1584 int kvm_arch_prepare_memory_region(struct kvm *kvm, 1577 int kvm_arch_prepare_memory_region(struct kvm *kvm,
1585 struct kvm_memory_slot *memslot, 1578 struct kvm_memory_slot *memslot,
1586 struct kvm_memory_slot old, 1579 struct kvm_memory_slot old,
1587 struct kvm_userspace_memory_region *mem, 1580 struct kvm_userspace_memory_region *mem,
1588 int user_alloc) 1581 int user_alloc)
1589 { 1582 {
1590 unsigned long i; 1583 unsigned long i;
1591 unsigned long pfn; 1584 unsigned long pfn;
1592 int npages = memslot->npages; 1585 int npages = memslot->npages;
1593 unsigned long base_gfn = memslot->base_gfn; 1586 unsigned long base_gfn = memslot->base_gfn;
1594 1587
1595 if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT)) 1588 if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
1596 return -ENOMEM; 1589 return -ENOMEM;
1597 1590
1598 for (i = 0; i < npages; i++) { 1591 for (i = 0; i < npages; i++) {
1599 pfn = gfn_to_pfn(kvm, base_gfn + i); 1592 pfn = gfn_to_pfn(kvm, base_gfn + i);
1600 if (!kvm_is_mmio_pfn(pfn)) { 1593 if (!kvm_is_mmio_pfn(pfn)) {
1601 kvm_set_pmt_entry(kvm, base_gfn + i, 1594 kvm_set_pmt_entry(kvm, base_gfn + i,
1602 pfn << PAGE_SHIFT, 1595 pfn << PAGE_SHIFT,
1603 _PAGE_AR_RWX | _PAGE_MA_WB); 1596 _PAGE_AR_RWX | _PAGE_MA_WB);
1604 memslot->rmap[i] = (unsigned long)pfn_to_page(pfn); 1597 memslot->rmap[i] = (unsigned long)pfn_to_page(pfn);
1605 } else { 1598 } else {
1606 kvm_set_pmt_entry(kvm, base_gfn + i, 1599 kvm_set_pmt_entry(kvm, base_gfn + i,
1607 GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT), 1600 GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT),
1608 _PAGE_MA_UC); 1601 _PAGE_MA_UC);
1609 memslot->rmap[i] = 0; 1602 memslot->rmap[i] = 0;
1610 } 1603 }
1611 } 1604 }
1612 1605
1613 return 0; 1606 return 0;
1614 } 1607 }
1615 1608
1616 void kvm_arch_commit_memory_region(struct kvm *kvm, 1609 void kvm_arch_commit_memory_region(struct kvm *kvm,
1617 struct kvm_userspace_memory_region *mem, 1610 struct kvm_userspace_memory_region *mem,
1618 struct kvm_memory_slot old, 1611 struct kvm_memory_slot old,
1619 int user_alloc) 1612 int user_alloc)
1620 { 1613 {
1621 return; 1614 return;
1622 } 1615 }
1623 1616
1624 void kvm_arch_flush_shadow(struct kvm *kvm) 1617 void kvm_arch_flush_shadow(struct kvm *kvm)
1625 { 1618 {
1626 kvm_flush_remote_tlbs(kvm); 1619 kvm_flush_remote_tlbs(kvm);
1627 } 1620 }
1628 1621
1629 long kvm_arch_dev_ioctl(struct file *filp, 1622 long kvm_arch_dev_ioctl(struct file *filp,
1630 unsigned int ioctl, unsigned long arg) 1623 unsigned int ioctl, unsigned long arg)
1631 { 1624 {
1632 return -EINVAL; 1625 return -EINVAL;
1633 } 1626 }
1634 1627
1635 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 1628 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1636 { 1629 {
1637 kvm_vcpu_uninit(vcpu); 1630 kvm_vcpu_uninit(vcpu);
1638 } 1631 }
1639 1632
1640 static int vti_cpu_has_kvm_support(void) 1633 static int vti_cpu_has_kvm_support(void)
1641 { 1634 {
1642 long avail = 1, status = 1, control = 1; 1635 long avail = 1, status = 1, control = 1;
1643 long ret; 1636 long ret;
1644 1637
1645 ret = ia64_pal_proc_get_features(&avail, &status, &control, 0); 1638 ret = ia64_pal_proc_get_features(&avail, &status, &control, 0);
1646 if (ret) 1639 if (ret)
1647 goto out; 1640 goto out;
1648 1641
1649 if (!(avail & PAL_PROC_VM_BIT)) 1642 if (!(avail & PAL_PROC_VM_BIT))
1650 goto out; 1643 goto out;
1651 1644
1652 printk(KERN_DEBUG"kvm: Hardware Supports VT\n"); 1645 printk(KERN_DEBUG"kvm: Hardware Supports VT\n");
1653 1646
1654 ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info); 1647 ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info);
1655 if (ret) 1648 if (ret)
1656 goto out; 1649 goto out;
1657 printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size); 1650 printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size);
1658 1651
1659 if (!(vp_env_info & VP_OPCODE)) { 1652 if (!(vp_env_info & VP_OPCODE)) {
1660 printk(KERN_WARNING"kvm: No opcode ability on hardware, " 1653 printk(KERN_WARNING"kvm: No opcode ability on hardware, "
1661 "vm_env_info:0x%lx\n", vp_env_info); 1654 "vm_env_info:0x%lx\n", vp_env_info);
1662 } 1655 }
1663 1656
1664 return 1; 1657 return 1;
1665 out: 1658 out:
1666 return 0; 1659 return 0;
1667 } 1660 }
1668 1661
1669 1662
1670 /* 1663 /*
1671 * On SN2, the ITC isn't stable, so copy in fast path code to use the 1664 * On SN2, the ITC isn't stable, so copy in fast path code to use the
1672 * SN2 RTC, replacing the ITC-based default version. 1665 * SN2 RTC, replacing the ITC-based default version.
1673 */ 1666 */
1674 static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info, 1667 static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info,
1675 struct module *module) 1668 struct module *module)
1676 { 1669 {
1677 unsigned long new_ar, new_ar_sn2; 1670 unsigned long new_ar, new_ar_sn2;
1678 unsigned long module_base; 1671 unsigned long module_base;
1679 1672
1680 if (!ia64_platform_is("sn2")) 1673 if (!ia64_platform_is("sn2"))
1681 return; 1674 return;
1682 1675
1683 module_base = (unsigned long)module->module_core; 1676 module_base = (unsigned long)module->module_core;
1684 1677
1685 new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base; 1678 new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base;
1686 new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base; 1679 new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base;
1687 1680
1688 printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC " 1681 printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC "
1689 "as source\n"); 1682 "as source\n");
1690 1683
1691 /* 1684 /*
1692 * Copy the SN2 version of mov_ar into place. They are both 1685 * Copy the SN2 version of mov_ar into place. They are both
1693 * the same size, so 6 bundles is sufficient (6 * 0x10). 1686 * the same size, so 6 bundles is sufficient (6 * 0x10).
1694 */ 1687 */
1695 memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60); 1688 memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60);
1696 } 1689 }
1697 1690
1698 static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info, 1691 static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
1699 struct module *module) 1692 struct module *module)
1700 { 1693 {
1701 unsigned long module_base; 1694 unsigned long module_base;
1702 unsigned long vmm_size; 1695 unsigned long vmm_size;
1703 1696
1704 unsigned long vmm_offset, func_offset, fdesc_offset; 1697 unsigned long vmm_offset, func_offset, fdesc_offset;
1705 struct fdesc *p_fdesc; 1698 struct fdesc *p_fdesc;
1706 1699
1707 BUG_ON(!module); 1700 BUG_ON(!module);
1708 1701
1709 if (!kvm_vmm_base) { 1702 if (!kvm_vmm_base) {
1710 		printk("kvm: kvm area hasn't been initialized yet!!\n"); 1703 		printk("kvm: kvm area hasn't been initialized yet!!\n");
1711 return -EFAULT; 1704 return -EFAULT;
1712 } 1705 }
1713 1706
1714 /*Calculate new position of relocated vmm module.*/ 1707 /*Calculate new position of relocated vmm module.*/
1715 module_base = (unsigned long)module->module_core; 1708 module_base = (unsigned long)module->module_core;
1716 vmm_size = module->core_size; 1709 vmm_size = module->core_size;
1717 if (unlikely(vmm_size > KVM_VMM_SIZE)) 1710 if (unlikely(vmm_size > KVM_VMM_SIZE))
1718 return -EFAULT; 1711 return -EFAULT;
1719 1712
1720 memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size); 1713 memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
1721 kvm_patch_vmm(vmm_info, module); 1714 kvm_patch_vmm(vmm_info, module);
1722 kvm_flush_icache(kvm_vmm_base, vmm_size); 1715 kvm_flush_icache(kvm_vmm_base, vmm_size);
1723 1716
1724 /*Recalculate kvm_vmm_info based on new VMM*/ 1717 /*Recalculate kvm_vmm_info based on new VMM*/
1725 vmm_offset = vmm_info->vmm_ivt - module_base; 1718 vmm_offset = vmm_info->vmm_ivt - module_base;
1726 kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset; 1719 kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset;
1727 printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n", 1720 printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n",
1728 kvm_vmm_info->vmm_ivt); 1721 kvm_vmm_info->vmm_ivt);
1729 1722
1730 fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base; 1723 fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base;
1731 kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE + 1724 kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE +
1732 fdesc_offset); 1725 fdesc_offset);
1733 func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base; 1726 func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base;
1734 p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); 1727 p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
1735 p_fdesc->ip = KVM_VMM_BASE + func_offset; 1728 p_fdesc->ip = KVM_VMM_BASE + func_offset;
1736 p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base); 1729 p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base);
1737 1730
1738 printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n", 1731 printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n",
1739 KVM_VMM_BASE+func_offset); 1732 KVM_VMM_BASE+func_offset);
1740 1733
1741 fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base; 1734 fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base;
1742 kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE + 1735 kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE +
1743 fdesc_offset); 1736 fdesc_offset);
1744 func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base; 1737 func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base;
1745 p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); 1738 p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
1746 p_fdesc->ip = KVM_VMM_BASE + func_offset; 1739 p_fdesc->ip = KVM_VMM_BASE + func_offset;
1747 p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base); 1740 p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base);
1748 1741
1749 kvm_vmm_gp = p_fdesc->gp; 1742 kvm_vmm_gp = p_fdesc->gp;
1750 1743
1751 printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n", 1744 printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n",
1752 kvm_vmm_info->vmm_entry); 1745 kvm_vmm_info->vmm_entry);
1753 printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n", 1746 printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n",
1754 KVM_VMM_BASE + func_offset); 1747 KVM_VMM_BASE + func_offset);
1755 1748
1756 return 0; 1749 return 0;
1757 } 1750 }
1758 1751
1759 int kvm_arch_init(void *opaque) 1752 int kvm_arch_init(void *opaque)
1760 { 1753 {
1761 int r; 1754 int r;
1762 struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque; 1755 struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque;
1763 1756
1764 if (!vti_cpu_has_kvm_support()) { 1757 if (!vti_cpu_has_kvm_support()) {
1765 printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n"); 1758 printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n");
1766 r = -EOPNOTSUPP; 1759 r = -EOPNOTSUPP;
1767 goto out; 1760 goto out;
1768 } 1761 }
1769 1762
1770 if (kvm_vmm_info) { 1763 if (kvm_vmm_info) {
1771 printk(KERN_ERR "kvm: Already loaded VMM module!\n"); 1764 printk(KERN_ERR "kvm: Already loaded VMM module!\n");
1772 r = -EEXIST; 1765 r = -EEXIST;
1773 goto out; 1766 goto out;
1774 } 1767 }
1775 1768
1776 r = -ENOMEM; 1769 r = -ENOMEM;
1777 kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL); 1770 kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL);
1778 if (!kvm_vmm_info) 1771 if (!kvm_vmm_info)
1779 goto out; 1772 goto out;
1780 1773
1781 if (kvm_alloc_vmm_area()) 1774 if (kvm_alloc_vmm_area())
1782 goto out_free0; 1775 goto out_free0;
1783 1776
1784 r = kvm_relocate_vmm(vmm_info, vmm_info->module); 1777 r = kvm_relocate_vmm(vmm_info, vmm_info->module);
1785 if (r) 1778 if (r)
1786 goto out_free1; 1779 goto out_free1;
1787 1780
1788 return 0; 1781 return 0;
1789 1782
1790 out_free1: 1783 out_free1:
1791 kvm_free_vmm_area(); 1784 kvm_free_vmm_area();
1792 out_free0: 1785 out_free0:
1793 kfree(kvm_vmm_info); 1786 kfree(kvm_vmm_info);
1794 out: 1787 out:
1795 return r; 1788 return r;
1796 } 1789 }
1797 1790
1798 void kvm_arch_exit(void) 1791 void kvm_arch_exit(void)
1799 { 1792 {
1800 kvm_free_vmm_area(); 1793 kvm_free_vmm_area();
1801 kfree(kvm_vmm_info); 1794 kfree(kvm_vmm_info);
1802 kvm_vmm_info = NULL; 1795 kvm_vmm_info = NULL;
1803 } 1796 }
1804 1797
1805 static int kvm_ia64_sync_dirty_log(struct kvm *kvm, 1798 static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
1806 struct kvm_dirty_log *log) 1799 struct kvm_dirty_log *log)
1807 { 1800 {
1808 struct kvm_memory_slot *memslot; 1801 struct kvm_memory_slot *memslot;
1809 int r, i; 1802 int r, i;
1810 long base; 1803 long base;
1811 unsigned long n; 1804 unsigned long n;
1812 unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base + 1805 unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
1813 offsetof(struct kvm_vm_data, kvm_mem_dirty_log)); 1806 offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
1814 1807
1815 r = -EINVAL; 1808 r = -EINVAL;
1816 if (log->slot >= KVM_MEMORY_SLOTS) 1809 if (log->slot >= KVM_MEMORY_SLOTS)
1817 goto out; 1810 goto out;
1818 1811
1819 memslot = &kvm->memslots->memslots[log->slot]; 1812 memslot = &kvm->memslots->memslots[log->slot];
1820 r = -ENOENT; 1813 r = -ENOENT;
1821 if (!memslot->dirty_bitmap) 1814 if (!memslot->dirty_bitmap)
1822 goto out; 1815 goto out;
1823 1816
1824 n = kvm_dirty_bitmap_bytes(memslot); 1817 n = kvm_dirty_bitmap_bytes(memslot);
1825 base = memslot->base_gfn / BITS_PER_LONG; 1818 base = memslot->base_gfn / BITS_PER_LONG;
1826 1819
1827 for (i = 0; i < n/sizeof(long); ++i) { 1820 for (i = 0; i < n/sizeof(long); ++i) {
1828 memslot->dirty_bitmap[i] = dirty_bitmap[base + i]; 1821 memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
1829 dirty_bitmap[base + i] = 0; 1822 dirty_bitmap[base + i] = 0;
1830 } 1823 }
1831 r = 0; 1824 r = 0;
1832 out: 1825 out:
1833 return r; 1826 return r;
1834 } 1827 }
1835 1828
1836 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 1829 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1837 struct kvm_dirty_log *log) 1830 struct kvm_dirty_log *log)
1838 { 1831 {
1839 int r; 1832 int r;
1840 unsigned long n; 1833 unsigned long n;
1841 struct kvm_memory_slot *memslot; 1834 struct kvm_memory_slot *memslot;
1842 int is_dirty = 0; 1835 int is_dirty = 0;
1843 1836
1844 mutex_lock(&kvm->slots_lock); 1837 mutex_lock(&kvm->slots_lock);
1845 spin_lock(&kvm->arch.dirty_log_lock); 1838 spin_lock(&kvm->arch.dirty_log_lock);
1846 1839
1847 r = kvm_ia64_sync_dirty_log(kvm, log); 1840 r = kvm_ia64_sync_dirty_log(kvm, log);
1848 if (r) 1841 if (r)
1849 goto out; 1842 goto out;
1850 1843
1851 r = kvm_get_dirty_log(kvm, log, &is_dirty); 1844 r = kvm_get_dirty_log(kvm, log, &is_dirty);
1852 if (r) 1845 if (r)
1853 goto out; 1846 goto out;
1854 1847
1855 /* If nothing is dirty, don't bother messing with page tables. */ 1848 /* If nothing is dirty, don't bother messing with page tables. */
1856 if (is_dirty) { 1849 if (is_dirty) {
1857 kvm_flush_remote_tlbs(kvm); 1850 kvm_flush_remote_tlbs(kvm);
1858 memslot = &kvm->memslots->memslots[log->slot]; 1851 memslot = &kvm->memslots->memslots[log->slot];
1859 n = kvm_dirty_bitmap_bytes(memslot); 1852 n = kvm_dirty_bitmap_bytes(memslot);
1860 memset(memslot->dirty_bitmap, 0, n); 1853 memset(memslot->dirty_bitmap, 0, n);
1861 } 1854 }
1862 r = 0; 1855 r = 0;
1863 out: 1856 out:
1864 mutex_unlock(&kvm->slots_lock); 1857 mutex_unlock(&kvm->slots_lock);
1865 spin_unlock(&kvm->arch.dirty_log_lock); 1858 spin_unlock(&kvm->arch.dirty_log_lock);
1866 return r; 1859 return r;
1867 } 1860 }
1868 1861
1869 int kvm_arch_hardware_setup(void) 1862 int kvm_arch_hardware_setup(void)
1870 { 1863 {
1871 return 0; 1864 return 0;
1872 } 1865 }
1873 1866
1874 void kvm_arch_hardware_unsetup(void) 1867 void kvm_arch_hardware_unsetup(void)
1875 { 1868 {
1876 } 1869 }
1877 1870
1878 void kvm_vcpu_kick(struct kvm_vcpu *vcpu) 1871 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
1879 { 1872 {
1880 int me; 1873 int me;
1881 int cpu = vcpu->cpu; 1874 int cpu = vcpu->cpu;
1882 1875
1883 if (waitqueue_active(&vcpu->wq)) 1876 if (waitqueue_active(&vcpu->wq))
1884 wake_up_interruptible(&vcpu->wq); 1877 wake_up_interruptible(&vcpu->wq);
1885 1878
1886 me = get_cpu(); 1879 me = get_cpu();
1887 if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu)) 1880 if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu))
1888 if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) 1881 if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
1889 smp_send_reschedule(cpu); 1882 smp_send_reschedule(cpu);
1890 put_cpu(); 1883 put_cpu();
1891 } 1884 }
1892 1885
1893 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) 1886 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
1894 { 1887 {
1895 return __apic_accept_irq(vcpu, irq->vector); 1888 return __apic_accept_irq(vcpu, irq->vector);
1896 } 1889 }
1897 1890
1898 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) 1891 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
1899 { 1892 {
1900 return apic->vcpu->vcpu_id == dest; 1893 return apic->vcpu->vcpu_id == dest;
1901 } 1894 }
1902 1895
1903 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) 1896 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
1904 { 1897 {
1905 return 0; 1898 return 0;
1906 } 1899 }
1907 1900
1908 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) 1901 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
1909 { 1902 {
1910 return vcpu1->arch.xtp - vcpu2->arch.xtp; 1903 return vcpu1->arch.xtp - vcpu2->arch.xtp;
1911 } 1904 }
1912 1905
1913 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, 1906 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
1914 int short_hand, int dest, int dest_mode) 1907 int short_hand, int dest, int dest_mode)
1915 { 1908 {
1916 struct kvm_lapic *target = vcpu->arch.apic; 1909 struct kvm_lapic *target = vcpu->arch.apic;
1917 return (dest_mode == 0) ? 1910 return (dest_mode == 0) ?
1918 kvm_apic_match_physical_addr(target, dest) : 1911 kvm_apic_match_physical_addr(target, dest) :
1919 kvm_apic_match_logical_addr(target, dest); 1912 kvm_apic_match_logical_addr(target, dest);
1920 } 1913 }
1921 1914
1922 static int find_highest_bits(int *dat) 1915 static int find_highest_bits(int *dat)
1923 { 1916 {
1924 u32 bits, bitnum; 1917 u32 bits, bitnum;
1925 int i; 1918 int i;
1926 1919
1927 /* loop for all 256 bits */ 1920 /* loop for all 256 bits */
1928 for (i = 7; i >= 0 ; i--) { 1921 for (i = 7; i >= 0 ; i--) {
1929 bits = dat[i]; 1922 bits = dat[i];
1930 if (bits) { 1923 if (bits) {
1931 bitnum = fls(bits); 1924 bitnum = fls(bits);
1932 return i * 32 + bitnum - 1; 1925 return i * 32 + bitnum - 1;
1933 } 1926 }
1934 } 1927 }
1935 1928
1936 return -1; 1929 return -1;
1937 } 1930 }
1938 1931
1939 int kvm_highest_pending_irq(struct kvm_vcpu *vcpu) 1932 int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
1940 { 1933 {
1941 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 1934 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1942 1935
1943 if (vpd->irr[0] & (1UL << NMI_VECTOR)) 1936 if (vpd->irr[0] & (1UL << NMI_VECTOR))
1944 return NMI_VECTOR; 1937 return NMI_VECTOR;
1945 if (vpd->irr[0] & (1UL << ExtINT_VECTOR)) 1938 if (vpd->irr[0] & (1UL << ExtINT_VECTOR))
1946 return ExtINT_VECTOR; 1939 return ExtINT_VECTOR;
1947 1940
1948 return find_highest_bits((int *)&vpd->irr[0]); 1941 return find_highest_bits((int *)&vpd->irr[0]);
1949 } 1942 }
1950 1943
1951 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) 1944 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
1952 { 1945 {
1953 return vcpu->arch.timer_fired; 1946 return vcpu->arch.timer_fired;
1954 } 1947 }
1955 1948
1956 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 1949 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
1957 { 1950 {
1958 return gfn; 1951 return gfn;
1959 } 1952 }
1960 1953
1961 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 1954 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1962 { 1955 {
1963 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) || 1956 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) ||
1964 (kvm_highest_pending_irq(vcpu) != -1); 1957 (kvm_highest_pending_irq(vcpu) != -1);
1965 } 1958 }
1966 1959
1967 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 1960 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1968 struct kvm_mp_state *mp_state) 1961 struct kvm_mp_state *mp_state)
1969 { 1962 {
1970 vcpu_load(vcpu);
1971 mp_state->mp_state = vcpu->arch.mp_state; 1963 mp_state->mp_state = vcpu->arch.mp_state;
1972 vcpu_put(vcpu);
1973 return 0; 1964 return 0;
1974 } 1965 }
1975 1966
1976 static int vcpu_reset(struct kvm_vcpu *vcpu) 1967 static int vcpu_reset(struct kvm_vcpu *vcpu)
1977 { 1968 {
1978 int r; 1969 int r;
1979 long psr; 1970 long psr;
1980 local_irq_save(psr); 1971 local_irq_save(psr);
1981 r = kvm_insert_vmm_mapping(vcpu); 1972 r = kvm_insert_vmm_mapping(vcpu);
1982 local_irq_restore(psr); 1973 local_irq_restore(psr);
1983 if (r) 1974 if (r)
1984 goto fail; 1975 goto fail;
1985 1976
1986 vcpu->arch.launched = 0; 1977 vcpu->arch.launched = 0;
1987 kvm_arch_vcpu_uninit(vcpu); 1978 kvm_arch_vcpu_uninit(vcpu);
1988 r = kvm_arch_vcpu_init(vcpu); 1979 r = kvm_arch_vcpu_init(vcpu);
1989 if (r) 1980 if (r)
1990 goto fail; 1981 goto fail;
1991 1982
1992 kvm_purge_vmm_mapping(vcpu); 1983 kvm_purge_vmm_mapping(vcpu);
1993 r = 0; 1984 r = 0;
1994 fail: 1985 fail:
1995 return r; 1986 return r;
1996 } 1987 }
1997 1988
1998 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 1989 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1999 struct kvm_mp_state *mp_state) 1990 struct kvm_mp_state *mp_state)
2000 { 1991 {
2001 int r = 0; 1992 int r = 0;
2002 1993
2003 vcpu_load(vcpu);
2004 vcpu->arch.mp_state = mp_state->mp_state; 1994 vcpu->arch.mp_state = mp_state->mp_state;
2005 if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED) 1995 if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)
2006 r = vcpu_reset(vcpu); 1996 r = vcpu_reset(vcpu);
2007 vcpu_put(vcpu);
2008 return r; 1997 return r;
2009 } 1998 }
2010 1999
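Note on the four deletions above: dropping the vcpu_load()/vcpu_put() pairs from kvm_arch_vcpu_ioctl_get_mpstate() and kvm_arch_vcpu_ioctl_set_mpstate() is only safe if the caller already holds the vcpu mutex when these handlers run. The following is a minimal, illustrative sketch of that caller-side bracket, assuming a generic dispatcher along the lines of kvm_vcpu_ioctl() in virt/kvm/kvm_main.c; the name kvm_vcpu_ioctl_sketch and the abbreviated case list are hypothetical and not taken from this diff.

#include <linux/kvm_host.h>

/*
 * Hedged sketch only, not the literal upstream code: take the vcpu mutex
 * once in the dispatcher instead of in every per-ioctl handler.
 */
static long kvm_vcpu_ioctl_sketch(struct file *filp,
				  unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	long r;

	vcpu_load(vcpu);	/* lock once, before any generic vcpu ioctl */
	switch (ioctl) {
	/* generic cases such as KVM_GET_MP_STATE / KVM_SET_MP_STATE ... */
	default:
		/* arch-specific ioctls now run with the lock already held */
		r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
		break;
	}
	vcpu_put(vcpu);		/* unlock on the single exit path */
	return r;
}

With a bracket of that shape in the dispatcher, per-handler locking such as the removed pairs above becomes redundant rather than missing.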
arch/powerpc/kvm/book3s.c
1 /* 1 /*
2 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved. 2 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
3 * 3 *
4 * Authors: 4 * Authors:
5 * Alexander Graf <agraf@suse.de> 5 * Alexander Graf <agraf@suse.de>
6 * Kevin Wolf <mail@kevin-wolf.de> 6 * Kevin Wolf <mail@kevin-wolf.de>
7 * 7 *
8 * Description: 8 * Description:
9 * This file is derived from arch/powerpc/kvm/44x.c, 9 * This file is derived from arch/powerpc/kvm/44x.c,
10 * by Hollis Blanchard <hollisb@us.ibm.com>. 10 * by Hollis Blanchard <hollisb@us.ibm.com>.
11 * 11 *
12 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License, version 2, as 13 * it under the terms of the GNU General Public License, version 2, as
14 * published by the Free Software Foundation. 14 * published by the Free Software Foundation.
15 */ 15 */
16 16
17 #include <linux/kvm_host.h> 17 #include <linux/kvm_host.h>
18 #include <linux/err.h> 18 #include <linux/err.h>
19 #include <linux/slab.h> 19 #include <linux/slab.h>
20 20
21 #include <asm/reg.h> 21 #include <asm/reg.h>
22 #include <asm/cputable.h> 22 #include <asm/cputable.h>
23 #include <asm/cacheflush.h> 23 #include <asm/cacheflush.h>
24 #include <asm/tlbflush.h> 24 #include <asm/tlbflush.h>
25 #include <asm/uaccess.h> 25 #include <asm/uaccess.h>
26 #include <asm/io.h> 26 #include <asm/io.h>
27 #include <asm/kvm_ppc.h> 27 #include <asm/kvm_ppc.h>
28 #include <asm/kvm_book3s.h> 28 #include <asm/kvm_book3s.h>
29 #include <asm/mmu_context.h> 29 #include <asm/mmu_context.h>
30 #include <linux/gfp.h> 30 #include <linux/gfp.h>
31 #include <linux/sched.h> 31 #include <linux/sched.h>
32 #include <linux/vmalloc.h> 32 #include <linux/vmalloc.h>
33 #include <linux/highmem.h> 33 #include <linux/highmem.h>
34 34
35 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 35 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
36 36
37 /* #define EXIT_DEBUG */ 37 /* #define EXIT_DEBUG */
38 /* #define EXIT_DEBUG_SIMPLE */ 38 /* #define EXIT_DEBUG_SIMPLE */
39 /* #define DEBUG_EXT */ 39 /* #define DEBUG_EXT */
40 40
41 static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, 41 static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
42 ulong msr); 42 ulong msr);
43 43
44 /* Some compatibility defines */ 44 /* Some compatibility defines */
45 #ifdef CONFIG_PPC_BOOK3S_32 45 #ifdef CONFIG_PPC_BOOK3S_32
46 #define MSR_USER32 MSR_USER 46 #define MSR_USER32 MSR_USER
47 #define MSR_USER64 MSR_USER 47 #define MSR_USER64 MSR_USER
48 #define HW_PAGE_SIZE PAGE_SIZE 48 #define HW_PAGE_SIZE PAGE_SIZE
49 #endif 49 #endif
50 50
51 struct kvm_stats_debugfs_item debugfs_entries[] = { 51 struct kvm_stats_debugfs_item debugfs_entries[] = {
52 { "exits", VCPU_STAT(sum_exits) }, 52 { "exits", VCPU_STAT(sum_exits) },
53 { "mmio", VCPU_STAT(mmio_exits) }, 53 { "mmio", VCPU_STAT(mmio_exits) },
54 { "sig", VCPU_STAT(signal_exits) }, 54 { "sig", VCPU_STAT(signal_exits) },
55 { "sysc", VCPU_STAT(syscall_exits) }, 55 { "sysc", VCPU_STAT(syscall_exits) },
56 { "inst_emu", VCPU_STAT(emulated_inst_exits) }, 56 { "inst_emu", VCPU_STAT(emulated_inst_exits) },
57 { "dec", VCPU_STAT(dec_exits) }, 57 { "dec", VCPU_STAT(dec_exits) },
58 { "ext_intr", VCPU_STAT(ext_intr_exits) }, 58 { "ext_intr", VCPU_STAT(ext_intr_exits) },
59 { "queue_intr", VCPU_STAT(queue_intr) }, 59 { "queue_intr", VCPU_STAT(queue_intr) },
60 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 60 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
61 { "pf_storage", VCPU_STAT(pf_storage) }, 61 { "pf_storage", VCPU_STAT(pf_storage) },
62 { "sp_storage", VCPU_STAT(sp_storage) }, 62 { "sp_storage", VCPU_STAT(sp_storage) },
63 { "pf_instruc", VCPU_STAT(pf_instruc) }, 63 { "pf_instruc", VCPU_STAT(pf_instruc) },
64 { "sp_instruc", VCPU_STAT(sp_instruc) }, 64 { "sp_instruc", VCPU_STAT(sp_instruc) },
65 { "ld", VCPU_STAT(ld) }, 65 { "ld", VCPU_STAT(ld) },
66 { "ld_slow", VCPU_STAT(ld_slow) }, 66 { "ld_slow", VCPU_STAT(ld_slow) },
67 { "st", VCPU_STAT(st) }, 67 { "st", VCPU_STAT(st) },
68 { "st_slow", VCPU_STAT(st_slow) }, 68 { "st_slow", VCPU_STAT(st_slow) },
69 { NULL } 69 { NULL }
70 }; 70 };
71 71
72 void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) 72 void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
73 { 73 {
74 } 74 }
75 75
76 void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) 76 void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
77 { 77 {
78 } 78 }
79 79
80 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 80 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
81 { 81 {
82 #ifdef CONFIG_PPC_BOOK3S_64 82 #ifdef CONFIG_PPC_BOOK3S_64
83 memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb)); 83 memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb));
84 memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu, 84 memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
85 sizeof(get_paca()->shadow_vcpu)); 85 sizeof(get_paca()->shadow_vcpu));
86 to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max; 86 to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max;
87 #endif 87 #endif
88 88
89 #ifdef CONFIG_PPC_BOOK3S_32 89 #ifdef CONFIG_PPC_BOOK3S_32
90 current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu; 90 current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
91 #endif 91 #endif
92 } 92 }
93 93
94 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 94 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
95 { 95 {
96 #ifdef CONFIG_PPC_BOOK3S_64 96 #ifdef CONFIG_PPC_BOOK3S_64
97 memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb)); 97 memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb));
98 memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, 98 memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
99 sizeof(get_paca()->shadow_vcpu)); 99 sizeof(get_paca()->shadow_vcpu));
100 to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max; 100 to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max;
101 #endif 101 #endif
102 102
103 kvmppc_giveup_ext(vcpu, MSR_FP); 103 kvmppc_giveup_ext(vcpu, MSR_FP);
104 kvmppc_giveup_ext(vcpu, MSR_VEC); 104 kvmppc_giveup_ext(vcpu, MSR_VEC);
105 kvmppc_giveup_ext(vcpu, MSR_VSX); 105 kvmppc_giveup_ext(vcpu, MSR_VSX);
106 } 106 }
107 107
108 #if defined(EXIT_DEBUG) 108 #if defined(EXIT_DEBUG)
109 static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu) 109 static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
110 { 110 {
111 u64 jd = mftb() - vcpu->arch.dec_jiffies; 111 u64 jd = mftb() - vcpu->arch.dec_jiffies;
112 return vcpu->arch.dec - jd; 112 return vcpu->arch.dec - jd;
113 } 113 }
114 #endif 114 #endif
115 115
116 static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) 116 static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
117 { 117 {
118 vcpu->arch.shadow_msr = vcpu->arch.msr; 118 vcpu->arch.shadow_msr = vcpu->arch.msr;
119 /* Guest MSR values */ 119 /* Guest MSR values */
120 vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | 120 vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE |
121 MSR_BE | MSR_DE; 121 MSR_BE | MSR_DE;
122 /* Process MSR values */ 122 /* Process MSR values */
123 vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | 123 vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR |
124 MSR_EE; 124 MSR_EE;
125 /* External providers the guest reserved */ 125 /* External providers the guest reserved */
126 vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext); 126 vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext);
127 /* 64-bit Process MSR values */ 127 /* 64-bit Process MSR values */
128 #ifdef CONFIG_PPC_BOOK3S_64 128 #ifdef CONFIG_PPC_BOOK3S_64
129 vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV; 129 vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV;
130 #endif 130 #endif
131 } 131 }
132 132
133 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) 133 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
134 { 134 {
135 ulong old_msr = vcpu->arch.msr; 135 ulong old_msr = vcpu->arch.msr;
136 136
137 #ifdef EXIT_DEBUG 137 #ifdef EXIT_DEBUG
138 printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); 138 printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
139 #endif 139 #endif
140 140
141 msr &= to_book3s(vcpu)->msr_mask; 141 msr &= to_book3s(vcpu)->msr_mask;
142 vcpu->arch.msr = msr; 142 vcpu->arch.msr = msr;
143 kvmppc_recalc_shadow_msr(vcpu); 143 kvmppc_recalc_shadow_msr(vcpu);
144 144
145 if (msr & (MSR_WE|MSR_POW)) { 145 if (msr & (MSR_WE|MSR_POW)) {
146 if (!vcpu->arch.pending_exceptions) { 146 if (!vcpu->arch.pending_exceptions) {
147 kvm_vcpu_block(vcpu); 147 kvm_vcpu_block(vcpu);
148 vcpu->stat.halt_wakeup++; 148 vcpu->stat.halt_wakeup++;
149 } 149 }
150 } 150 }
151 151
152 if ((vcpu->arch.msr & (MSR_PR|MSR_IR|MSR_DR)) != 152 if ((vcpu->arch.msr & (MSR_PR|MSR_IR|MSR_DR)) !=
153 (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { 153 (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
154 kvmppc_mmu_flush_segments(vcpu); 154 kvmppc_mmu_flush_segments(vcpu);
155 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); 155 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
156 } 156 }
157 157
158 /* Preload FPU if it's enabled */ 158 /* Preload FPU if it's enabled */
159 if (vcpu->arch.msr & MSR_FP) 159 if (vcpu->arch.msr & MSR_FP)
160 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); 160 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
161 } 161 }
162 162
163 void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) 163 void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
164 { 164 {
165 vcpu->arch.srr0 = kvmppc_get_pc(vcpu); 165 vcpu->arch.srr0 = kvmppc_get_pc(vcpu);
166 vcpu->arch.srr1 = vcpu->arch.msr | flags; 166 vcpu->arch.srr1 = vcpu->arch.msr | flags;
167 kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec); 167 kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec);
168 vcpu->arch.mmu.reset_msr(vcpu); 168 vcpu->arch.mmu.reset_msr(vcpu);
169 } 169 }
170 170
171 static int kvmppc_book3s_vec2irqprio(unsigned int vec) 171 static int kvmppc_book3s_vec2irqprio(unsigned int vec)
172 { 172 {
173 unsigned int prio; 173 unsigned int prio;
174 174
175 switch (vec) { 175 switch (vec) {
176 case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET; break; 176 case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET; break;
177 case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK; break; 177 case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK; break;
178 case 0x300: prio = BOOK3S_IRQPRIO_DATA_STORAGE; break; 178 case 0x300: prio = BOOK3S_IRQPRIO_DATA_STORAGE; break;
179 case 0x380: prio = BOOK3S_IRQPRIO_DATA_SEGMENT; break; 179 case 0x380: prio = BOOK3S_IRQPRIO_DATA_SEGMENT; break;
180 case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break; 180 case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break;
181 case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break; 181 case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break;
182 case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break; 182 case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break;
183 case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break; 183 case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break;
184 case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break; 184 case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break;
185 case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break; 185 case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break;
186 case 0x900: prio = BOOK3S_IRQPRIO_DECREMENTER; break; 186 case 0x900: prio = BOOK3S_IRQPRIO_DECREMENTER; break;
187 case 0xc00: prio = BOOK3S_IRQPRIO_SYSCALL; break; 187 case 0xc00: prio = BOOK3S_IRQPRIO_SYSCALL; break;
188 case 0xd00: prio = BOOK3S_IRQPRIO_DEBUG; break; 188 case 0xd00: prio = BOOK3S_IRQPRIO_DEBUG; break;
189 case 0xf20: prio = BOOK3S_IRQPRIO_ALTIVEC; break; 189 case 0xf20: prio = BOOK3S_IRQPRIO_ALTIVEC; break;
190 case 0xf40: prio = BOOK3S_IRQPRIO_VSX; break; 190 case 0xf40: prio = BOOK3S_IRQPRIO_VSX; break;
191 default: prio = BOOK3S_IRQPRIO_MAX; break; 191 default: prio = BOOK3S_IRQPRIO_MAX; break;
192 } 192 }
193 193
194 return prio; 194 return prio;
195 } 195 }
196 196
197 static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, 197 static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
198 unsigned int vec) 198 unsigned int vec)
199 { 199 {
200 clear_bit(kvmppc_book3s_vec2irqprio(vec), 200 clear_bit(kvmppc_book3s_vec2irqprio(vec),
201 &vcpu->arch.pending_exceptions); 201 &vcpu->arch.pending_exceptions);
202 } 202 }
203 203
204 void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) 204 void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
205 { 205 {
206 vcpu->stat.queue_intr++; 206 vcpu->stat.queue_intr++;
207 207
208 set_bit(kvmppc_book3s_vec2irqprio(vec), 208 set_bit(kvmppc_book3s_vec2irqprio(vec),
209 &vcpu->arch.pending_exceptions); 209 &vcpu->arch.pending_exceptions);
210 #ifdef EXIT_DEBUG 210 #ifdef EXIT_DEBUG
211 printk(KERN_INFO "Queueing interrupt %x\n", vec); 211 printk(KERN_INFO "Queueing interrupt %x\n", vec);
212 #endif 212 #endif
213 } 213 }
214 214
215 215
216 void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) 216 void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
217 { 217 {
218 to_book3s(vcpu)->prog_flags = flags; 218 to_book3s(vcpu)->prog_flags = flags;
219 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM); 219 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM);
220 } 220 }
221 221
222 void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) 222 void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
223 { 223 {
224 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); 224 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
225 } 225 }
226 226
227 int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) 227 int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
228 { 228 {
229 return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions); 229 return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions);
230 } 230 }
231 231
232 void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) 232 void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
233 { 233 {
234 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); 234 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
235 } 235 }
236 236
237 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 237 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
238 struct kvm_interrupt *irq) 238 struct kvm_interrupt *irq)
239 { 239 {
240 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); 240 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
241 } 241 }
242 242
243 void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, 243 void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
244 struct kvm_interrupt *irq) 244 struct kvm_interrupt *irq)
245 { 245 {
246 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); 246 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
247 } 247 }
248 248
249 int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) 249 int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
250 { 250 {
251 int deliver = 1; 251 int deliver = 1;
252 int vec = 0; 252 int vec = 0;
253 ulong flags = 0ULL; 253 ulong flags = 0ULL;
254 254
255 switch (priority) { 255 switch (priority) {
256 case BOOK3S_IRQPRIO_DECREMENTER: 256 case BOOK3S_IRQPRIO_DECREMENTER:
257 deliver = vcpu->arch.msr & MSR_EE; 257 deliver = vcpu->arch.msr & MSR_EE;
258 vec = BOOK3S_INTERRUPT_DECREMENTER; 258 vec = BOOK3S_INTERRUPT_DECREMENTER;
259 break; 259 break;
260 case BOOK3S_IRQPRIO_EXTERNAL: 260 case BOOK3S_IRQPRIO_EXTERNAL:
261 deliver = vcpu->arch.msr & MSR_EE; 261 deliver = vcpu->arch.msr & MSR_EE;
262 vec = BOOK3S_INTERRUPT_EXTERNAL; 262 vec = BOOK3S_INTERRUPT_EXTERNAL;
263 break; 263 break;
264 case BOOK3S_IRQPRIO_SYSTEM_RESET: 264 case BOOK3S_IRQPRIO_SYSTEM_RESET:
265 vec = BOOK3S_INTERRUPT_SYSTEM_RESET; 265 vec = BOOK3S_INTERRUPT_SYSTEM_RESET;
266 break; 266 break;
267 case BOOK3S_IRQPRIO_MACHINE_CHECK: 267 case BOOK3S_IRQPRIO_MACHINE_CHECK:
268 vec = BOOK3S_INTERRUPT_MACHINE_CHECK; 268 vec = BOOK3S_INTERRUPT_MACHINE_CHECK;
269 break; 269 break;
270 case BOOK3S_IRQPRIO_DATA_STORAGE: 270 case BOOK3S_IRQPRIO_DATA_STORAGE:
271 vec = BOOK3S_INTERRUPT_DATA_STORAGE; 271 vec = BOOK3S_INTERRUPT_DATA_STORAGE;
272 break; 272 break;
273 case BOOK3S_IRQPRIO_INST_STORAGE: 273 case BOOK3S_IRQPRIO_INST_STORAGE:
274 vec = BOOK3S_INTERRUPT_INST_STORAGE; 274 vec = BOOK3S_INTERRUPT_INST_STORAGE;
275 break; 275 break;
276 case BOOK3S_IRQPRIO_DATA_SEGMENT: 276 case BOOK3S_IRQPRIO_DATA_SEGMENT:
277 vec = BOOK3S_INTERRUPT_DATA_SEGMENT; 277 vec = BOOK3S_INTERRUPT_DATA_SEGMENT;
278 break; 278 break;
279 case BOOK3S_IRQPRIO_INST_SEGMENT: 279 case BOOK3S_IRQPRIO_INST_SEGMENT:
280 vec = BOOK3S_INTERRUPT_INST_SEGMENT; 280 vec = BOOK3S_INTERRUPT_INST_SEGMENT;
281 break; 281 break;
282 case BOOK3S_IRQPRIO_ALIGNMENT: 282 case BOOK3S_IRQPRIO_ALIGNMENT:
283 vec = BOOK3S_INTERRUPT_ALIGNMENT; 283 vec = BOOK3S_INTERRUPT_ALIGNMENT;
284 break; 284 break;
285 case BOOK3S_IRQPRIO_PROGRAM: 285 case BOOK3S_IRQPRIO_PROGRAM:
286 vec = BOOK3S_INTERRUPT_PROGRAM; 286 vec = BOOK3S_INTERRUPT_PROGRAM;
287 flags = to_book3s(vcpu)->prog_flags; 287 flags = to_book3s(vcpu)->prog_flags;
288 break; 288 break;
289 case BOOK3S_IRQPRIO_VSX: 289 case BOOK3S_IRQPRIO_VSX:
290 vec = BOOK3S_INTERRUPT_VSX; 290 vec = BOOK3S_INTERRUPT_VSX;
291 break; 291 break;
292 case BOOK3S_IRQPRIO_ALTIVEC: 292 case BOOK3S_IRQPRIO_ALTIVEC:
293 vec = BOOK3S_INTERRUPT_ALTIVEC; 293 vec = BOOK3S_INTERRUPT_ALTIVEC;
294 break; 294 break;
295 case BOOK3S_IRQPRIO_FP_UNAVAIL: 295 case BOOK3S_IRQPRIO_FP_UNAVAIL:
296 vec = BOOK3S_INTERRUPT_FP_UNAVAIL; 296 vec = BOOK3S_INTERRUPT_FP_UNAVAIL;
297 break; 297 break;
298 case BOOK3S_IRQPRIO_SYSCALL: 298 case BOOK3S_IRQPRIO_SYSCALL:
299 vec = BOOK3S_INTERRUPT_SYSCALL; 299 vec = BOOK3S_INTERRUPT_SYSCALL;
300 break; 300 break;
301 case BOOK3S_IRQPRIO_DEBUG: 301 case BOOK3S_IRQPRIO_DEBUG:
302 vec = BOOK3S_INTERRUPT_TRACE; 302 vec = BOOK3S_INTERRUPT_TRACE;
303 break; 303 break;
304 case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR: 304 case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR:
305 vec = BOOK3S_INTERRUPT_PERFMON; 305 vec = BOOK3S_INTERRUPT_PERFMON;
306 break; 306 break;
307 default: 307 default:
308 deliver = 0; 308 deliver = 0;
309 printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority); 309 printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority);
310 break; 310 break;
311 } 311 }
312 312
313 #if 0 313 #if 0
314 printk(KERN_INFO "Deliver interrupt 0x%x? %x\n", vec, deliver); 314 printk(KERN_INFO "Deliver interrupt 0x%x? %x\n", vec, deliver);
315 #endif 315 #endif
316 316
317 if (deliver) 317 if (deliver)
318 kvmppc_inject_interrupt(vcpu, vec, flags); 318 kvmppc_inject_interrupt(vcpu, vec, flags);
319 319
320 return deliver; 320 return deliver;
321 } 321 }
322 322
323 void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) 323 void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
324 { 324 {
325 unsigned long *pending = &vcpu->arch.pending_exceptions; 325 unsigned long *pending = &vcpu->arch.pending_exceptions;
326 unsigned int priority; 326 unsigned int priority;
327 327
328 #ifdef EXIT_DEBUG 328 #ifdef EXIT_DEBUG
329 if (vcpu->arch.pending_exceptions) 329 if (vcpu->arch.pending_exceptions)
330 printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions); 330 printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions);
331 #endif 331 #endif
332 priority = __ffs(*pending); 332 priority = __ffs(*pending);
333 while (priority < BOOK3S_IRQPRIO_MAX) { 333 while (priority < BOOK3S_IRQPRIO_MAX) {
334 if (kvmppc_book3s_irqprio_deliver(vcpu, priority) && 334 if (kvmppc_book3s_irqprio_deliver(vcpu, priority) &&
335 (priority != BOOK3S_IRQPRIO_DECREMENTER)) { 335 (priority != BOOK3S_IRQPRIO_DECREMENTER)) {
336 /* DEC interrupts get cleared by mtdec */ 336 /* DEC interrupts get cleared by mtdec */
337 clear_bit(priority, &vcpu->arch.pending_exceptions); 337 clear_bit(priority, &vcpu->arch.pending_exceptions);
338 break; 338 break;
339 } 339 }
340 340
341 priority = find_next_bit(pending, 341 priority = find_next_bit(pending,
342 BITS_PER_BYTE * sizeof(*pending), 342 BITS_PER_BYTE * sizeof(*pending),
343 priority + 1); 343 priority + 1);
344 } 344 }
345 } 345 }
346 346
347 void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) 347 void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
348 { 348 {
349 u32 host_pvr; 349 u32 host_pvr;
350 350
351 vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB; 351 vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
352 vcpu->arch.pvr = pvr; 352 vcpu->arch.pvr = pvr;
353 #ifdef CONFIG_PPC_BOOK3S_64 353 #ifdef CONFIG_PPC_BOOK3S_64
354 if ((pvr >= 0x330000) && (pvr < 0x70330000)) { 354 if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
355 kvmppc_mmu_book3s_64_init(vcpu); 355 kvmppc_mmu_book3s_64_init(vcpu);
356 to_book3s(vcpu)->hior = 0xfff00000; 356 to_book3s(vcpu)->hior = 0xfff00000;
357 to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; 357 to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
358 } else 358 } else
359 #endif 359 #endif
360 { 360 {
361 kvmppc_mmu_book3s_32_init(vcpu); 361 kvmppc_mmu_book3s_32_init(vcpu);
362 to_book3s(vcpu)->hior = 0; 362 to_book3s(vcpu)->hior = 0;
363 to_book3s(vcpu)->msr_mask = 0xffffffffULL; 363 to_book3s(vcpu)->msr_mask = 0xffffffffULL;
364 } 364 }
365 365
366 /* If we are at hypervisor level on 970, we can tell the CPU to 366 /* If we are at hypervisor level on 970, we can tell the CPU to
367 * treat DCBZ as a 32-byte store */ 367 * treat DCBZ as a 32-byte store */
368 vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32; 368 vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
369 if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) && 369 if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) &&
370 !strcmp(cur_cpu_spec->platform, "ppc970")) 370 !strcmp(cur_cpu_spec->platform, "ppc970"))
371 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; 371 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
372 372
373 /* Cell performs badly if MSR_FEx are set. So let's hope nobody 373 /* Cell performs badly if MSR_FEx are set. So let's hope nobody
374 really needs them in a VM on Cell and force disable them. */ 374 really needs them in a VM on Cell and force disable them. */
375 if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be")) 375 if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
376 to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1); 376 to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
377 377
378 #ifdef CONFIG_PPC_BOOK3S_32 378 #ifdef CONFIG_PPC_BOOK3S_32
379 /* 32-bit Book3S always has 32-byte dcbz */ 379 /* 32-bit Book3S always has 32-byte dcbz */
380 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; 380 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
381 #endif 381 #endif
382 382
383 /* On some CPUs we can execute paired single operations natively */ 383 /* On some CPUs we can execute paired single operations natively */
384 asm ( "mfpvr %0" : "=r"(host_pvr)); 384 asm ( "mfpvr %0" : "=r"(host_pvr));
385 switch (host_pvr) { 385 switch (host_pvr) {
386 case 0x00080200: /* lonestar 2.0 */ 386 case 0x00080200: /* lonestar 2.0 */
387 case 0x00088202: /* lonestar 2.2 */ 387 case 0x00088202: /* lonestar 2.2 */
388 case 0x70000100: /* gekko 1.0 */ 388 case 0x70000100: /* gekko 1.0 */
389 case 0x00080100: /* gekko 2.0 */ 389 case 0x00080100: /* gekko 2.0 */
390 case 0x00083203: /* gekko 2.3a */ 390 case 0x00083203: /* gekko 2.3a */
391 case 0x00083213: /* gekko 2.3b */ 391 case 0x00083213: /* gekko 2.3b */
392 case 0x00083204: /* gekko 2.4 */ 392 case 0x00083204: /* gekko 2.4 */
393 case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */ 393 case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */
394 case 0x00087200: /* broadway */ 394 case 0x00087200: /* broadway */
395 vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS; 395 vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS;
396 /* Enable HID2.PSE - in case we need it later */ 396 /* Enable HID2.PSE - in case we need it later */
397 mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29)); 397 mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29));
398 } 398 }
399 } 399 }
400 400
401 /* Book3s_32 CPUs always have a 32-byte cache line size, which Linux assumes. To 401 /* Book3s_32 CPUs always have a 32-byte cache line size, which Linux assumes. To
402 * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to 402 * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to
403 * emulate a 32-byte dcbz length. 403 * emulate a 32-byte dcbz length.
404 * 404 *
405 * The Book3s_64 inventors also realized this case and implemented a special bit 405 * The Book3s_64 inventors also realized this case and implemented a special bit
406 * in the HID5 register, which is a hypervisor resource. Thus we can't use it. 406 * in the HID5 register, which is a hypervisor resource. Thus we can't use it.
407 * 407 *
408 * My approach here is to patch the dcbz instruction on executing pages. 408 * My approach here is to patch the dcbz instruction on executing pages.
409 */ 409 */
410 static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) 410 static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
411 { 411 {
412 struct page *hpage; 412 struct page *hpage;
413 u64 hpage_offset; 413 u64 hpage_offset;
414 u32 *page; 414 u32 *page;
415 int i; 415 int i;
416 416
417 hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT); 417 hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
418 if (is_error_page(hpage)) 418 if (is_error_page(hpage))
419 return; 419 return;
420 420
421 hpage_offset = pte->raddr & ~PAGE_MASK; 421 hpage_offset = pte->raddr & ~PAGE_MASK;
422 hpage_offset &= ~0xFFFULL; 422 hpage_offset &= ~0xFFFULL;
423 hpage_offset /= 4; 423 hpage_offset /= 4;
424 424
425 get_page(hpage); 425 get_page(hpage);
426 page = kmap_atomic(hpage, KM_USER0); 426 page = kmap_atomic(hpage, KM_USER0);
427 427
428 /* patch dcbz into reserved instruction, so we trap */ 428 /* patch dcbz into reserved instruction, so we trap */
429 for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++) 429 for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
430 if ((page[i] & 0xff0007ff) == INS_DCBZ) 430 if ((page[i] & 0xff0007ff) == INS_DCBZ)
431 page[i] &= 0xfffffff7; 431 page[i] &= 0xfffffff7;
432 432
433 kunmap_atomic(page, KM_USER0); 433 kunmap_atomic(page, KM_USER0);
434 put_page(hpage); 434 put_page(hpage);
435 } 435 }
436 436
437 static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, 437 static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
438 struct kvmppc_pte *pte) 438 struct kvmppc_pte *pte)
439 { 439 {
440 int relocated = (vcpu->arch.msr & (data ? MSR_DR : MSR_IR)); 440 int relocated = (vcpu->arch.msr & (data ? MSR_DR : MSR_IR));
441 int r; 441 int r;
442 442
443 if (relocated) { 443 if (relocated) {
444 r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data); 444 r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data);
445 } else { 445 } else {
446 pte->eaddr = eaddr; 446 pte->eaddr = eaddr;
447 pte->raddr = eaddr & 0xffffffff; 447 pte->raddr = eaddr & 0xffffffff;
448 pte->vpage = VSID_REAL | eaddr >> 12; 448 pte->vpage = VSID_REAL | eaddr >> 12;
449 pte->may_read = true; 449 pte->may_read = true;
450 pte->may_write = true; 450 pte->may_write = true;
451 pte->may_execute = true; 451 pte->may_execute = true;
452 r = 0; 452 r = 0;
453 } 453 }
454 454
455 return r; 455 return r;
456 } 456 }
457 457
458 static hva_t kvmppc_bad_hva(void) 458 static hva_t kvmppc_bad_hva(void)
459 { 459 {
460 return PAGE_OFFSET; 460 return PAGE_OFFSET;
461 } 461 }
462 462
463 static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte, 463 static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte,
464 bool read) 464 bool read)
465 { 465 {
466 hva_t hpage; 466 hva_t hpage;
467 467
468 if (read && !pte->may_read) 468 if (read && !pte->may_read)
469 goto err; 469 goto err;
470 470
471 if (!read && !pte->may_write) 471 if (!read && !pte->may_write)
472 goto err; 472 goto err;
473 473
474 hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); 474 hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
475 if (kvm_is_error_hva(hpage)) 475 if (kvm_is_error_hva(hpage))
476 goto err; 476 goto err;
477 477
478 return hpage | (pte->raddr & ~PAGE_MASK); 478 return hpage | (pte->raddr & ~PAGE_MASK);
479 err: 479 err:
480 return kvmppc_bad_hva(); 480 return kvmppc_bad_hva();
481 } 481 }
482 482
483 int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, 483 int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
484 bool data) 484 bool data)
485 { 485 {
486 struct kvmppc_pte pte; 486 struct kvmppc_pte pte;
487 487
488 vcpu->stat.st++; 488 vcpu->stat.st++;
489 489
490 if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) 490 if (kvmppc_xlate(vcpu, *eaddr, data, &pte))
491 return -ENOENT; 491 return -ENOENT;
492 492
493 *eaddr = pte.raddr; 493 *eaddr = pte.raddr;
494 494
495 if (!pte.may_write) 495 if (!pte.may_write)
496 return -EPERM; 496 return -EPERM;
497 497
498 if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) 498 if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size))
499 return EMULATE_DO_MMIO; 499 return EMULATE_DO_MMIO;
500 500
501 return EMULATE_DONE; 501 return EMULATE_DONE;
502 } 502 }
503 503
504 int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, 504 int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
505 bool data) 505 bool data)
506 { 506 {
507 struct kvmppc_pte pte; 507 struct kvmppc_pte pte;
508 hva_t hva = *eaddr; 508 hva_t hva = *eaddr;
509 509
510 vcpu->stat.ld++; 510 vcpu->stat.ld++;
511 511
512 if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) 512 if (kvmppc_xlate(vcpu, *eaddr, data, &pte))
513 goto nopte; 513 goto nopte;
514 514
515 *eaddr = pte.raddr; 515 *eaddr = pte.raddr;
516 516
517 hva = kvmppc_pte_to_hva(vcpu, &pte, true); 517 hva = kvmppc_pte_to_hva(vcpu, &pte, true);
518 if (kvm_is_error_hva(hva)) 518 if (kvm_is_error_hva(hva))
519 goto mmio; 519 goto mmio;
520 520
521 if (copy_from_user(ptr, (void __user *)hva, size)) { 521 if (copy_from_user(ptr, (void __user *)hva, size)) {
522 printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); 522 printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva);
523 goto mmio; 523 goto mmio;
524 } 524 }
525 525
526 return EMULATE_DONE; 526 return EMULATE_DONE;
527 527
528 nopte: 528 nopte:
529 return -ENOENT; 529 return -ENOENT;
530 mmio: 530 mmio:
531 return EMULATE_DO_MMIO; 531 return EMULATE_DO_MMIO;
532 } 532 }
533 533
534 static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) 534 static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
535 { 535 {
536 return kvm_is_visible_gfn(vcpu->kvm, gfn); 536 return kvm_is_visible_gfn(vcpu->kvm, gfn);
537 } 537 }
538 538
539 int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, 539 int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
540 ulong eaddr, int vec) 540 ulong eaddr, int vec)
541 { 541 {
542 bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE); 542 bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
543 int r = RESUME_GUEST; 543 int r = RESUME_GUEST;
544 int relocated; 544 int relocated;
545 int page_found = 0; 545 int page_found = 0;
546 struct kvmppc_pte pte; 546 struct kvmppc_pte pte;
547 bool is_mmio = false; 547 bool is_mmio = false;
548 bool dr = (vcpu->arch.msr & MSR_DR) ? true : false; 548 bool dr = (vcpu->arch.msr & MSR_DR) ? true : false;
549 bool ir = (vcpu->arch.msr & MSR_IR) ? true : false; 549 bool ir = (vcpu->arch.msr & MSR_IR) ? true : false;
550 u64 vsid; 550 u64 vsid;
551 551
552 relocated = data ? dr : ir; 552 relocated = data ? dr : ir;
553 553
554 /* Resolve real address if translation turned on */ 554 /* Resolve real address if translation turned on */
555 if (relocated) { 555 if (relocated) {
556 page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data); 556 page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data);
557 } else { 557 } else {
558 pte.may_execute = true; 558 pte.may_execute = true;
559 pte.may_read = true; 559 pte.may_read = true;
560 pte.may_write = true; 560 pte.may_write = true;
561 pte.raddr = eaddr & 0xffffffff; 561 pte.raddr = eaddr & 0xffffffff;
562 pte.eaddr = eaddr; 562 pte.eaddr = eaddr;
563 pte.vpage = eaddr >> 12; 563 pte.vpage = eaddr >> 12;
564 } 564 }
565 565
566 switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { 566 switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
567 case 0: 567 case 0:
568 pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); 568 pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
569 break; 569 break;
570 case MSR_DR: 570 case MSR_DR:
571 case MSR_IR: 571 case MSR_IR:
572 vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); 572 vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
573 573
574 if ((vcpu->arch.msr & (MSR_DR|MSR_IR)) == MSR_DR) 574 if ((vcpu->arch.msr & (MSR_DR|MSR_IR)) == MSR_DR)
575 pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12)); 575 pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
576 else 576 else
577 pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12)); 577 pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
578 pte.vpage |= vsid; 578 pte.vpage |= vsid;
579 579
580 if (vsid == -1) 580 if (vsid == -1)
581 page_found = -EINVAL; 581 page_found = -EINVAL;
582 break; 582 break;
583 } 583 }
584 584
585 if (vcpu->arch.mmu.is_dcbz32(vcpu) && 585 if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
586 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { 586 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
587 /* 587 /*
588 * If we do the dcbz hack, we have to NX on every execution, 588 * If we do the dcbz hack, we have to NX on every execution,
589 * so we can patch the executing code. This renders our guest 589 * so we can patch the executing code. This renders our guest
590 * NX-less. 590 * NX-less.
591 */ 591 */
592 pte.may_execute = !data; 592 pte.may_execute = !data;
593 } 593 }
594 594
595 if (page_found == -ENOENT) { 595 if (page_found == -ENOENT) {
596 /* Page not found in guest PTE entries */ 596 /* Page not found in guest PTE entries */
597 vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); 597 vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
598 to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr; 598 to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr;
599 vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); 599 vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
600 kvmppc_book3s_queue_irqprio(vcpu, vec); 600 kvmppc_book3s_queue_irqprio(vcpu, vec);
601 } else if (page_found == -EPERM) { 601 } else if (page_found == -EPERM) {
602 /* Storage protection */ 602 /* Storage protection */
603 vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); 603 vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
604 to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE; 604 to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
605 to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; 605 to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT;
606 vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); 606 vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
607 kvmppc_book3s_queue_irqprio(vcpu, vec); 607 kvmppc_book3s_queue_irqprio(vcpu, vec);
608 } else if (page_found == -EINVAL) { 608 } else if (page_found == -EINVAL) {
609 /* Page not found in guest SLB */ 609 /* Page not found in guest SLB */
610 vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); 610 vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
611 kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); 611 kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
612 } else if (!is_mmio && 612 } else if (!is_mmio &&
613 kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { 613 kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
614 /* The guest's PTE is not mapped yet. Map on the host */ 614 /* The guest's PTE is not mapped yet. Map on the host */
615 kvmppc_mmu_map_page(vcpu, &pte); 615 kvmppc_mmu_map_page(vcpu, &pte);
616 if (data) 616 if (data)
617 vcpu->stat.sp_storage++; 617 vcpu->stat.sp_storage++;
618 else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 618 else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
619 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) 619 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
620 kvmppc_patch_dcbz(vcpu, &pte); 620 kvmppc_patch_dcbz(vcpu, &pte);
621 } else { 621 } else {
622 /* MMIO */ 622 /* MMIO */
623 vcpu->stat.mmio_exits++; 623 vcpu->stat.mmio_exits++;
624 vcpu->arch.paddr_accessed = pte.raddr; 624 vcpu->arch.paddr_accessed = pte.raddr;
625 r = kvmppc_emulate_mmio(run, vcpu); 625 r = kvmppc_emulate_mmio(run, vcpu);
626 if ( r == RESUME_HOST_NV ) 626 if ( r == RESUME_HOST_NV )
627 r = RESUME_HOST; 627 r = RESUME_HOST;
628 } 628 }
629 629
630 return r; 630 return r;
631 } 631 }
632 632
633 static inline int get_fpr_index(int i) 633 static inline int get_fpr_index(int i)
634 { 634 {
635 #ifdef CONFIG_VSX 635 #ifdef CONFIG_VSX
636 i *= 2; 636 i *= 2;
637 #endif 637 #endif
638 return i; 638 return i;
639 } 639 }
640 640
641 /* Give up external provider (FPU, Altivec, VSX) */ 641 /* Give up external provider (FPU, Altivec, VSX) */
642 void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) 642 void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
643 { 643 {
644 struct thread_struct *t = &current->thread; 644 struct thread_struct *t = &current->thread;
645 u64 *vcpu_fpr = vcpu->arch.fpr; 645 u64 *vcpu_fpr = vcpu->arch.fpr;
646 #ifdef CONFIG_VSX 646 #ifdef CONFIG_VSX
647 u64 *vcpu_vsx = vcpu->arch.vsr; 647 u64 *vcpu_vsx = vcpu->arch.vsr;
648 #endif 648 #endif
649 u64 *thread_fpr = (u64*)t->fpr; 649 u64 *thread_fpr = (u64*)t->fpr;
650 int i; 650 int i;
651 651
652 if (!(vcpu->arch.guest_owned_ext & msr)) 652 if (!(vcpu->arch.guest_owned_ext & msr))
653 return; 653 return;
654 654
655 #ifdef DEBUG_EXT 655 #ifdef DEBUG_EXT
656 printk(KERN_INFO "Giving up ext 0x%lx\n", msr); 656 printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
657 #endif 657 #endif
658 658
659 switch (msr) { 659 switch (msr) {
660 case MSR_FP: 660 case MSR_FP:
661 giveup_fpu(current); 661 giveup_fpu(current);
662 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) 662 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
663 vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; 663 vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
664 664
665 vcpu->arch.fpscr = t->fpscr.val; 665 vcpu->arch.fpscr = t->fpscr.val;
666 break; 666 break;
667 case MSR_VEC: 667 case MSR_VEC:
668 #ifdef CONFIG_ALTIVEC 668 #ifdef CONFIG_ALTIVEC
669 giveup_altivec(current); 669 giveup_altivec(current);
670 memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); 670 memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
671 vcpu->arch.vscr = t->vscr; 671 vcpu->arch.vscr = t->vscr;
672 #endif 672 #endif
673 break; 673 break;
674 case MSR_VSX: 674 case MSR_VSX:
675 #ifdef CONFIG_VSX 675 #ifdef CONFIG_VSX
676 __giveup_vsx(current); 676 __giveup_vsx(current);
677 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++) 677 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
678 vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1]; 678 vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
679 #endif 679 #endif
680 break; 680 break;
681 default: 681 default:
682 BUG(); 682 BUG();
683 } 683 }
684 684
685 vcpu->arch.guest_owned_ext &= ~msr; 685 vcpu->arch.guest_owned_ext &= ~msr;
686 current->thread.regs->msr &= ~msr; 686 current->thread.regs->msr &= ~msr;
687 kvmppc_recalc_shadow_msr(vcpu); 687 kvmppc_recalc_shadow_msr(vcpu);
688 } 688 }
689 689
690 static int kvmppc_read_inst(struct kvm_vcpu *vcpu) 690 static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
691 { 691 {
692 ulong srr0 = kvmppc_get_pc(vcpu); 692 ulong srr0 = kvmppc_get_pc(vcpu);
693 u32 last_inst = kvmppc_get_last_inst(vcpu); 693 u32 last_inst = kvmppc_get_last_inst(vcpu);
694 int ret; 694 int ret;
695 695
696 ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); 696 ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
697 if (ret == -ENOENT) { 697 if (ret == -ENOENT) {
698 vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 33, 1); 698 vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 33, 1);
699 vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 34, 36, 0); 699 vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 34, 36, 0);
700 vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0); 700 vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0);
701 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); 701 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
702 return EMULATE_AGAIN; 702 return EMULATE_AGAIN;
703 } 703 }
704 704
705 return EMULATE_DONE; 705 return EMULATE_DONE;
706 } 706 }
707 707
708 static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr) 708 static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr)
709 { 709 {
710 710
711 /* Need to do paired single emulation? */ 711 /* Need to do paired single emulation? */
712 if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) 712 if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
713 return EMULATE_DONE; 713 return EMULATE_DONE;
714 714
715 /* Read out the instruction */ 715 /* Read out the instruction */
716 if (kvmppc_read_inst(vcpu) == EMULATE_DONE) 716 if (kvmppc_read_inst(vcpu) == EMULATE_DONE)
717 /* Need to emulate */ 717 /* Need to emulate */
718 return EMULATE_FAIL; 718 return EMULATE_FAIL;
719 719
720 return EMULATE_AGAIN; 720 return EMULATE_AGAIN;
721 } 721 }
722 722
723 /* Handle external providers (FPU, Altivec, VSX) */ 723 /* Handle external providers (FPU, Altivec, VSX) */
724 static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, 724 static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
725 ulong msr) 725 ulong msr)
726 { 726 {
727 struct thread_struct *t = &current->thread; 727 struct thread_struct *t = &current->thread;
728 u64 *vcpu_fpr = vcpu->arch.fpr; 728 u64 *vcpu_fpr = vcpu->arch.fpr;
729 #ifdef CONFIG_VSX 729 #ifdef CONFIG_VSX
730 u64 *vcpu_vsx = vcpu->arch.vsr; 730 u64 *vcpu_vsx = vcpu->arch.vsr;
731 #endif 731 #endif
732 u64 *thread_fpr = (u64*)t->fpr; 732 u64 *thread_fpr = (u64*)t->fpr;
733 int i; 733 int i;
734 734
735 /* When we have paired singles, we emulate in software */ 735 /* When we have paired singles, we emulate in software */
736 if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) 736 if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
737 return RESUME_GUEST; 737 return RESUME_GUEST;
738 738
739 if (!(vcpu->arch.msr & msr)) { 739 if (!(vcpu->arch.msr & msr)) {
740 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 740 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
741 return RESUME_GUEST; 741 return RESUME_GUEST;
742 } 742 }
743 743
744 /* We already own the ext */ 744 /* We already own the ext */
745 if (vcpu->arch.guest_owned_ext & msr) { 745 if (vcpu->arch.guest_owned_ext & msr) {
746 return RESUME_GUEST; 746 return RESUME_GUEST;
747 } 747 }
748 748
749 #ifdef DEBUG_EXT 749 #ifdef DEBUG_EXT
750 printk(KERN_INFO "Loading up ext 0x%lx\n", msr); 750 printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
751 #endif 751 #endif
752 752
753 current->thread.regs->msr |= msr; 753 current->thread.regs->msr |= msr;
754 754
755 switch (msr) { 755 switch (msr) {
756 case MSR_FP: 756 case MSR_FP:
757 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) 757 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
758 thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; 758 thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
759 759
760 t->fpscr.val = vcpu->arch.fpscr; 760 t->fpscr.val = vcpu->arch.fpscr;
761 t->fpexc_mode = 0; 761 t->fpexc_mode = 0;
762 kvmppc_load_up_fpu(); 762 kvmppc_load_up_fpu();
763 break; 763 break;
764 case MSR_VEC: 764 case MSR_VEC:
765 #ifdef CONFIG_ALTIVEC 765 #ifdef CONFIG_ALTIVEC
766 memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr)); 766 memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
767 t->vscr = vcpu->arch.vscr; 767 t->vscr = vcpu->arch.vscr;
768 t->vrsave = -1; 768 t->vrsave = -1;
769 kvmppc_load_up_altivec(); 769 kvmppc_load_up_altivec();
770 #endif 770 #endif
771 break; 771 break;
772 case MSR_VSX: 772 case MSR_VSX:
773 #ifdef CONFIG_VSX 773 #ifdef CONFIG_VSX
774 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++) 774 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
775 thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i]; 775 thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
776 kvmppc_load_up_vsx(); 776 kvmppc_load_up_vsx();
777 #endif 777 #endif
778 break; 778 break;
779 default: 779 default:
780 BUG(); 780 BUG();
781 } 781 }
782 782
783 vcpu->arch.guest_owned_ext |= msr; 783 vcpu->arch.guest_owned_ext |= msr;
784 784
785 kvmppc_recalc_shadow_msr(vcpu); 785 kvmppc_recalc_shadow_msr(vcpu);
786 786
787 return RESUME_GUEST; 787 return RESUME_GUEST;
788 } 788 }
789 789
790 int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, 790 int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
791 unsigned int exit_nr) 791 unsigned int exit_nr)
792 { 792 {
793 int r = RESUME_HOST; 793 int r = RESUME_HOST;
794 794
795 vcpu->stat.sum_exits++; 795 vcpu->stat.sum_exits++;
796 796
797 run->exit_reason = KVM_EXIT_UNKNOWN; 797 run->exit_reason = KVM_EXIT_UNKNOWN;
798 run->ready_for_interrupt_injection = 1; 798 run->ready_for_interrupt_injection = 1;
799 #ifdef EXIT_DEBUG 799 #ifdef EXIT_DEBUG
800 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n", 800 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n",
801 exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu), 801 exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu),
802 kvmppc_get_dec(vcpu), to_svcpu(vcpu)->shadow_srr1); 802 kvmppc_get_dec(vcpu), to_svcpu(vcpu)->shadow_srr1);
803 #elif defined (EXIT_DEBUG_SIMPLE) 803 #elif defined (EXIT_DEBUG_SIMPLE)
804 if ((exit_nr != 0x900) && (exit_nr != 0x500)) 804 if ((exit_nr != 0x900) && (exit_nr != 0x500))
805 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n", 805 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n",
806 exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu), 806 exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu),
807 vcpu->arch.msr); 807 vcpu->arch.msr);
808 #endif 808 #endif
809 kvm_resched(vcpu); 809 kvm_resched(vcpu);
810 switch (exit_nr) { 810 switch (exit_nr) {
811 case BOOK3S_INTERRUPT_INST_STORAGE: 811 case BOOK3S_INTERRUPT_INST_STORAGE:
812 vcpu->stat.pf_instruc++; 812 vcpu->stat.pf_instruc++;
813 813
814 #ifdef CONFIG_PPC_BOOK3S_32 814 #ifdef CONFIG_PPC_BOOK3S_32
815 /* We set segments as unused segments when invalidating them. So 815 /* We set segments as unused segments when invalidating them. So
816 * treat the respective fault as a segment fault. */ 816 * treat the respective fault as a segment fault. */
817 if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] 817 if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT]
818 == SR_INVALID) { 818 == SR_INVALID) {
819 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); 819 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
820 r = RESUME_GUEST; 820 r = RESUME_GUEST;
821 break; 821 break;
822 } 822 }
823 #endif 823 #endif
824 824
825 /* only care about PTEG not found errors, but leave NX alone */ 825 /* only care about PTEG not found errors, but leave NX alone */
826 if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) { 826 if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) {
827 r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); 827 r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
828 vcpu->stat.sp_instruc++; 828 vcpu->stat.sp_instruc++;
829 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 829 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
830 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { 830 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
831 /* 831 /*
832 * XXX If we do the dcbz hack we use the NX bit to flush&patch the page, 832 * XXX If we do the dcbz hack we use the NX bit to flush&patch the page,
833 * so we can't use the NX bit inside the guest. Let's cross our fingers 833 * so we can't use the NX bit inside the guest. Let's cross our fingers
834 * that no guest that needs the dcbz hack does NX. 834 * that no guest that needs the dcbz hack does NX.
835 */ 835 */
836 kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); 836 kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
837 r = RESUME_GUEST; 837 r = RESUME_GUEST;
838 } else { 838 } else {
839 vcpu->arch.msr |= to_svcpu(vcpu)->shadow_srr1 & 0x58000000; 839 vcpu->arch.msr |= to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
840 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 840 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
841 kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); 841 kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
842 r = RESUME_GUEST; 842 r = RESUME_GUEST;
843 } 843 }
844 break; 844 break;
845 case BOOK3S_INTERRUPT_DATA_STORAGE: 845 case BOOK3S_INTERRUPT_DATA_STORAGE:
846 { 846 {
847 ulong dar = kvmppc_get_fault_dar(vcpu); 847 ulong dar = kvmppc_get_fault_dar(vcpu);
848 vcpu->stat.pf_storage++; 848 vcpu->stat.pf_storage++;
849 849
850 #ifdef CONFIG_PPC_BOOK3S_32 850 #ifdef CONFIG_PPC_BOOK3S_32
851 /* We set segments as unused segments when invalidating them. So 851 /* We set segments as unused segments when invalidating them. So
852 * treat the respective fault as a segment fault. */ 852 * treat the respective fault as a segment fault. */
853 if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) { 853 if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) {
854 kvmppc_mmu_map_segment(vcpu, dar); 854 kvmppc_mmu_map_segment(vcpu, dar);
855 r = RESUME_GUEST; 855 r = RESUME_GUEST;
856 break; 856 break;
857 } 857 }
858 #endif 858 #endif
859 859
860 /* The only case we need to handle is missing shadow PTEs */ 860 /* The only case we need to handle is missing shadow PTEs */
861 if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) { 861 if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) {
862 r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); 862 r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
863 } else { 863 } else {
864 vcpu->arch.dear = dar; 864 vcpu->arch.dear = dar;
865 to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr; 865 to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr;
866 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 866 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
867 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFUL); 867 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFUL);
868 r = RESUME_GUEST; 868 r = RESUME_GUEST;
869 } 869 }
870 break; 870 break;
871 } 871 }
872 case BOOK3S_INTERRUPT_DATA_SEGMENT: 872 case BOOK3S_INTERRUPT_DATA_SEGMENT:
873 if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) { 873 if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
874 vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); 874 vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
875 kvmppc_book3s_queue_irqprio(vcpu, 875 kvmppc_book3s_queue_irqprio(vcpu,
876 BOOK3S_INTERRUPT_DATA_SEGMENT); 876 BOOK3S_INTERRUPT_DATA_SEGMENT);
877 } 877 }
878 r = RESUME_GUEST; 878 r = RESUME_GUEST;
879 break; 879 break;
880 case BOOK3S_INTERRUPT_INST_SEGMENT: 880 case BOOK3S_INTERRUPT_INST_SEGMENT:
881 if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) { 881 if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) {
882 kvmppc_book3s_queue_irqprio(vcpu, 882 kvmppc_book3s_queue_irqprio(vcpu,
883 BOOK3S_INTERRUPT_INST_SEGMENT); 883 BOOK3S_INTERRUPT_INST_SEGMENT);
884 } 884 }
885 r = RESUME_GUEST; 885 r = RESUME_GUEST;
886 break; 886 break;
887 /* We're good on these - the host merely wanted to get our attention */ 887 /* We're good on these - the host merely wanted to get our attention */
888 case BOOK3S_INTERRUPT_DECREMENTER: 888 case BOOK3S_INTERRUPT_DECREMENTER:
889 vcpu->stat.dec_exits++; 889 vcpu->stat.dec_exits++;
890 r = RESUME_GUEST; 890 r = RESUME_GUEST;
891 break; 891 break;
892 case BOOK3S_INTERRUPT_EXTERNAL: 892 case BOOK3S_INTERRUPT_EXTERNAL:
893 vcpu->stat.ext_intr_exits++; 893 vcpu->stat.ext_intr_exits++;
894 r = RESUME_GUEST; 894 r = RESUME_GUEST;
895 break; 895 break;
896 case BOOK3S_INTERRUPT_PERFMON: 896 case BOOK3S_INTERRUPT_PERFMON:
897 r = RESUME_GUEST; 897 r = RESUME_GUEST;
898 break; 898 break;
899 case BOOK3S_INTERRUPT_PROGRAM: 899 case BOOK3S_INTERRUPT_PROGRAM:
900 { 900 {
901 enum emulation_result er; 901 enum emulation_result er;
902 ulong flags; 902 ulong flags;
903 903
904 program_interrupt: 904 program_interrupt:
905 flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull; 905 flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull;
906 906
907 if (vcpu->arch.msr & MSR_PR) { 907 if (vcpu->arch.msr & MSR_PR) {
908 #ifdef EXIT_DEBUG 908 #ifdef EXIT_DEBUG
909 printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); 909 printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
910 #endif 910 #endif
911 if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) != 911 if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) !=
912 (INS_DCBZ & 0xfffffff7)) { 912 (INS_DCBZ & 0xfffffff7)) {
913 kvmppc_core_queue_program(vcpu, flags); 913 kvmppc_core_queue_program(vcpu, flags);
914 r = RESUME_GUEST; 914 r = RESUME_GUEST;
915 break; 915 break;
916 } 916 }
917 } 917 }
918 918
919 vcpu->stat.emulated_inst_exits++; 919 vcpu->stat.emulated_inst_exits++;
920 er = kvmppc_emulate_instruction(run, vcpu); 920 er = kvmppc_emulate_instruction(run, vcpu);
921 switch (er) { 921 switch (er) {
922 case EMULATE_DONE: 922 case EMULATE_DONE:
923 r = RESUME_GUEST_NV; 923 r = RESUME_GUEST_NV;
924 break; 924 break;
925 case EMULATE_AGAIN: 925 case EMULATE_AGAIN:
926 r = RESUME_GUEST; 926 r = RESUME_GUEST;
927 break; 927 break;
928 case EMULATE_FAIL: 928 case EMULATE_FAIL:
929 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", 929 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
930 __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); 930 __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
931 kvmppc_core_queue_program(vcpu, flags); 931 kvmppc_core_queue_program(vcpu, flags);
932 r = RESUME_GUEST; 932 r = RESUME_GUEST;
933 break; 933 break;
934 case EMULATE_DO_MMIO: 934 case EMULATE_DO_MMIO:
935 run->exit_reason = KVM_EXIT_MMIO; 935 run->exit_reason = KVM_EXIT_MMIO;
936 r = RESUME_HOST_NV; 936 r = RESUME_HOST_NV;
937 break; 937 break;
938 default: 938 default:
939 BUG(); 939 BUG();
940 } 940 }
941 break; 941 break;
942 } 942 }
943 case BOOK3S_INTERRUPT_SYSCALL: 943 case BOOK3S_INTERRUPT_SYSCALL:
944 // XXX make user settable 944 // XXX make user settable
945 if (vcpu->arch.osi_enabled && 945 if (vcpu->arch.osi_enabled &&
946 (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && 946 (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
947 (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { 947 (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
948 u64 *gprs = run->osi.gprs; 948 u64 *gprs = run->osi.gprs;
949 int i; 949 int i;
950 950
951 run->exit_reason = KVM_EXIT_OSI; 951 run->exit_reason = KVM_EXIT_OSI;
952 for (i = 0; i < 32; i++) 952 for (i = 0; i < 32; i++)
953 gprs[i] = kvmppc_get_gpr(vcpu, i); 953 gprs[i] = kvmppc_get_gpr(vcpu, i);
954 vcpu->arch.osi_needed = 1; 954 vcpu->arch.osi_needed = 1;
955 r = RESUME_HOST_NV; 955 r = RESUME_HOST_NV;
956 956
957 } else { 957 } else {
958 vcpu->stat.syscall_exits++; 958 vcpu->stat.syscall_exits++;
959 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 959 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
960 r = RESUME_GUEST; 960 r = RESUME_GUEST;
961 } 961 }
962 break; 962 break;
963 case BOOK3S_INTERRUPT_FP_UNAVAIL: 963 case BOOK3S_INTERRUPT_FP_UNAVAIL:
964 case BOOK3S_INTERRUPT_ALTIVEC: 964 case BOOK3S_INTERRUPT_ALTIVEC:
965 case BOOK3S_INTERRUPT_VSX: 965 case BOOK3S_INTERRUPT_VSX:
966 { 966 {
967 int ext_msr = 0; 967 int ext_msr = 0;
968 968
969 switch (exit_nr) { 969 switch (exit_nr) {
970 case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break; 970 case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break;
971 case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break; 971 case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break;
972 case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break; 972 case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break;
973 } 973 }
974 974
975 switch (kvmppc_check_ext(vcpu, exit_nr)) { 975 switch (kvmppc_check_ext(vcpu, exit_nr)) {
976 case EMULATE_DONE: 976 case EMULATE_DONE:
977 /* everything ok - let's enable the ext */ 977 /* everything ok - let's enable the ext */
978 r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); 978 r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr);
979 break; 979 break;
980 case EMULATE_FAIL: 980 case EMULATE_FAIL:
981 /* we need to emulate this instruction */ 981 /* we need to emulate this instruction */
982 goto program_interrupt; 982 goto program_interrupt;
983 break; 983 break;
984 default: 984 default:
985 /* nothing to worry about - go again */ 985 /* nothing to worry about - go again */
986 break; 986 break;
987 } 987 }
988 break; 988 break;
989 } 989 }
990 case BOOK3S_INTERRUPT_ALIGNMENT: 990 case BOOK3S_INTERRUPT_ALIGNMENT:
991 if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { 991 if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
992 to_book3s(vcpu)->dsisr = kvmppc_alignment_dsisr(vcpu, 992 to_book3s(vcpu)->dsisr = kvmppc_alignment_dsisr(vcpu,
993 kvmppc_get_last_inst(vcpu)); 993 kvmppc_get_last_inst(vcpu));
994 vcpu->arch.dear = kvmppc_alignment_dar(vcpu, 994 vcpu->arch.dear = kvmppc_alignment_dar(vcpu,
995 kvmppc_get_last_inst(vcpu)); 995 kvmppc_get_last_inst(vcpu));
996 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 996 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
997 } 997 }
998 r = RESUME_GUEST; 998 r = RESUME_GUEST;
999 break; 999 break;
1000 case BOOK3S_INTERRUPT_MACHINE_CHECK: 1000 case BOOK3S_INTERRUPT_MACHINE_CHECK:
1001 case BOOK3S_INTERRUPT_TRACE: 1001 case BOOK3S_INTERRUPT_TRACE:
1002 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 1002 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
1003 r = RESUME_GUEST; 1003 r = RESUME_GUEST;
1004 break; 1004 break;
1005 default: 1005 default:
1006 /* Ugh - bork here! What did we get? */ 1006 /* Ugh - bork here! What did we get? */
1007 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", 1007 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
1008 exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1); 1008 exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1);
1009 r = RESUME_HOST; 1009 r = RESUME_HOST;
1010 BUG(); 1010 BUG();
1011 break; 1011 break;
1012 } 1012 }
1013 1013
1014 1014
1015 if (!(r & RESUME_HOST)) { 1015 if (!(r & RESUME_HOST)) {
1016 /* To avoid clobbering exit_reason, only check for signals if 1016 /* To avoid clobbering exit_reason, only check for signals if
1017 * we aren't already exiting to userspace for some other 1017 * we aren't already exiting to userspace for some other
1018 * reason. */ 1018 * reason. */
1019 if (signal_pending(current)) { 1019 if (signal_pending(current)) {
1020 #ifdef EXIT_DEBUG 1020 #ifdef EXIT_DEBUG
1021 printk(KERN_EMERG "KVM: Going back to host\n"); 1021 printk(KERN_EMERG "KVM: Going back to host\n");
1022 #endif 1022 #endif
1023 vcpu->stat.signal_exits++; 1023 vcpu->stat.signal_exits++;
1024 run->exit_reason = KVM_EXIT_INTR; 1024 run->exit_reason = KVM_EXIT_INTR;
1025 r = -EINTR; 1025 r = -EINTR;
1026 } else { 1026 } else {
1027 /* In case an interrupt came in that was triggered 1027 /* In case an interrupt came in that was triggered
1028 * from userspace (like DEC), we need to check what 1028 * from userspace (like DEC), we need to check what
1029 * to inject now! */ 1029 * to inject now! */
1030 kvmppc_core_deliver_interrupts(vcpu); 1030 kvmppc_core_deliver_interrupts(vcpu);
1031 } 1031 }
1032 } 1032 }
1033 1033
1034 #ifdef EXIT_DEBUG 1034 #ifdef EXIT_DEBUG
1035 printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, kvmppc_get_pc(vcpu), r); 1035 printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, kvmppc_get_pc(vcpu), r);
1036 #endif 1036 #endif
1037 1037
1038 return r; 1038 return r;
1039 } 1039 }
1040 1040
1041 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 1041 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1042 { 1042 {
1043 return 0; 1043 return 0;
1044 } 1044 }
1045 1045
1046 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 1046 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1047 { 1047 {
1048 int i; 1048 int i;
1049 1049
1050 vcpu_load(vcpu);
1051
1052 regs->pc = kvmppc_get_pc(vcpu); 1050 regs->pc = kvmppc_get_pc(vcpu);
1053 regs->cr = kvmppc_get_cr(vcpu); 1051 regs->cr = kvmppc_get_cr(vcpu);
1054 regs->ctr = kvmppc_get_ctr(vcpu); 1052 regs->ctr = kvmppc_get_ctr(vcpu);
1055 regs->lr = kvmppc_get_lr(vcpu); 1053 regs->lr = kvmppc_get_lr(vcpu);
1056 regs->xer = kvmppc_get_xer(vcpu); 1054 regs->xer = kvmppc_get_xer(vcpu);
1057 regs->msr = vcpu->arch.msr; 1055 regs->msr = vcpu->arch.msr;
1058 regs->srr0 = vcpu->arch.srr0; 1056 regs->srr0 = vcpu->arch.srr0;
1059 regs->srr1 = vcpu->arch.srr1; 1057 regs->srr1 = vcpu->arch.srr1;
1060 regs->pid = vcpu->arch.pid; 1058 regs->pid = vcpu->arch.pid;
1061 regs->sprg0 = vcpu->arch.sprg0; 1059 regs->sprg0 = vcpu->arch.sprg0;
1062 regs->sprg1 = vcpu->arch.sprg1; 1060 regs->sprg1 = vcpu->arch.sprg1;
1063 regs->sprg2 = vcpu->arch.sprg2; 1061 regs->sprg2 = vcpu->arch.sprg2;
1064 regs->sprg3 = vcpu->arch.sprg3; 1062 regs->sprg3 = vcpu->arch.sprg3;
1065 regs->sprg5 = vcpu->arch.sprg4; 1063 regs->sprg5 = vcpu->arch.sprg4;
1066 regs->sprg6 = vcpu->arch.sprg5; 1064 regs->sprg6 = vcpu->arch.sprg5;
1067 regs->sprg7 = vcpu->arch.sprg6; 1065 regs->sprg7 = vcpu->arch.sprg6;
1068 1066
1069 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 1067 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
1070 regs->gpr[i] = kvmppc_get_gpr(vcpu, i); 1068 regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
1071 1069
1072 vcpu_put(vcpu);
1073
1074 return 0; 1070 return 0;
1075 } 1071 }
1076 1072
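/*
 * Illustrative sketch (an assumption, not part of this file's hunks): with the
 * vcpu_load()/vcpu_put() pairs dropped from handlers such as
 * kvm_arch_vcpu_ioctl_get_regs() above, the generic vcpu ioctl dispatcher is
 * expected to take the vcpu lock once per ioctl, roughly:
 *
 *	static long kvm_vcpu_ioctl(struct file *filp,
 *				   unsigned int ioctl, unsigned long arg)
 *	{
 *		struct kvm_vcpu *vcpu = filp->private_data;
 *		long r = -EINVAL;
 *
 *		vcpu_load(vcpu);
 *		switch (ioctl) {
 *		case KVM_GET_REGS:
 *			...
 *			r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
 *			...
 *			break;
 *		...
 *		}
 *		vcpu_put(vcpu);
 *		return r;
 *	}
 *
 * so each arch handler already runs with the vcpu locked.
 */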
1077 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 1073 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1078 { 1074 {
1079 int i; 1075 int i;
1080 1076
1081 vcpu_load(vcpu);
1082
1083 kvmppc_set_pc(vcpu, regs->pc); 1077 kvmppc_set_pc(vcpu, regs->pc);
1084 kvmppc_set_cr(vcpu, regs->cr); 1078 kvmppc_set_cr(vcpu, regs->cr);
1085 kvmppc_set_ctr(vcpu, regs->ctr); 1079 kvmppc_set_ctr(vcpu, regs->ctr);
1086 kvmppc_set_lr(vcpu, regs->lr); 1080 kvmppc_set_lr(vcpu, regs->lr);
1087 kvmppc_set_xer(vcpu, regs->xer); 1081 kvmppc_set_xer(vcpu, regs->xer);
1088 kvmppc_set_msr(vcpu, regs->msr); 1082 kvmppc_set_msr(vcpu, regs->msr);
1089 vcpu->arch.srr0 = regs->srr0; 1083 vcpu->arch.srr0 = regs->srr0;
1090 vcpu->arch.srr1 = regs->srr1; 1084 vcpu->arch.srr1 = regs->srr1;
1091 vcpu->arch.sprg0 = regs->sprg0; 1085 vcpu->arch.sprg0 = regs->sprg0;
1092 vcpu->arch.sprg1 = regs->sprg1; 1086 vcpu->arch.sprg1 = regs->sprg1;
1093 vcpu->arch.sprg2 = regs->sprg2; 1087 vcpu->arch.sprg2 = regs->sprg2;
1094 vcpu->arch.sprg3 = regs->sprg3; 1088 vcpu->arch.sprg3 = regs->sprg3;
1095 vcpu->arch.sprg5 = regs->sprg4; 1089 vcpu->arch.sprg5 = regs->sprg4;
1096 vcpu->arch.sprg6 = regs->sprg5; 1090 vcpu->arch.sprg6 = regs->sprg5;
1097 vcpu->arch.sprg7 = regs->sprg6; 1091 vcpu->arch.sprg7 = regs->sprg6;
1098 1092
1099 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 1093 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
1100 kvmppc_set_gpr(vcpu, i, regs->gpr[i]); 1094 kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
1101 1095
1102 vcpu_put(vcpu);
1103
1104 return 0; 1096 return 0;
1105 } 1097 }
1106 1098
1107 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 1099 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1108 struct kvm_sregs *sregs) 1100 struct kvm_sregs *sregs)
1109 { 1101 {
1110 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 1102 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
1111 int i; 1103 int i;
1112 1104
1113 vcpu_load(vcpu);
1114
1115 sregs->pvr = vcpu->arch.pvr; 1105 sregs->pvr = vcpu->arch.pvr;
1116 1106
1117 sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1; 1107 sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
1118 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { 1108 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
1119 for (i = 0; i < 64; i++) { 1109 for (i = 0; i < 64; i++) {
1120 sregs->u.s.ppc64.slb[i].slbe = vcpu3s->slb[i].orige | i; 1110 sregs->u.s.ppc64.slb[i].slbe = vcpu3s->slb[i].orige | i;
1121 sregs->u.s.ppc64.slb[i].slbv = vcpu3s->slb[i].origv; 1111 sregs->u.s.ppc64.slb[i].slbv = vcpu3s->slb[i].origv;
1122 } 1112 }
1123 } else { 1113 } else {
1124 for (i = 0; i < 16; i++) { 1114 for (i = 0; i < 16; i++) {
1125 sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw; 1115 sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw;
1126 sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw; 1116 sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw;
1127 } 1117 }
1128 for (i = 0; i < 8; i++) { 1118 for (i = 0; i < 8; i++) {
1129 sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw; 1119 sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
1130 sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw; 1120 sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
1131 } 1121 }
1132 } 1122 }
1133 1123
1134 vcpu_put(vcpu);
1135
1136 return 0; 1124 return 0;
1137 } 1125 }
1138 1126
1139 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 1127 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1140 struct kvm_sregs *sregs) 1128 struct kvm_sregs *sregs)
1141 { 1129 {
1142 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 1130 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
1143 int i; 1131 int i;
1144 1132
1145 vcpu_load(vcpu);
1146
1147 kvmppc_set_pvr(vcpu, sregs->pvr); 1133 kvmppc_set_pvr(vcpu, sregs->pvr);
1148 1134
1149 vcpu3s->sdr1 = sregs->u.s.sdr1; 1135 vcpu3s->sdr1 = sregs->u.s.sdr1;
1150 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { 1136 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
1151 for (i = 0; i < 64; i++) { 1137 for (i = 0; i < 64; i++) {
1152 vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv, 1138 vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv,
1153 sregs->u.s.ppc64.slb[i].slbe); 1139 sregs->u.s.ppc64.slb[i].slbe);
1154 } 1140 }
1155 } else { 1141 } else {
1156 for (i = 0; i < 16; i++) { 1142 for (i = 0; i < 16; i++) {
1157 vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]); 1143 vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
1158 } 1144 }
1159 for (i = 0; i < 8; i++) { 1145 for (i = 0; i < 8; i++) {
1160 kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false, 1146 kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false,
1161 (u32)sregs->u.s.ppc32.ibat[i]); 1147 (u32)sregs->u.s.ppc32.ibat[i]);
1162 kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true, 1148 kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true,
1163 (u32)(sregs->u.s.ppc32.ibat[i] >> 32)); 1149 (u32)(sregs->u.s.ppc32.ibat[i] >> 32));
1164 kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false, 1150 kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false,
1165 (u32)sregs->u.s.ppc32.dbat[i]); 1151 (u32)sregs->u.s.ppc32.dbat[i]);
1166 kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true, 1152 kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true,
1167 (u32)(sregs->u.s.ppc32.dbat[i] >> 32)); 1153 (u32)(sregs->u.s.ppc32.dbat[i] >> 32));
1168 } 1154 }
1169 } 1155 }
1170 1156
1171 /* Flush the MMU after messing with the segments */ 1157 /* Flush the MMU after messing with the segments */
1172 kvmppc_mmu_pte_flush(vcpu, 0, 0); 1158 kvmppc_mmu_pte_flush(vcpu, 0, 0);
1173
1174 vcpu_put(vcpu);
1175 1159
1176 return 0; 1160 return 0;
1177 } 1161 }
1178 1162
1179 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 1163 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1180 { 1164 {
1181 return -ENOTSUPP; 1165 return -ENOTSUPP;
1182 } 1166 }
1183 1167
1184 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 1168 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1185 { 1169 {
1186 return -ENOTSUPP; 1170 return -ENOTSUPP;
1187 } 1171 }
1188 1172
1189 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 1173 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1190 struct kvm_translation *tr) 1174 struct kvm_translation *tr)
1191 { 1175 {
1192 return 0; 1176 return 0;
1193 } 1177 }
1194 1178
1195 /* 1179 /*
1196 * Get (and clear) the dirty memory log for a memory slot. 1180 * Get (and clear) the dirty memory log for a memory slot.
1197 */ 1181 */
1198 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 1182 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1199 struct kvm_dirty_log *log) 1183 struct kvm_dirty_log *log)
1200 { 1184 {
1201 struct kvm_memory_slot *memslot; 1185 struct kvm_memory_slot *memslot;
1202 struct kvm_vcpu *vcpu; 1186 struct kvm_vcpu *vcpu;
1203 ulong ga, ga_end; 1187 ulong ga, ga_end;
1204 int is_dirty = 0; 1188 int is_dirty = 0;
1205 int r; 1189 int r;
1206 unsigned long n; 1190 unsigned long n;
1207 1191
1208 mutex_lock(&kvm->slots_lock); 1192 mutex_lock(&kvm->slots_lock);
1209 1193
1210 r = kvm_get_dirty_log(kvm, log, &is_dirty); 1194 r = kvm_get_dirty_log(kvm, log, &is_dirty);
1211 if (r) 1195 if (r)
1212 goto out; 1196 goto out;
1213 1197
1214 /* If nothing is dirty, don't bother messing with page tables. */ 1198 /* If nothing is dirty, don't bother messing with page tables. */
1215 if (is_dirty) { 1199 if (is_dirty) {
1216 memslot = &kvm->memslots->memslots[log->slot]; 1200 memslot = &kvm->memslots->memslots[log->slot];
1217 1201
1218 ga = memslot->base_gfn << PAGE_SHIFT; 1202 ga = memslot->base_gfn << PAGE_SHIFT;
1219 ga_end = ga + (memslot->npages << PAGE_SHIFT); 1203 ga_end = ga + (memslot->npages << PAGE_SHIFT);
1220 1204
1221 kvm_for_each_vcpu(n, vcpu, kvm) 1205 kvm_for_each_vcpu(n, vcpu, kvm)
1222 kvmppc_mmu_pte_pflush(vcpu, ga, ga_end); 1206 kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
1223 1207
1224 n = kvm_dirty_bitmap_bytes(memslot); 1208 n = kvm_dirty_bitmap_bytes(memslot);
1225 memset(memslot->dirty_bitmap, 0, n); 1209 memset(memslot->dirty_bitmap, 0, n);
1226 } 1210 }
1227 1211
1228 r = 0; 1212 r = 0;
1229 out: 1213 out:
1230 mutex_unlock(&kvm->slots_lock); 1214 mutex_unlock(&kvm->slots_lock);
1231 return r; 1215 return r;
1232 } 1216 }
1233 1217
1234 int kvmppc_core_check_processor_compat(void) 1218 int kvmppc_core_check_processor_compat(void)
1235 { 1219 {
1236 return 0; 1220 return 0;
1237 } 1221 }
1238 1222
1239 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 1223 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
1240 { 1224 {
1241 struct kvmppc_vcpu_book3s *vcpu_book3s; 1225 struct kvmppc_vcpu_book3s *vcpu_book3s;
1242 struct kvm_vcpu *vcpu; 1226 struct kvm_vcpu *vcpu;
1243 int err = -ENOMEM; 1227 int err = -ENOMEM;
1244 1228
1245 vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s)); 1229 vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s));
1246 if (!vcpu_book3s) 1230 if (!vcpu_book3s)
1247 goto out; 1231 goto out;
1248 1232
1249 memset(vcpu_book3s, 0, sizeof(struct kvmppc_vcpu_book3s)); 1233 memset(vcpu_book3s, 0, sizeof(struct kvmppc_vcpu_book3s));
1250 1234
1251 vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *) 1235 vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *)
1252 kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL); 1236 kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
1253 if (!vcpu_book3s->shadow_vcpu) 1237 if (!vcpu_book3s->shadow_vcpu)
1254 goto free_vcpu; 1238 goto free_vcpu;
1255 1239
1256 vcpu = &vcpu_book3s->vcpu; 1240 vcpu = &vcpu_book3s->vcpu;
1257 err = kvm_vcpu_init(vcpu, kvm, id); 1241 err = kvm_vcpu_init(vcpu, kvm, id);
1258 if (err) 1242 if (err)
1259 goto free_shadow_vcpu; 1243 goto free_shadow_vcpu;
1260 1244
1261 vcpu->arch.host_retip = kvm_return_point; 1245 vcpu->arch.host_retip = kvm_return_point;
1262 vcpu->arch.host_msr = mfmsr(); 1246 vcpu->arch.host_msr = mfmsr();
1263 #ifdef CONFIG_PPC_BOOK3S_64 1247 #ifdef CONFIG_PPC_BOOK3S_64
1264 /* default to book3s_64 (970fx) */ 1248 /* default to book3s_64 (970fx) */
1265 vcpu->arch.pvr = 0x3C0301; 1249 vcpu->arch.pvr = 0x3C0301;
1266 #else 1250 #else
1267 /* default to book3s_32 (750) */ 1251 /* default to book3s_32 (750) */
1268 vcpu->arch.pvr = 0x84202; 1252 vcpu->arch.pvr = 0x84202;
1269 #endif 1253 #endif
1270 kvmppc_set_pvr(vcpu, vcpu->arch.pvr); 1254 kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
1271 vcpu_book3s->slb_nr = 64; 1255 vcpu_book3s->slb_nr = 64;
1272 1256
1273 /* remember where some real-mode handlers are */ 1257 /* remember where some real-mode handlers are */
1274 vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem; 1258 vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem;
1275 vcpu->arch.trampoline_enter = kvmppc_trampoline_enter; 1259 vcpu->arch.trampoline_enter = kvmppc_trampoline_enter;
1276 vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; 1260 vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
1277 #ifdef CONFIG_PPC_BOOK3S_64 1261 #ifdef CONFIG_PPC_BOOK3S_64
1278 vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall; 1262 vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
1279 #else 1263 #else
1280 vcpu->arch.rmcall = (ulong)kvmppc_rmcall; 1264 vcpu->arch.rmcall = (ulong)kvmppc_rmcall;
1281 #endif 1265 #endif
1282 1266
1283 vcpu->arch.shadow_msr = MSR_USER64; 1267 vcpu->arch.shadow_msr = MSR_USER64;
1284 1268
1285 err = kvmppc_mmu_init(vcpu); 1269 err = kvmppc_mmu_init(vcpu);
1286 if (err < 0) 1270 if (err < 0)
1287 goto free_shadow_vcpu; 1271 goto free_shadow_vcpu;
1288 1272
1289 return vcpu; 1273 return vcpu;
1290 1274
1291 free_shadow_vcpu: 1275 free_shadow_vcpu:
1292 kfree(vcpu_book3s->shadow_vcpu); 1276 kfree(vcpu_book3s->shadow_vcpu);
1293 free_vcpu: 1277 free_vcpu:
1294 vfree(vcpu_book3s); 1278 vfree(vcpu_book3s);
1295 out: 1279 out:
1296 return ERR_PTR(err); 1280 return ERR_PTR(err);
1297 } 1281 }
1298 1282
1299 void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 1283 void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
1300 { 1284 {
1301 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 1285 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
1302 1286
1303 kvm_vcpu_uninit(vcpu); 1287 kvm_vcpu_uninit(vcpu);
1304 kfree(vcpu_book3s->shadow_vcpu); 1288 kfree(vcpu_book3s->shadow_vcpu);
1305 vfree(vcpu_book3s); 1289 vfree(vcpu_book3s);
1306 } 1290 }
1307 1291
1308 extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); 1292 extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
1309 int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 1293 int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1310 { 1294 {
1311 int ret; 1295 int ret;
1312 struct thread_struct ext_bkp; 1296 struct thread_struct ext_bkp;
1313 #ifdef CONFIG_ALTIVEC 1297 #ifdef CONFIG_ALTIVEC
1314 bool save_vec = current->thread.used_vr; 1298 bool save_vec = current->thread.used_vr;
1315 #endif 1299 #endif
1316 #ifdef CONFIG_VSX 1300 #ifdef CONFIG_VSX
1317 bool save_vsx = current->thread.used_vsr; 1301 bool save_vsx = current->thread.used_vsr;
1318 #endif 1302 #endif
1319 ulong ext_msr; 1303 ulong ext_msr;
1320 1304
1321 /* No need to go into the guest when all we do is going out */ 1305 /* No need to go into the guest when all we do is going out */
1322 if (signal_pending(current)) { 1306 if (signal_pending(current)) {
1323 kvm_run->exit_reason = KVM_EXIT_INTR; 1307 kvm_run->exit_reason = KVM_EXIT_INTR;
1324 return -EINTR; 1308 return -EINTR;
1325 } 1309 }
1326 1310
1327 /* Save FPU state in stack */ 1311 /* Save FPU state in stack */
1328 if (current->thread.regs->msr & MSR_FP) 1312 if (current->thread.regs->msr & MSR_FP)
1329 giveup_fpu(current); 1313 giveup_fpu(current);
1330 memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr)); 1314 memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr));
1331 ext_bkp.fpscr = current->thread.fpscr; 1315 ext_bkp.fpscr = current->thread.fpscr;
1332 ext_bkp.fpexc_mode = current->thread.fpexc_mode; 1316 ext_bkp.fpexc_mode = current->thread.fpexc_mode;
1333 1317
1334 #ifdef CONFIG_ALTIVEC 1318 #ifdef CONFIG_ALTIVEC
1335 /* Save Altivec state in stack */ 1319 /* Save Altivec state in stack */
1336 if (save_vec) { 1320 if (save_vec) {
1337 if (current->thread.regs->msr & MSR_VEC) 1321 if (current->thread.regs->msr & MSR_VEC)
1338 giveup_altivec(current); 1322 giveup_altivec(current);
1339 memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr)); 1323 memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr));
1340 ext_bkp.vscr = current->thread.vscr; 1324 ext_bkp.vscr = current->thread.vscr;
1341 ext_bkp.vrsave = current->thread.vrsave; 1325 ext_bkp.vrsave = current->thread.vrsave;
1342 } 1326 }
1343 ext_bkp.used_vr = current->thread.used_vr; 1327 ext_bkp.used_vr = current->thread.used_vr;
1344 #endif 1328 #endif
1345 1329
1346 #ifdef CONFIG_VSX 1330 #ifdef CONFIG_VSX
1347 /* Save VSX state in stack */ 1331 /* Save VSX state in stack */
1348 if (save_vsx && (current->thread.regs->msr & MSR_VSX)) 1332 if (save_vsx && (current->thread.regs->msr & MSR_VSX))
1349 __giveup_vsx(current); 1333 __giveup_vsx(current);
1350 ext_bkp.used_vsr = current->thread.used_vsr; 1334 ext_bkp.used_vsr = current->thread.used_vsr;
1351 #endif 1335 #endif
1352 1336
1353 /* Remember the MSR with disabled extensions */ 1337 /* Remember the MSR with disabled extensions */
1354 ext_msr = current->thread.regs->msr; 1338 ext_msr = current->thread.regs->msr;
1355 1339
1356 /* XXX we get called with irq disabled - change that! */ 1340 /* XXX we get called with irq disabled - change that! */
1357 local_irq_enable(); 1341 local_irq_enable();
1358 1342
1359 /* Preload FPU if it's enabled */ 1343 /* Preload FPU if it's enabled */
1360 if (vcpu->arch.msr & MSR_FP) 1344 if (vcpu->arch.msr & MSR_FP)
1361 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); 1345 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
1362 1346
1363 ret = __kvmppc_vcpu_entry(kvm_run, vcpu); 1347 ret = __kvmppc_vcpu_entry(kvm_run, vcpu);
1364 1348
1365 local_irq_disable(); 1349 local_irq_disable();
1366 1350
1367 current->thread.regs->msr = ext_msr; 1351 current->thread.regs->msr = ext_msr;
1368 1352
1369 /* Make sure we save the guest FPU/Altivec/VSX state */ 1353 /* Make sure we save the guest FPU/Altivec/VSX state */
1370 kvmppc_giveup_ext(vcpu, MSR_FP); 1354 kvmppc_giveup_ext(vcpu, MSR_FP);
1371 kvmppc_giveup_ext(vcpu, MSR_VEC); 1355 kvmppc_giveup_ext(vcpu, MSR_VEC);
1372 kvmppc_giveup_ext(vcpu, MSR_VSX); 1356 kvmppc_giveup_ext(vcpu, MSR_VSX);
1373 1357
1374 /* Restore FPU state from stack */ 1358 /* Restore FPU state from stack */
1375 memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr)); 1359 memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr));
1376 current->thread.fpscr = ext_bkp.fpscr; 1360 current->thread.fpscr = ext_bkp.fpscr;
1377 current->thread.fpexc_mode = ext_bkp.fpexc_mode; 1361 current->thread.fpexc_mode = ext_bkp.fpexc_mode;
1378 1362
1379 #ifdef CONFIG_ALTIVEC 1363 #ifdef CONFIG_ALTIVEC
1380 /* Restore Altivec state from stack */ 1364 /* Restore Altivec state from stack */
1381 if (save_vec && current->thread.used_vr) { 1365 if (save_vec && current->thread.used_vr) {
1382 memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr)); 1366 memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr));
1383 current->thread.vscr = ext_bkp.vscr; 1367 current->thread.vscr = ext_bkp.vscr;
1384 current->thread.vrsave= ext_bkp.vrsave; 1368 current->thread.vrsave= ext_bkp.vrsave;
1385 } 1369 }
1386 current->thread.used_vr = ext_bkp.used_vr; 1370 current->thread.used_vr = ext_bkp.used_vr;
1387 #endif 1371 #endif
1388 1372
1389 #ifdef CONFIG_VSX 1373 #ifdef CONFIG_VSX
1390 current->thread.used_vsr = ext_bkp.used_vsr; 1374 current->thread.used_vsr = ext_bkp.used_vsr;
1391 #endif 1375 #endif
1392 1376
1393 return ret; 1377 return ret;
1394 } 1378 }
1395 1379
1396 static int kvmppc_book3s_init(void) 1380 static int kvmppc_book3s_init(void)
1397 { 1381 {
1398 return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0, 1382 return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0,
1399 THIS_MODULE); 1383 THIS_MODULE);
1400 } 1384 }
1401 1385
1402 static void kvmppc_book3s_exit(void) 1386 static void kvmppc_book3s_exit(void)
1403 { 1387 {
1404 kvm_exit(); 1388 kvm_exit();
1405 } 1389 }
1406 1390
1407 module_init(kvmppc_book3s_init); 1391 module_init(kvmppc_book3s_init);
1408 module_exit(kvmppc_book3s_exit); 1392 module_exit(kvmppc_book3s_exit);
1409 1393
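A minimal sketch, assuming the generic dispatcher now wraps each vcpu ioctl with a single vcpu_load()/vcpu_put() pair in place of the per-handler calls deleted above (hypothetical shape and name, not the actual virt/kvm/kvm_main.c code):

        static long example_vcpu_ioctl(struct kvm_vcpu *vcpu, unsigned int ioctl,
                                       void __user *argp)
        {
                long r;

                vcpu_load(vcpu);        /* one lock for every generic vcpu ioctl */
                switch (ioctl) {
                case KVM_GET_REGS: {
                        struct kvm_regs regs;

                        r = kvm_arch_vcpu_ioctl_get_regs(vcpu, &regs);
                        if (r)
                                break;
                        r = -EFAULT;
                        if (copy_to_user(argp, &regs, sizeof(regs)))
                                break;
                        r = 0;
                        break;
                }
                default:
                        r = -EINVAL;
                }
                vcpu_put(vcpu);         /* arch handlers no longer lock themselves */
                return r;
        }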
arch/powerpc/kvm/booke.c
1 /* 1 /*
2 * This program is free software; you can redistribute it and/or modify 2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as 3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation. 4 * published by the Free Software Foundation.
5 * 5 *
6 * This program is distributed in the hope that it will be useful, 6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of 7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details. 9 * GNU General Public License for more details.
10 * 10 *
11 * You should have received a copy of the GNU General Public License 11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software 12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 * 14 *
15 * Copyright IBM Corp. 2007 15 * Copyright IBM Corp. 2007
16 * 16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com> 17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 18 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
19 */ 19 */
20 20
21 #include <linux/errno.h> 21 #include <linux/errno.h>
22 #include <linux/err.h> 22 #include <linux/err.h>
23 #include <linux/kvm_host.h> 23 #include <linux/kvm_host.h>
24 #include <linux/gfp.h> 24 #include <linux/gfp.h>
25 #include <linux/module.h> 25 #include <linux/module.h>
26 #include <linux/vmalloc.h> 26 #include <linux/vmalloc.h>
27 #include <linux/fs.h> 27 #include <linux/fs.h>
28 28
29 #include <asm/cputable.h> 29 #include <asm/cputable.h>
30 #include <asm/uaccess.h> 30 #include <asm/uaccess.h>
31 #include <asm/kvm_ppc.h> 31 #include <asm/kvm_ppc.h>
32 #include "timing.h" 32 #include "timing.h"
33 #include <asm/cacheflush.h> 33 #include <asm/cacheflush.h>
34 34
35 #include "booke.h" 35 #include "booke.h"
36 36
37 unsigned long kvmppc_booke_handlers; 37 unsigned long kvmppc_booke_handlers;
38 38
39 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM 39 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
40 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 40 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
41 41
42 struct kvm_stats_debugfs_item debugfs_entries[] = { 42 struct kvm_stats_debugfs_item debugfs_entries[] = {
43 { "mmio", VCPU_STAT(mmio_exits) }, 43 { "mmio", VCPU_STAT(mmio_exits) },
44 { "dcr", VCPU_STAT(dcr_exits) }, 44 { "dcr", VCPU_STAT(dcr_exits) },
45 { "sig", VCPU_STAT(signal_exits) }, 45 { "sig", VCPU_STAT(signal_exits) },
46 { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, 46 { "itlb_r", VCPU_STAT(itlb_real_miss_exits) },
47 { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, 47 { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) },
48 { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) }, 48 { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) },
49 { "dtlb_v", VCPU_STAT(dtlb_virt_miss_exits) }, 49 { "dtlb_v", VCPU_STAT(dtlb_virt_miss_exits) },
50 { "sysc", VCPU_STAT(syscall_exits) }, 50 { "sysc", VCPU_STAT(syscall_exits) },
51 { "isi", VCPU_STAT(isi_exits) }, 51 { "isi", VCPU_STAT(isi_exits) },
52 { "dsi", VCPU_STAT(dsi_exits) }, 52 { "dsi", VCPU_STAT(dsi_exits) },
53 { "inst_emu", VCPU_STAT(emulated_inst_exits) }, 53 { "inst_emu", VCPU_STAT(emulated_inst_exits) },
54 { "dec", VCPU_STAT(dec_exits) }, 54 { "dec", VCPU_STAT(dec_exits) },
55 { "ext_intr", VCPU_STAT(ext_intr_exits) }, 55 { "ext_intr", VCPU_STAT(ext_intr_exits) },
56 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 56 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
57 { NULL } 57 { NULL }
58 }; 58 };
59 59
60 /* TODO: use vcpu_printf() */ 60 /* TODO: use vcpu_printf() */
61 void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) 61 void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
62 { 62 {
63 int i; 63 int i;
64 64
65 printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr); 65 printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr);
66 printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr); 66 printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
67 printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1); 67 printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1);
68 68
69 printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions); 69 printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
70 70
71 for (i = 0; i < 32; i += 4) { 71 for (i = 0; i < 32; i += 4) {
72 printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i, 72 printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
73 kvmppc_get_gpr(vcpu, i), 73 kvmppc_get_gpr(vcpu, i),
74 kvmppc_get_gpr(vcpu, i+1), 74 kvmppc_get_gpr(vcpu, i+1),
75 kvmppc_get_gpr(vcpu, i+2), 75 kvmppc_get_gpr(vcpu, i+2),
76 kvmppc_get_gpr(vcpu, i+3)); 76 kvmppc_get_gpr(vcpu, i+3));
77 } 77 }
78 } 78 }
79 79
80 static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, 80 static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
81 unsigned int priority) 81 unsigned int priority)
82 { 82 {
83 set_bit(priority, &vcpu->arch.pending_exceptions); 83 set_bit(priority, &vcpu->arch.pending_exceptions);
84 } 84 }
85 85
86 static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, 86 static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu,
87 ulong dear_flags, ulong esr_flags) 87 ulong dear_flags, ulong esr_flags)
88 { 88 {
89 vcpu->arch.queued_dear = dear_flags; 89 vcpu->arch.queued_dear = dear_flags;
90 vcpu->arch.queued_esr = esr_flags; 90 vcpu->arch.queued_esr = esr_flags;
91 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); 91 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
92 } 92 }
93 93
94 static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, 94 static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu,
95 ulong dear_flags, ulong esr_flags) 95 ulong dear_flags, ulong esr_flags)
96 { 96 {
97 vcpu->arch.queued_dear = dear_flags; 97 vcpu->arch.queued_dear = dear_flags;
98 vcpu->arch.queued_esr = esr_flags; 98 vcpu->arch.queued_esr = esr_flags;
99 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE); 99 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
100 } 100 }
101 101
102 static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, 102 static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu,
103 ulong esr_flags) 103 ulong esr_flags)
104 { 104 {
105 vcpu->arch.queued_esr = esr_flags; 105 vcpu->arch.queued_esr = esr_flags;
106 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); 106 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
107 } 107 }
108 108
109 void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags) 109 void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags)
110 { 110 {
111 vcpu->arch.queued_esr = esr_flags; 111 vcpu->arch.queued_esr = esr_flags;
112 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); 112 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
113 } 113 }
114 114
115 void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) 115 void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
116 { 116 {
117 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER); 117 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
118 } 118 }
119 119
120 int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) 120 int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
121 { 121 {
122 return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); 122 return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
123 } 123 }
124 124
125 void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) 125 void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
126 { 126 {
127 clear_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); 127 clear_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
128 } 128 }
129 129
130 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 130 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
131 struct kvm_interrupt *irq) 131 struct kvm_interrupt *irq)
132 { 132 {
133 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL); 133 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL);
134 } 134 }
135 135
136 void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, 136 void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
137 struct kvm_interrupt *irq) 137 struct kvm_interrupt *irq)
138 { 138 {
139 clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); 139 clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
140 } 140 }
141 141
142 /* Deliver the interrupt of the corresponding priority, if possible. */ 142 /* Deliver the interrupt of the corresponding priority, if possible. */
143 static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, 143 static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
144 unsigned int priority) 144 unsigned int priority)
145 { 145 {
146 int allowed = 0; 146 int allowed = 0;
147 ulong msr_mask; 147 ulong msr_mask;
148 bool update_esr = false, update_dear = false; 148 bool update_esr = false, update_dear = false;
149 149
150 switch (priority) { 150 switch (priority) {
151 case BOOKE_IRQPRIO_DTLB_MISS: 151 case BOOKE_IRQPRIO_DTLB_MISS:
152 case BOOKE_IRQPRIO_DATA_STORAGE: 152 case BOOKE_IRQPRIO_DATA_STORAGE:
153 update_dear = true; 153 update_dear = true;
154 /* fall through */ 154 /* fall through */
155 case BOOKE_IRQPRIO_INST_STORAGE: 155 case BOOKE_IRQPRIO_INST_STORAGE:
156 case BOOKE_IRQPRIO_PROGRAM: 156 case BOOKE_IRQPRIO_PROGRAM:
157 update_esr = true; 157 update_esr = true;
158 /* fall through */ 158 /* fall through */
159 case BOOKE_IRQPRIO_ITLB_MISS: 159 case BOOKE_IRQPRIO_ITLB_MISS:
160 case BOOKE_IRQPRIO_SYSCALL: 160 case BOOKE_IRQPRIO_SYSCALL:
161 case BOOKE_IRQPRIO_FP_UNAVAIL: 161 case BOOKE_IRQPRIO_FP_UNAVAIL:
162 case BOOKE_IRQPRIO_SPE_UNAVAIL: 162 case BOOKE_IRQPRIO_SPE_UNAVAIL:
163 case BOOKE_IRQPRIO_SPE_FP_DATA: 163 case BOOKE_IRQPRIO_SPE_FP_DATA:
164 case BOOKE_IRQPRIO_SPE_FP_ROUND: 164 case BOOKE_IRQPRIO_SPE_FP_ROUND:
165 case BOOKE_IRQPRIO_AP_UNAVAIL: 165 case BOOKE_IRQPRIO_AP_UNAVAIL:
166 case BOOKE_IRQPRIO_ALIGNMENT: 166 case BOOKE_IRQPRIO_ALIGNMENT:
167 allowed = 1; 167 allowed = 1;
168 msr_mask = MSR_CE|MSR_ME|MSR_DE; 168 msr_mask = MSR_CE|MSR_ME|MSR_DE;
169 break; 169 break;
170 case BOOKE_IRQPRIO_CRITICAL: 170 case BOOKE_IRQPRIO_CRITICAL:
171 case BOOKE_IRQPRIO_WATCHDOG: 171 case BOOKE_IRQPRIO_WATCHDOG:
172 allowed = vcpu->arch.msr & MSR_CE; 172 allowed = vcpu->arch.msr & MSR_CE;
173 msr_mask = MSR_ME; 173 msr_mask = MSR_ME;
174 break; 174 break;
175 case BOOKE_IRQPRIO_MACHINE_CHECK: 175 case BOOKE_IRQPRIO_MACHINE_CHECK:
176 allowed = vcpu->arch.msr & MSR_ME; 176 allowed = vcpu->arch.msr & MSR_ME;
177 msr_mask = 0; 177 msr_mask = 0;
178 break; 178 break;
179 case BOOKE_IRQPRIO_EXTERNAL: 179 case BOOKE_IRQPRIO_EXTERNAL:
180 case BOOKE_IRQPRIO_DECREMENTER: 180 case BOOKE_IRQPRIO_DECREMENTER:
181 case BOOKE_IRQPRIO_FIT: 181 case BOOKE_IRQPRIO_FIT:
182 allowed = vcpu->arch.msr & MSR_EE; 182 allowed = vcpu->arch.msr & MSR_EE;
183 msr_mask = MSR_CE|MSR_ME|MSR_DE; 183 msr_mask = MSR_CE|MSR_ME|MSR_DE;
184 break; 184 break;
185 case BOOKE_IRQPRIO_DEBUG: 185 case BOOKE_IRQPRIO_DEBUG:
186 allowed = vcpu->arch.msr & MSR_DE; 186 allowed = vcpu->arch.msr & MSR_DE;
187 msr_mask = MSR_ME; 187 msr_mask = MSR_ME;
188 break; 188 break;
189 } 189 }
190 190
191 if (allowed) { 191 if (allowed) {
192 vcpu->arch.srr0 = vcpu->arch.pc; 192 vcpu->arch.srr0 = vcpu->arch.pc;
193 vcpu->arch.srr1 = vcpu->arch.msr; 193 vcpu->arch.srr1 = vcpu->arch.msr;
194 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; 194 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
195 if (update_esr == true) 195 if (update_esr == true)
196 vcpu->arch.esr = vcpu->arch.queued_esr; 196 vcpu->arch.esr = vcpu->arch.queued_esr;
197 if (update_dear == true) 197 if (update_dear == true)
198 vcpu->arch.dear = vcpu->arch.queued_dear; 198 vcpu->arch.dear = vcpu->arch.queued_dear;
199 kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask); 199 kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
200 200
201 clear_bit(priority, &vcpu->arch.pending_exceptions); 201 clear_bit(priority, &vcpu->arch.pending_exceptions);
202 } 202 }
203 203
204 return allowed; 204 return allowed;
205 } 205 }
206 206
207 /* Check pending exceptions and deliver one, if possible. */ 207 /* Check pending exceptions and deliver one, if possible. */
208 void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) 208 void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
209 { 209 {
210 unsigned long *pending = &vcpu->arch.pending_exceptions; 210 unsigned long *pending = &vcpu->arch.pending_exceptions;
211 unsigned int priority; 211 unsigned int priority;
212 212
213 priority = __ffs(*pending); 213 priority = __ffs(*pending);
214 while (priority <= BOOKE_IRQPRIO_MAX) { 214 while (priority <= BOOKE_IRQPRIO_MAX) {
215 if (kvmppc_booke_irqprio_deliver(vcpu, priority)) 215 if (kvmppc_booke_irqprio_deliver(vcpu, priority))
216 break; 216 break;
217 217
218 priority = find_next_bit(pending, 218 priority = find_next_bit(pending,
219 BITS_PER_BYTE * sizeof(*pending), 219 BITS_PER_BYTE * sizeof(*pending),
220 priority + 1); 220 priority + 1);
221 } 221 }
222 } 222 }
223 223
224 /** 224 /**
225 * kvmppc_handle_exit 225 * kvmppc_handle_exit
226 * 226 *
227 * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV) 227 * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
228 */ 228 */
229 int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, 229 int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
230 unsigned int exit_nr) 230 unsigned int exit_nr)
231 { 231 {
232 enum emulation_result er; 232 enum emulation_result er;
233 int r = RESUME_HOST; 233 int r = RESUME_HOST;
234 234
235 /* update before a new last_exit_type is rewritten */ 235 /* update before a new last_exit_type is rewritten */
236 kvmppc_update_timing_stats(vcpu); 236 kvmppc_update_timing_stats(vcpu);
237 237
238 local_irq_enable(); 238 local_irq_enable();
239 239
240 run->exit_reason = KVM_EXIT_UNKNOWN; 240 run->exit_reason = KVM_EXIT_UNKNOWN;
241 run->ready_for_interrupt_injection = 1; 241 run->ready_for_interrupt_injection = 1;
242 242
243 switch (exit_nr) { 243 switch (exit_nr) {
244 case BOOKE_INTERRUPT_MACHINE_CHECK: 244 case BOOKE_INTERRUPT_MACHINE_CHECK:
245 printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR)); 245 printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
246 kvmppc_dump_vcpu(vcpu); 246 kvmppc_dump_vcpu(vcpu);
247 r = RESUME_HOST; 247 r = RESUME_HOST;
248 break; 248 break;
249 249
250 case BOOKE_INTERRUPT_EXTERNAL: 250 case BOOKE_INTERRUPT_EXTERNAL:
251 kvmppc_account_exit(vcpu, EXT_INTR_EXITS); 251 kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
252 if (need_resched()) 252 if (need_resched())
253 cond_resched(); 253 cond_resched();
254 r = RESUME_GUEST; 254 r = RESUME_GUEST;
255 break; 255 break;
256 256
257 case BOOKE_INTERRUPT_DECREMENTER: 257 case BOOKE_INTERRUPT_DECREMENTER:
258 /* Since we switched IVPR back to the host's value, the host 258 /* Since we switched IVPR back to the host's value, the host
259 * handled this interrupt the moment we enabled interrupts. 259 * handled this interrupt the moment we enabled interrupts.
260 * Now we just offer it a chance to reschedule the guest. */ 260 * Now we just offer it a chance to reschedule the guest. */
261 kvmppc_account_exit(vcpu, DEC_EXITS); 261 kvmppc_account_exit(vcpu, DEC_EXITS);
262 if (need_resched()) 262 if (need_resched())
263 cond_resched(); 263 cond_resched();
264 r = RESUME_GUEST; 264 r = RESUME_GUEST;
265 break; 265 break;
266 266
267 case BOOKE_INTERRUPT_PROGRAM: 267 case BOOKE_INTERRUPT_PROGRAM:
268 if (vcpu->arch.msr & MSR_PR) { 268 if (vcpu->arch.msr & MSR_PR) {
269 /* Program traps generated by user-level software must be handled 269 /* Program traps generated by user-level software must be handled
270 * by the guest kernel. */ 270 * by the guest kernel. */
271 kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr); 271 kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr);
272 r = RESUME_GUEST; 272 r = RESUME_GUEST;
273 kvmppc_account_exit(vcpu, USR_PR_INST); 273 kvmppc_account_exit(vcpu, USR_PR_INST);
274 break; 274 break;
275 } 275 }
276 276
277 er = kvmppc_emulate_instruction(run, vcpu); 277 er = kvmppc_emulate_instruction(run, vcpu);
278 switch (er) { 278 switch (er) {
279 case EMULATE_DONE: 279 case EMULATE_DONE:
280 /* don't overwrite subtypes, just account kvm_stats */ 280 /* don't overwrite subtypes, just account kvm_stats */
281 kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS); 281 kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
282 /* Future optimization: only reload non-volatiles if 282 /* Future optimization: only reload non-volatiles if
283 * they were actually modified by emulation. */ 283 * they were actually modified by emulation. */
284 r = RESUME_GUEST_NV; 284 r = RESUME_GUEST_NV;
285 break; 285 break;
286 case EMULATE_DO_DCR: 286 case EMULATE_DO_DCR:
287 run->exit_reason = KVM_EXIT_DCR; 287 run->exit_reason = KVM_EXIT_DCR;
288 r = RESUME_HOST; 288 r = RESUME_HOST;
289 break; 289 break;
290 case EMULATE_FAIL: 290 case EMULATE_FAIL:
291 /* XXX Deliver Program interrupt to guest. */ 291 /* XXX Deliver Program interrupt to guest. */
292 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", 292 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
293 __func__, vcpu->arch.pc, vcpu->arch.last_inst); 293 __func__, vcpu->arch.pc, vcpu->arch.last_inst);
294 /* For debugging, encode the failing instruction and 294 /* For debugging, encode the failing instruction and
295 * report it to userspace. */ 295 * report it to userspace. */
296 run->hw.hardware_exit_reason = ~0ULL << 32; 296 run->hw.hardware_exit_reason = ~0ULL << 32;
297 run->hw.hardware_exit_reason |= vcpu->arch.last_inst; 297 run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
298 r = RESUME_HOST; 298 r = RESUME_HOST;
299 break; 299 break;
300 default: 300 default:
301 BUG(); 301 BUG();
302 } 302 }
303 break; 303 break;
304 304
305 case BOOKE_INTERRUPT_FP_UNAVAIL: 305 case BOOKE_INTERRUPT_FP_UNAVAIL:
306 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL); 306 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
307 kvmppc_account_exit(vcpu, FP_UNAVAIL); 307 kvmppc_account_exit(vcpu, FP_UNAVAIL);
308 r = RESUME_GUEST; 308 r = RESUME_GUEST;
309 break; 309 break;
310 310
311 case BOOKE_INTERRUPT_SPE_UNAVAIL: 311 case BOOKE_INTERRUPT_SPE_UNAVAIL:
312 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_UNAVAIL); 312 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_UNAVAIL);
313 r = RESUME_GUEST; 313 r = RESUME_GUEST;
314 break; 314 break;
315 315
316 case BOOKE_INTERRUPT_SPE_FP_DATA: 316 case BOOKE_INTERRUPT_SPE_FP_DATA:
317 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA); 317 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA);
318 r = RESUME_GUEST; 318 r = RESUME_GUEST;
319 break; 319 break;
320 320
321 case BOOKE_INTERRUPT_SPE_FP_ROUND: 321 case BOOKE_INTERRUPT_SPE_FP_ROUND:
322 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND); 322 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
323 r = RESUME_GUEST; 323 r = RESUME_GUEST;
324 break; 324 break;
325 325
326 case BOOKE_INTERRUPT_DATA_STORAGE: 326 case BOOKE_INTERRUPT_DATA_STORAGE:
327 kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear, 327 kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear,
328 vcpu->arch.fault_esr); 328 vcpu->arch.fault_esr);
329 kvmppc_account_exit(vcpu, DSI_EXITS); 329 kvmppc_account_exit(vcpu, DSI_EXITS);
330 r = RESUME_GUEST; 330 r = RESUME_GUEST;
331 break; 331 break;
332 332
333 case BOOKE_INTERRUPT_INST_STORAGE: 333 case BOOKE_INTERRUPT_INST_STORAGE:
334 kvmppc_core_queue_inst_storage(vcpu, vcpu->arch.fault_esr); 334 kvmppc_core_queue_inst_storage(vcpu, vcpu->arch.fault_esr);
335 kvmppc_account_exit(vcpu, ISI_EXITS); 335 kvmppc_account_exit(vcpu, ISI_EXITS);
336 r = RESUME_GUEST; 336 r = RESUME_GUEST;
337 break; 337 break;
338 338
339 case BOOKE_INTERRUPT_SYSCALL: 339 case BOOKE_INTERRUPT_SYSCALL:
340 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL); 340 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
341 kvmppc_account_exit(vcpu, SYSCALL_EXITS); 341 kvmppc_account_exit(vcpu, SYSCALL_EXITS);
342 r = RESUME_GUEST; 342 r = RESUME_GUEST;
343 break; 343 break;
344 344
345 case BOOKE_INTERRUPT_DTLB_MISS: { 345 case BOOKE_INTERRUPT_DTLB_MISS: {
346 unsigned long eaddr = vcpu->arch.fault_dear; 346 unsigned long eaddr = vcpu->arch.fault_dear;
347 int gtlb_index; 347 int gtlb_index;
348 gpa_t gpaddr; 348 gpa_t gpaddr;
349 gfn_t gfn; 349 gfn_t gfn;
350 350
351 /* Check the guest TLB. */ 351 /* Check the guest TLB. */
352 gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr); 352 gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr);
353 if (gtlb_index < 0) { 353 if (gtlb_index < 0) {
354 /* The guest didn't have a mapping for it. */ 354 /* The guest didn't have a mapping for it. */
355 kvmppc_core_queue_dtlb_miss(vcpu, 355 kvmppc_core_queue_dtlb_miss(vcpu,
356 vcpu->arch.fault_dear, 356 vcpu->arch.fault_dear,
357 vcpu->arch.fault_esr); 357 vcpu->arch.fault_esr);
358 kvmppc_mmu_dtlb_miss(vcpu); 358 kvmppc_mmu_dtlb_miss(vcpu);
359 kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS); 359 kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
360 r = RESUME_GUEST; 360 r = RESUME_GUEST;
361 break; 361 break;
362 } 362 }
363 363
364 gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr); 364 gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
365 gfn = gpaddr >> PAGE_SHIFT; 365 gfn = gpaddr >> PAGE_SHIFT;
366 366
367 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { 367 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
368 /* The guest TLB had a mapping, but the shadow TLB 368 /* The guest TLB had a mapping, but the shadow TLB
369 * didn't, and it is RAM. This could be because: 369 * didn't, and it is RAM. This could be because:
370 * a) the entry is mapping the host kernel, or 370 * a) the entry is mapping the host kernel, or
371 * b) the guest used a large mapping which we're faking 371 * b) the guest used a large mapping which we're faking
372 * Either way, we need to satisfy the fault without 372 * Either way, we need to satisfy the fault without
373 * invoking the guest. */ 373 * invoking the guest. */
374 kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index); 374 kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
375 kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS); 375 kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
376 r = RESUME_GUEST; 376 r = RESUME_GUEST;
377 } else { 377 } else {
378 /* Guest has mapped and accessed a page which is not 378 /* Guest has mapped and accessed a page which is not
379 * actually RAM. */ 379 * actually RAM. */
380 vcpu->arch.paddr_accessed = gpaddr; 380 vcpu->arch.paddr_accessed = gpaddr;
381 r = kvmppc_emulate_mmio(run, vcpu); 381 r = kvmppc_emulate_mmio(run, vcpu);
382 kvmppc_account_exit(vcpu, MMIO_EXITS); 382 kvmppc_account_exit(vcpu, MMIO_EXITS);
383 } 383 }
384 384
385 break; 385 break;
386 } 386 }
387 387
388 case BOOKE_INTERRUPT_ITLB_MISS: { 388 case BOOKE_INTERRUPT_ITLB_MISS: {
389 unsigned long eaddr = vcpu->arch.pc; 389 unsigned long eaddr = vcpu->arch.pc;
390 gpa_t gpaddr; 390 gpa_t gpaddr;
391 gfn_t gfn; 391 gfn_t gfn;
392 int gtlb_index; 392 int gtlb_index;
393 393
394 r = RESUME_GUEST; 394 r = RESUME_GUEST;
395 395
396 /* Check the guest TLB. */ 396 /* Check the guest TLB. */
397 gtlb_index = kvmppc_mmu_itlb_index(vcpu, eaddr); 397 gtlb_index = kvmppc_mmu_itlb_index(vcpu, eaddr);
398 if (gtlb_index < 0) { 398 if (gtlb_index < 0) {
399 /* The guest didn't have a mapping for it. */ 399 /* The guest didn't have a mapping for it. */
400 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS); 400 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
401 kvmppc_mmu_itlb_miss(vcpu); 401 kvmppc_mmu_itlb_miss(vcpu);
402 kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS); 402 kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS);
403 break; 403 break;
404 } 404 }
405 405
406 kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS); 406 kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
407 407
408 gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr); 408 gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
409 gfn = gpaddr >> PAGE_SHIFT; 409 gfn = gpaddr >> PAGE_SHIFT;
410 410
411 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { 411 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
412 /* The guest TLB had a mapping, but the shadow TLB 412 /* The guest TLB had a mapping, but the shadow TLB
413 * didn't. This could be because: 413 * didn't. This could be because:
414 * a) the entry is mapping the host kernel, or 414 * a) the entry is mapping the host kernel, or
415 * b) the guest used a large mapping which we're faking 415 * b) the guest used a large mapping which we're faking
416 * Either way, we need to satisfy the fault without 416 * Either way, we need to satisfy the fault without
417 * invoking the guest. */ 417 * invoking the guest. */
418 kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index); 418 kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
419 } else { 419 } else {
420 /* Guest mapped and leaped at non-RAM! */ 420 /* Guest mapped and leaped at non-RAM! */
421 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK); 421 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
422 } 422 }
423 423
424 break; 424 break;
425 } 425 }
426 426
427 case BOOKE_INTERRUPT_DEBUG: { 427 case BOOKE_INTERRUPT_DEBUG: {
428 u32 dbsr; 428 u32 dbsr;
429 429
430 vcpu->arch.pc = mfspr(SPRN_CSRR0); 430 vcpu->arch.pc = mfspr(SPRN_CSRR0);
431 431
432 /* clear IAC events in DBSR register */ 432 /* clear IAC events in DBSR register */
433 dbsr = mfspr(SPRN_DBSR); 433 dbsr = mfspr(SPRN_DBSR);
434 dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4; 434 dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
435 mtspr(SPRN_DBSR, dbsr); 435 mtspr(SPRN_DBSR, dbsr);
436 436
437 run->exit_reason = KVM_EXIT_DEBUG; 437 run->exit_reason = KVM_EXIT_DEBUG;
438 kvmppc_account_exit(vcpu, DEBUG_EXITS); 438 kvmppc_account_exit(vcpu, DEBUG_EXITS);
439 r = RESUME_HOST; 439 r = RESUME_HOST;
440 break; 440 break;
441 } 441 }
442 442
443 default: 443 default:
444 printk(KERN_EMERG "exit_nr %d\n", exit_nr); 444 printk(KERN_EMERG "exit_nr %d\n", exit_nr);
445 BUG(); 445 BUG();
446 } 446 }
447 447
448 local_irq_disable(); 448 local_irq_disable();
449 449
450 kvmppc_core_deliver_interrupts(vcpu); 450 kvmppc_core_deliver_interrupts(vcpu);
451 451
452 if (!(r & RESUME_HOST)) { 452 if (!(r & RESUME_HOST)) {
453 /* To avoid clobbering exit_reason, only check for signals if 453 /* To avoid clobbering exit_reason, only check for signals if
454 * we aren't already exiting to userspace for some other 454 * we aren't already exiting to userspace for some other
455 * reason. */ 455 * reason. */
456 if (signal_pending(current)) { 456 if (signal_pending(current)) {
457 run->exit_reason = KVM_EXIT_INTR; 457 run->exit_reason = KVM_EXIT_INTR;
458 r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); 458 r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
459 kvmppc_account_exit(vcpu, SIGNAL_EXITS); 459 kvmppc_account_exit(vcpu, SIGNAL_EXITS);
460 } 460 }
461 } 461 }
462 462
463 return r; 463 return r;
464 } 464 }
465 465
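As a sketch of how the return-value encoding described in the comment above can be taken apart by a run loop; the flag macros are the kernel's, and their exact bit positions are defined in kvm_ppc.h rather than assumed here:

        static int example_unpack_resume(int r, bool *to_host, bool *reload_nv)
        {
                *to_host = r & RESUME_FLAG_HOST;        /* exit to userspace? */
                *reload_nv = r & RESUME_FLAG_NV;        /* reload non-volatile GPRs? */
                return r >> 2;                          /* errcode, e.g. -EINTR above */
        }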
466 /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ 466 /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
467 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 467 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
468 { 468 {
469 vcpu->arch.pc = 0; 469 vcpu->arch.pc = 0;
470 vcpu->arch.msr = 0; 470 vcpu->arch.msr = 0;
471 kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ 471 kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
472 472
473 vcpu->arch.shadow_pid = 1; 473 vcpu->arch.shadow_pid = 1;
474 474
475 /* Eye-catching number so we know if the guest takes an interrupt 475 /* Eye-catching number so we know if the guest takes an interrupt
476 * before it's programmed its own IVPR. */ 476 * before it's programmed its own IVPR. */
477 vcpu->arch.ivpr = 0x55550000; 477 vcpu->arch.ivpr = 0x55550000;
478 478
479 kvmppc_init_timing_stats(vcpu); 479 kvmppc_init_timing_stats(vcpu);
480 480
481 return kvmppc_core_vcpu_setup(vcpu); 481 return kvmppc_core_vcpu_setup(vcpu);
482 } 482 }
483 483
484 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 484 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
485 { 485 {
486 int i; 486 int i;
487 487
488 vcpu_load(vcpu);
489
490 regs->pc = vcpu->arch.pc; 488 regs->pc = vcpu->arch.pc;
491 regs->cr = kvmppc_get_cr(vcpu); 489 regs->cr = kvmppc_get_cr(vcpu);
492 regs->ctr = vcpu->arch.ctr; 490 regs->ctr = vcpu->arch.ctr;
493 regs->lr = vcpu->arch.lr; 491 regs->lr = vcpu->arch.lr;
494 regs->xer = kvmppc_get_xer(vcpu); 492 regs->xer = kvmppc_get_xer(vcpu);
495 regs->msr = vcpu->arch.msr; 493 regs->msr = vcpu->arch.msr;
496 regs->srr0 = vcpu->arch.srr0; 494 regs->srr0 = vcpu->arch.srr0;
497 regs->srr1 = vcpu->arch.srr1; 495 regs->srr1 = vcpu->arch.srr1;
498 regs->pid = vcpu->arch.pid; 496 regs->pid = vcpu->arch.pid;
499 regs->sprg0 = vcpu->arch.sprg0; 497 regs->sprg0 = vcpu->arch.sprg0;
500 regs->sprg1 = vcpu->arch.sprg1; 498 regs->sprg1 = vcpu->arch.sprg1;
501 regs->sprg2 = vcpu->arch.sprg2; 499 regs->sprg2 = vcpu->arch.sprg2;
502 regs->sprg3 = vcpu->arch.sprg3; 500 regs->sprg3 = vcpu->arch.sprg3;
503 regs->sprg5 = vcpu->arch.sprg4; 501 regs->sprg5 = vcpu->arch.sprg4;
504 regs->sprg6 = vcpu->arch.sprg5; 502 regs->sprg6 = vcpu->arch.sprg5;
505 regs->sprg7 = vcpu->arch.sprg6; 503 regs->sprg7 = vcpu->arch.sprg6;
506 504
507 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 505 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
508 regs->gpr[i] = kvmppc_get_gpr(vcpu, i); 506 regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
509 507
510 vcpu_put(vcpu);
511
512 return 0; 508 return 0;
513 } 509 }
514 510
515 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 511 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
516 { 512 {
517 int i; 513 int i;
518 514
519 vcpu_load(vcpu);
520
521 vcpu->arch.pc = regs->pc; 515 vcpu->arch.pc = regs->pc;
522 kvmppc_set_cr(vcpu, regs->cr); 516 kvmppc_set_cr(vcpu, regs->cr);
523 vcpu->arch.ctr = regs->ctr; 517 vcpu->arch.ctr = regs->ctr;
524 vcpu->arch.lr = regs->lr; 518 vcpu->arch.lr = regs->lr;
525 kvmppc_set_xer(vcpu, regs->xer); 519 kvmppc_set_xer(vcpu, regs->xer);
526 kvmppc_set_msr(vcpu, regs->msr); 520 kvmppc_set_msr(vcpu, regs->msr);
527 vcpu->arch.srr0 = regs->srr0; 521 vcpu->arch.srr0 = regs->srr0;
528 vcpu->arch.srr1 = regs->srr1; 522 vcpu->arch.srr1 = regs->srr1;
529 vcpu->arch.sprg0 = regs->sprg0; 523 vcpu->arch.sprg0 = regs->sprg0;
530 vcpu->arch.sprg1 = regs->sprg1; 524 vcpu->arch.sprg1 = regs->sprg1;
531 vcpu->arch.sprg2 = regs->sprg2; 525 vcpu->arch.sprg2 = regs->sprg2;
532 vcpu->arch.sprg3 = regs->sprg3; 526 vcpu->arch.sprg3 = regs->sprg3;
533 vcpu->arch.sprg5 = regs->sprg4; 527 vcpu->arch.sprg5 = regs->sprg4;
534 vcpu->arch.sprg6 = regs->sprg5; 528 vcpu->arch.sprg6 = regs->sprg5;
535 vcpu->arch.sprg7 = regs->sprg6; 529 vcpu->arch.sprg7 = regs->sprg6;
536 530
537 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 531 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
538 kvmppc_set_gpr(vcpu, i, regs->gpr[i]); 532 kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
539 533
540 vcpu_put(vcpu);
541
542 return 0; 534 return 0;
543 } 535 }
544 536
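The two handlers above back the standard KVM_GET_REGS/KVM_SET_REGS vcpu ioctls. A hypothetical userspace fragment, assuming vcpu_fd is an already-created vcpu file descriptor and with error handling trimmed:

        #include <linux/kvm.h>
        #include <sys/ioctl.h>

        static void example_set_entry(int vcpu_fd, unsigned long entry)
        {
                struct kvm_regs regs;

                ioctl(vcpu_fd, KVM_GET_REGS, &regs);    /* filled in by the handler above */
                regs.pc = entry;                        /* redirect the guest */
                ioctl(vcpu_fd, KVM_SET_REGS, &regs);
        }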
545 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 537 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
546 struct kvm_sregs *sregs) 538 struct kvm_sregs *sregs)
547 { 539 {
548 return -ENOTSUPP; 540 return -ENOTSUPP;
549 } 541 }
550 542
551 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 543 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
552 struct kvm_sregs *sregs) 544 struct kvm_sregs *sregs)
553 { 545 {
554 return -ENOTSUPP; 546 return -ENOTSUPP;
555 } 547 }
556 548
557 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 549 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
558 { 550 {
559 return -ENOTSUPP; 551 return -ENOTSUPP;
560 } 552 }
561 553
562 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 554 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
563 { 555 {
564 return -ENOTSUPP; 556 return -ENOTSUPP;
565 } 557 }
566 558
567 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 559 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
568 struct kvm_translation *tr) 560 struct kvm_translation *tr)
569 { 561 {
570 int r; 562 int r;
571 563
572 vcpu_load(vcpu);
573 r = kvmppc_core_vcpu_translate(vcpu, tr); 564 r = kvmppc_core_vcpu_translate(vcpu, tr);
574 vcpu_put(vcpu);
575 return r; 565 return r;
576 } 566 }
577 567
578 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) 568 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
579 { 569 {
580 return -ENOTSUPP; 570 return -ENOTSUPP;
581 } 571 }
582 572
583 int __init kvmppc_booke_init(void) 573 int __init kvmppc_booke_init(void)
584 { 574 {
585 unsigned long ivor[16]; 575 unsigned long ivor[16];
586 unsigned long max_ivor = 0; 576 unsigned long max_ivor = 0;
587 int i; 577 int i;
588 578
589 /* We install our own exception handlers by hijacking IVPR. IVPR must 579 /* We install our own exception handlers by hijacking IVPR. IVPR must
590 * be 16-bit aligned, so we need a 64KB allocation. */ 580 * be 16-bit aligned, so we need a 64KB allocation. */
591 kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO, 581 kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
592 VCPU_SIZE_ORDER); 582 VCPU_SIZE_ORDER);
593 if (!kvmppc_booke_handlers) 583 if (!kvmppc_booke_handlers)
594 return -ENOMEM; 584 return -ENOMEM;
595 585
596 /* XXX make sure our handlers are smaller than Linux's */ 586 /* XXX make sure our handlers are smaller than Linux's */
597 587
598 /* Copy our interrupt handlers to match host IVORs. That way we don't 588 /* Copy our interrupt handlers to match host IVORs. That way we don't
599 * have to swap the IVORs on every guest/host transition. */ 589 * have to swap the IVORs on every guest/host transition. */
600 ivor[0] = mfspr(SPRN_IVOR0); 590 ivor[0] = mfspr(SPRN_IVOR0);
601 ivor[1] = mfspr(SPRN_IVOR1); 591 ivor[1] = mfspr(SPRN_IVOR1);
602 ivor[2] = mfspr(SPRN_IVOR2); 592 ivor[2] = mfspr(SPRN_IVOR2);
603 ivor[3] = mfspr(SPRN_IVOR3); 593 ivor[3] = mfspr(SPRN_IVOR3);
604 ivor[4] = mfspr(SPRN_IVOR4); 594 ivor[4] = mfspr(SPRN_IVOR4);
605 ivor[5] = mfspr(SPRN_IVOR5); 595 ivor[5] = mfspr(SPRN_IVOR5);
606 ivor[6] = mfspr(SPRN_IVOR6); 596 ivor[6] = mfspr(SPRN_IVOR6);
607 ivor[7] = mfspr(SPRN_IVOR7); 597 ivor[7] = mfspr(SPRN_IVOR7);
608 ivor[8] = mfspr(SPRN_IVOR8); 598 ivor[8] = mfspr(SPRN_IVOR8);
609 ivor[9] = mfspr(SPRN_IVOR9); 599 ivor[9] = mfspr(SPRN_IVOR9);
610 ivor[10] = mfspr(SPRN_IVOR10); 600 ivor[10] = mfspr(SPRN_IVOR10);
611 ivor[11] = mfspr(SPRN_IVOR11); 601 ivor[11] = mfspr(SPRN_IVOR11);
612 ivor[12] = mfspr(SPRN_IVOR12); 602 ivor[12] = mfspr(SPRN_IVOR12);
613 ivor[13] = mfspr(SPRN_IVOR13); 603 ivor[13] = mfspr(SPRN_IVOR13);
614 ivor[14] = mfspr(SPRN_IVOR14); 604 ivor[14] = mfspr(SPRN_IVOR14);
615 ivor[15] = mfspr(SPRN_IVOR15); 605 ivor[15] = mfspr(SPRN_IVOR15);
616 606
617 for (i = 0; i < 16; i++) { 607 for (i = 0; i < 16; i++) {
618 if (ivor[i] > max_ivor) 608 if (ivor[i] > max_ivor)
619 max_ivor = ivor[i]; 609 max_ivor = ivor[i];
620 610
621 memcpy((void *)kvmppc_booke_handlers + ivor[i], 611 memcpy((void *)kvmppc_booke_handlers + ivor[i],
622 kvmppc_handlers_start + i * kvmppc_handler_len, 612 kvmppc_handlers_start + i * kvmppc_handler_len,
623 kvmppc_handler_len); 613 kvmppc_handler_len);
624 } 614 }
625 flush_icache_range(kvmppc_booke_handlers, 615 flush_icache_range(kvmppc_booke_handlers,
626 kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); 616 kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
627 617
628 return 0; 618 return 0;
629 } 619 }
630 620
631 void __exit kvmppc_booke_exit(void) 621 void __exit kvmppc_booke_exit(void)
632 { 622 {
633 free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER); 623 free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
634 kvm_exit(); 624 kvm_exit();
635 } 625 }
636 626
arch/powerpc/kvm/powerpc.c
1 /* 1 /*
2 * This program is free software; you can redistribute it and/or modify 2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as 3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation. 4 * published by the Free Software Foundation.
5 * 5 *
6 * This program is distributed in the hope that it will be useful, 6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of 7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details. 9 * GNU General Public License for more details.
10 * 10 *
11 * You should have received a copy of the GNU General Public License 11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software 12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 * 14 *
15 * Copyright IBM Corp. 2007 15 * Copyright IBM Corp. 2007
16 * 16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com> 17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 18 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
19 */ 19 */
20 20
21 #include <linux/errno.h> 21 #include <linux/errno.h>
22 #include <linux/err.h> 22 #include <linux/err.h>
23 #include <linux/kvm_host.h> 23 #include <linux/kvm_host.h>
24 #include <linux/module.h> 24 #include <linux/module.h>
25 #include <linux/vmalloc.h> 25 #include <linux/vmalloc.h>
26 #include <linux/hrtimer.h> 26 #include <linux/hrtimer.h>
27 #include <linux/fs.h> 27 #include <linux/fs.h>
28 #include <linux/slab.h> 28 #include <linux/slab.h>
29 #include <asm/cputable.h> 29 #include <asm/cputable.h>
30 #include <asm/uaccess.h> 30 #include <asm/uaccess.h>
31 #include <asm/kvm_ppc.h> 31 #include <asm/kvm_ppc.h>
32 #include <asm/tlbflush.h> 32 #include <asm/tlbflush.h>
33 #include "timing.h" 33 #include "timing.h"
34 #include "../mm/mmu_decl.h" 34 #include "../mm/mmu_decl.h"
35 35
36 #define CREATE_TRACE_POINTS 36 #define CREATE_TRACE_POINTS
37 #include "trace.h" 37 #include "trace.h"
38 38
39 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 39 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
40 { 40 {
41 return gfn; 41 return gfn;
42 } 42 }
43 43
44 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) 44 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
45 { 45 {
46 return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions); 46 return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions);
47 } 47 }
48 48
49 49
50 int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) 50 int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
51 { 51 {
52 enum emulation_result er; 52 enum emulation_result er;
53 int r; 53 int r;
54 54
55 er = kvmppc_emulate_instruction(run, vcpu); 55 er = kvmppc_emulate_instruction(run, vcpu);
56 switch (er) { 56 switch (er) {
57 case EMULATE_DONE: 57 case EMULATE_DONE:
58 /* Future optimization: only reload non-volatiles if they were 58 /* Future optimization: only reload non-volatiles if they were
59 * actually modified. */ 59 * actually modified. */
60 r = RESUME_GUEST_NV; 60 r = RESUME_GUEST_NV;
61 break; 61 break;
62 case EMULATE_DO_MMIO: 62 case EMULATE_DO_MMIO:
63 run->exit_reason = KVM_EXIT_MMIO; 63 run->exit_reason = KVM_EXIT_MMIO;
64 /* We must reload nonvolatiles because "update" load/store 64 /* We must reload nonvolatiles because "update" load/store
65 * instructions modify register state. */ 65 * instructions modify register state. */
66 /* Future optimization: only reload non-volatiles if they were 66 /* Future optimization: only reload non-volatiles if they were
67 * actually modified. */ 67 * actually modified. */
68 r = RESUME_HOST_NV; 68 r = RESUME_HOST_NV;
69 break; 69 break;
70 case EMULATE_FAIL: 70 case EMULATE_FAIL:
71 /* XXX Deliver Program interrupt to guest. */ 71 /* XXX Deliver Program interrupt to guest. */
72 printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, 72 printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__,
73 kvmppc_get_last_inst(vcpu)); 73 kvmppc_get_last_inst(vcpu));
74 r = RESUME_HOST; 74 r = RESUME_HOST;
75 break; 75 break;
76 default: 76 default:
77 BUG(); 77 BUG();
78 } 78 }
79 79
80 return r; 80 return r;
81 } 81 }
82 82
83 int kvm_arch_hardware_enable(void *garbage) 83 int kvm_arch_hardware_enable(void *garbage)
84 { 84 {
85 return 0; 85 return 0;
86 } 86 }
87 87
88 void kvm_arch_hardware_disable(void *garbage) 88 void kvm_arch_hardware_disable(void *garbage)
89 { 89 {
90 } 90 }
91 91
92 int kvm_arch_hardware_setup(void) 92 int kvm_arch_hardware_setup(void)
93 { 93 {
94 return 0; 94 return 0;
95 } 95 }
96 96
97 void kvm_arch_hardware_unsetup(void) 97 void kvm_arch_hardware_unsetup(void)
98 { 98 {
99 } 99 }
100 100
101 void kvm_arch_check_processor_compat(void *rtn) 101 void kvm_arch_check_processor_compat(void *rtn)
102 { 102 {
103 *(int *)rtn = kvmppc_core_check_processor_compat(); 103 *(int *)rtn = kvmppc_core_check_processor_compat();
104 } 104 }
105 105
106 struct kvm *kvm_arch_create_vm(void) 106 struct kvm *kvm_arch_create_vm(void)
107 { 107 {
108 struct kvm *kvm; 108 struct kvm *kvm;
109 109
110 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); 110 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
111 if (!kvm) 111 if (!kvm)
112 return ERR_PTR(-ENOMEM); 112 return ERR_PTR(-ENOMEM);
113 113
114 return kvm; 114 return kvm;
115 } 115 }
116 116
117 static void kvmppc_free_vcpus(struct kvm *kvm) 117 static void kvmppc_free_vcpus(struct kvm *kvm)
118 { 118 {
119 unsigned int i; 119 unsigned int i;
120 struct kvm_vcpu *vcpu; 120 struct kvm_vcpu *vcpu;
121 121
122 kvm_for_each_vcpu(i, vcpu, kvm) 122 kvm_for_each_vcpu(i, vcpu, kvm)
123 kvm_arch_vcpu_free(vcpu); 123 kvm_arch_vcpu_free(vcpu);
124 124
125 mutex_lock(&kvm->lock); 125 mutex_lock(&kvm->lock);
126 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) 126 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
127 kvm->vcpus[i] = NULL; 127 kvm->vcpus[i] = NULL;
128 128
129 atomic_set(&kvm->online_vcpus, 0); 129 atomic_set(&kvm->online_vcpus, 0);
130 mutex_unlock(&kvm->lock); 130 mutex_unlock(&kvm->lock);
131 } 131 }
132 132
133 void kvm_arch_sync_events(struct kvm *kvm) 133 void kvm_arch_sync_events(struct kvm *kvm)
134 { 134 {
135 } 135 }
136 136
137 void kvm_arch_destroy_vm(struct kvm *kvm) 137 void kvm_arch_destroy_vm(struct kvm *kvm)
138 { 138 {
139 kvmppc_free_vcpus(kvm); 139 kvmppc_free_vcpus(kvm);
140 kvm_free_physmem(kvm); 140 kvm_free_physmem(kvm);
141 cleanup_srcu_struct(&kvm->srcu); 141 cleanup_srcu_struct(&kvm->srcu);
142 kfree(kvm); 142 kfree(kvm);
143 } 143 }
144 144
145 int kvm_dev_ioctl_check_extension(long ext) 145 int kvm_dev_ioctl_check_extension(long ext)
146 { 146 {
147 int r; 147 int r;
148 148
149 switch (ext) { 149 switch (ext) {
150 case KVM_CAP_PPC_SEGSTATE: 150 case KVM_CAP_PPC_SEGSTATE:
151 case KVM_CAP_PPC_PAIRED_SINGLES: 151 case KVM_CAP_PPC_PAIRED_SINGLES:
152 case KVM_CAP_PPC_UNSET_IRQ: 152 case KVM_CAP_PPC_UNSET_IRQ:
153 case KVM_CAP_ENABLE_CAP: 153 case KVM_CAP_ENABLE_CAP:
154 case KVM_CAP_PPC_OSI: 154 case KVM_CAP_PPC_OSI:
155 r = 1; 155 r = 1;
156 break; 156 break;
157 case KVM_CAP_COALESCED_MMIO: 157 case KVM_CAP_COALESCED_MMIO:
158 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 158 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
159 break; 159 break;
160 default: 160 default:
161 r = 0; 161 r = 0;
162 break; 162 break;
163 } 163 }
164 return r; 164 return r;
165 165
166 } 166 }
167 167
168 long kvm_arch_dev_ioctl(struct file *filp, 168 long kvm_arch_dev_ioctl(struct file *filp,
169 unsigned int ioctl, unsigned long arg) 169 unsigned int ioctl, unsigned long arg)
170 { 170 {
171 return -EINVAL; 171 return -EINVAL;
172 } 172 }
173 173
174 int kvm_arch_prepare_memory_region(struct kvm *kvm, 174 int kvm_arch_prepare_memory_region(struct kvm *kvm,
175 struct kvm_memory_slot *memslot, 175 struct kvm_memory_slot *memslot,
176 struct kvm_memory_slot old, 176 struct kvm_memory_slot old,
177 struct kvm_userspace_memory_region *mem, 177 struct kvm_userspace_memory_region *mem,
178 int user_alloc) 178 int user_alloc)
179 { 179 {
180 return 0; 180 return 0;
181 } 181 }
182 182
183 void kvm_arch_commit_memory_region(struct kvm *kvm, 183 void kvm_arch_commit_memory_region(struct kvm *kvm,
184 struct kvm_userspace_memory_region *mem, 184 struct kvm_userspace_memory_region *mem,
185 struct kvm_memory_slot old, 185 struct kvm_memory_slot old,
186 int user_alloc) 186 int user_alloc)
187 { 187 {
188 return; 188 return;
189 } 189 }
190 190
191 191
192 void kvm_arch_flush_shadow(struct kvm *kvm) 192 void kvm_arch_flush_shadow(struct kvm *kvm)
193 { 193 {
194 } 194 }
195 195
196 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) 196 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
197 { 197 {
198 struct kvm_vcpu *vcpu; 198 struct kvm_vcpu *vcpu;
199 vcpu = kvmppc_core_vcpu_create(kvm, id); 199 vcpu = kvmppc_core_vcpu_create(kvm, id);
200 if (!IS_ERR(vcpu)) 200 if (!IS_ERR(vcpu))
201 kvmppc_create_vcpu_debugfs(vcpu, id); 201 kvmppc_create_vcpu_debugfs(vcpu, id);
202 return vcpu; 202 return vcpu;
203 } 203 }
204 204
205 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 205 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
206 { 206 {
207 /* Make sure we're not using the vcpu anymore */ 207 /* Make sure we're not using the vcpu anymore */
208 hrtimer_cancel(&vcpu->arch.dec_timer); 208 hrtimer_cancel(&vcpu->arch.dec_timer);
209 tasklet_kill(&vcpu->arch.tasklet); 209 tasklet_kill(&vcpu->arch.tasklet);
210 210
211 kvmppc_remove_vcpu_debugfs(vcpu); 211 kvmppc_remove_vcpu_debugfs(vcpu);
212 kvmppc_core_vcpu_free(vcpu); 212 kvmppc_core_vcpu_free(vcpu);
213 } 213 }
214 214
215 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 215 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
216 { 216 {
217 kvm_arch_vcpu_free(vcpu); 217 kvm_arch_vcpu_free(vcpu);
218 } 218 }
219 219
220 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) 220 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
221 { 221 {
222 return kvmppc_core_pending_dec(vcpu); 222 return kvmppc_core_pending_dec(vcpu);
223 } 223 }
224 224
225 static void kvmppc_decrementer_func(unsigned long data) 225 static void kvmppc_decrementer_func(unsigned long data)
226 { 226 {
227 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; 227 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
228 228
229 kvmppc_core_queue_dec(vcpu); 229 kvmppc_core_queue_dec(vcpu);
230 230
231 if (waitqueue_active(&vcpu->wq)) { 231 if (waitqueue_active(&vcpu->wq)) {
232 wake_up_interruptible(&vcpu->wq); 232 wake_up_interruptible(&vcpu->wq);
233 vcpu->stat.halt_wakeup++; 233 vcpu->stat.halt_wakeup++;
234 } 234 }
235 } 235 }
236 236
237 /* 237 /*
238 * low level hrtimer wake routine. Because this runs in hardirq context 238 * low level hrtimer wake routine. Because this runs in hardirq context
239 * we schedule a tasklet to do the real work. 239 * we schedule a tasklet to do the real work.
240 */ 240 */
241 enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) 241 enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
242 { 242 {
243 struct kvm_vcpu *vcpu; 243 struct kvm_vcpu *vcpu;
244 244
245 vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer); 245 vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer);
246 tasklet_schedule(&vcpu->arch.tasklet); 246 tasklet_schedule(&vcpu->arch.tasklet);
247 247
248 return HRTIMER_NORESTART; 248 return HRTIMER_NORESTART;
249 } 249 }
250 250
251 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 251 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
252 { 252 {
253 hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); 253 hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
254 tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); 254 tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
255 vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; 255 vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
256 256
257 return 0; 257 return 0;
258 } 258 }
259 259
260 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 260 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
261 { 261 {
262 kvmppc_mmu_destroy(vcpu); 262 kvmppc_mmu_destroy(vcpu);
263 } 263 }
264 264
265 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 265 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
266 { 266 {
267 kvmppc_core_vcpu_load(vcpu, cpu); 267 kvmppc_core_vcpu_load(vcpu, cpu);
268 } 268 }
269 269
270 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 270 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
271 { 271 {
272 kvmppc_core_vcpu_put(vcpu); 272 kvmppc_core_vcpu_put(vcpu);
273 } 273 }
274 274
275 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 275 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
276 struct kvm_guest_debug *dbg) 276 struct kvm_guest_debug *dbg)
277 { 277 {
278 return -EINVAL; 278 return -EINVAL;
279 } 279 }
280 280
281 static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, 281 static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
282 struct kvm_run *run) 282 struct kvm_run *run)
283 { 283 {
284 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data); 284 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data);
285 } 285 }
286 286
287 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, 287 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
288 struct kvm_run *run) 288 struct kvm_run *run)
289 { 289 {
290 u64 gpr; 290 u64 gpr;
291 291
292 if (run->mmio.len > sizeof(gpr)) { 292 if (run->mmio.len > sizeof(gpr)) {
293 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); 293 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
294 return; 294 return;
295 } 295 }
296 296
297 if (vcpu->arch.mmio_is_bigendian) { 297 if (vcpu->arch.mmio_is_bigendian) {
298 switch (run->mmio.len) { 298 switch (run->mmio.len) {
299 case 8: gpr = *(u64 *)run->mmio.data; break; 299 case 8: gpr = *(u64 *)run->mmio.data; break;
300 case 4: gpr = *(u32 *)run->mmio.data; break; 300 case 4: gpr = *(u32 *)run->mmio.data; break;
301 case 2: gpr = *(u16 *)run->mmio.data; break; 301 case 2: gpr = *(u16 *)run->mmio.data; break;
302 case 1: gpr = *(u8 *)run->mmio.data; break; 302 case 1: gpr = *(u8 *)run->mmio.data; break;
303 } 303 }
304 } else { 304 } else {
305 /* Convert BE data from userland back to LE. */ 305 /* Convert BE data from userland back to LE. */
306 switch (run->mmio.len) { 306 switch (run->mmio.len) {
307 case 4: gpr = ld_le32((u32 *)run->mmio.data); break; 307 case 4: gpr = ld_le32((u32 *)run->mmio.data); break;
308 case 2: gpr = ld_le16((u16 *)run->mmio.data); break; 308 case 2: gpr = ld_le16((u16 *)run->mmio.data); break;
309 case 1: gpr = *(u8 *)run->mmio.data; break; 309 case 1: gpr = *(u8 *)run->mmio.data; break;
310 } 310 }
311 } 311 }
312 312
313 if (vcpu->arch.mmio_sign_extend) { 313 if (vcpu->arch.mmio_sign_extend) {
314 switch (run->mmio.len) { 314 switch (run->mmio.len) {
315 #ifdef CONFIG_PPC64 315 #ifdef CONFIG_PPC64
316 case 4: 316 case 4:
317 gpr = (s64)(s32)gpr; 317 gpr = (s64)(s32)gpr;
318 break; 318 break;
319 #endif 319 #endif
320 case 2: 320 case 2:
321 gpr = (s64)(s16)gpr; 321 gpr = (s64)(s16)gpr;
322 break; 322 break;
323 case 1: 323 case 1:
324 gpr = (s64)(s8)gpr; 324 gpr = (s64)(s8)gpr;
325 break; 325 break;
326 } 326 }
327 } 327 }
328 328
329 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); 329 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
330 330
331 switch (vcpu->arch.io_gpr & KVM_REG_EXT_MASK) { 331 switch (vcpu->arch.io_gpr & KVM_REG_EXT_MASK) {
332 case KVM_REG_GPR: 332 case KVM_REG_GPR:
333 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); 333 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
334 break; 334 break;
335 case KVM_REG_FPR: 335 case KVM_REG_FPR:
336 vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; 336 vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
337 break; 337 break;
338 #ifdef CONFIG_PPC_BOOK3S 338 #ifdef CONFIG_PPC_BOOK3S
339 case KVM_REG_QPR: 339 case KVM_REG_QPR:
340 vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; 340 vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
341 break; 341 break;
342 case KVM_REG_FQPR: 342 case KVM_REG_FQPR:
343 vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; 343 vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
344 vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; 344 vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
345 break; 345 break;
346 #endif 346 #endif
347 default: 347 default:
348 BUG(); 348 BUG();
349 } 349 }
350 } 350 }
351 351
352 int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, 352 int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
353 unsigned int rt, unsigned int bytes, int is_bigendian) 353 unsigned int rt, unsigned int bytes, int is_bigendian)
354 { 354 {
355 if (bytes > sizeof(run->mmio.data)) { 355 if (bytes > sizeof(run->mmio.data)) {
356 printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, 356 printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
357 run->mmio.len); 357 run->mmio.len);
358 } 358 }
359 359
360 run->mmio.phys_addr = vcpu->arch.paddr_accessed; 360 run->mmio.phys_addr = vcpu->arch.paddr_accessed;
361 run->mmio.len = bytes; 361 run->mmio.len = bytes;
362 run->mmio.is_write = 0; 362 run->mmio.is_write = 0;
363 363
364 vcpu->arch.io_gpr = rt; 364 vcpu->arch.io_gpr = rt;
365 vcpu->arch.mmio_is_bigendian = is_bigendian; 365 vcpu->arch.mmio_is_bigendian = is_bigendian;
366 vcpu->mmio_needed = 1; 366 vcpu->mmio_needed = 1;
367 vcpu->mmio_is_write = 0; 367 vcpu->mmio_is_write = 0;
368 vcpu->arch.mmio_sign_extend = 0; 368 vcpu->arch.mmio_sign_extend = 0;
369 369
370 return EMULATE_DO_MMIO; 370 return EMULATE_DO_MMIO;
371 } 371 }
372 372
373 /* Same as above, but sign extends */ 373 /* Same as above, but sign extends */
374 int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, 374 int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
375 unsigned int rt, unsigned int bytes, int is_bigendian) 375 unsigned int rt, unsigned int bytes, int is_bigendian)
376 { 376 {
377 int r; 377 int r;
378 378
379 r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian); 379 r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
380 vcpu->arch.mmio_sign_extend = 1; 380 vcpu->arch.mmio_sign_extend = 1;
381 381
382 return r; 382 return r;
383 } 383 }
384 384
385 int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, 385 int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
386 u64 val, unsigned int bytes, int is_bigendian) 386 u64 val, unsigned int bytes, int is_bigendian)
387 { 387 {
388 void *data = run->mmio.data; 388 void *data = run->mmio.data;
389 389
390 if (bytes > sizeof(run->mmio.data)) { 390 if (bytes > sizeof(run->mmio.data)) {
391 printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, 391 printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
392 run->mmio.len); 392 run->mmio.len);
393 } 393 }
394 394
395 run->mmio.phys_addr = vcpu->arch.paddr_accessed; 395 run->mmio.phys_addr = vcpu->arch.paddr_accessed;
396 run->mmio.len = bytes; 396 run->mmio.len = bytes;
397 run->mmio.is_write = 1; 397 run->mmio.is_write = 1;
398 vcpu->mmio_needed = 1; 398 vcpu->mmio_needed = 1;
399 vcpu->mmio_is_write = 1; 399 vcpu->mmio_is_write = 1;
400 400
401 /* Store the value at the lowest bytes in 'data'. */ 401 /* Store the value at the lowest bytes in 'data'. */
402 if (is_bigendian) { 402 if (is_bigendian) {
403 switch (bytes) { 403 switch (bytes) {
404 case 8: *(u64 *)data = val; break; 404 case 8: *(u64 *)data = val; break;
405 case 4: *(u32 *)data = val; break; 405 case 4: *(u32 *)data = val; break;
406 case 2: *(u16 *)data = val; break; 406 case 2: *(u16 *)data = val; break;
407 case 1: *(u8 *)data = val; break; 407 case 1: *(u8 *)data = val; break;
408 } 408 }
409 } else { 409 } else {
410 /* Store LE value into 'data'. */ 410 /* Store LE value into 'data'. */
411 switch (bytes) { 411 switch (bytes) {
412 case 4: st_le32(data, val); break; 412 case 4: st_le32(data, val); break;
413 case 2: st_le16(data, val); break; 413 case 2: st_le16(data, val); break;
414 case 1: *(u8 *)data = val; break; 414 case 1: *(u8 *)data = val; break;
415 } 415 }
416 } 416 }
417 417
418 return EMULATE_DO_MMIO; 418 return EMULATE_DO_MMIO;
419 } 419 }
420 420
421 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) 421 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
422 { 422 {
423 int r; 423 int r;
424 sigset_t sigsaved; 424 sigset_t sigsaved;
425 425
426 vcpu_load(vcpu);
427
428 if (vcpu->sigset_active) 426 if (vcpu->sigset_active)
429 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 427 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
430 428
431 if (vcpu->mmio_needed) { 429 if (vcpu->mmio_needed) {
432 if (!vcpu->mmio_is_write) 430 if (!vcpu->mmio_is_write)
433 kvmppc_complete_mmio_load(vcpu, run); 431 kvmppc_complete_mmio_load(vcpu, run);
434 vcpu->mmio_needed = 0; 432 vcpu->mmio_needed = 0;
435 } else if (vcpu->arch.dcr_needed) { 433 } else if (vcpu->arch.dcr_needed) {
436 if (!vcpu->arch.dcr_is_write) 434 if (!vcpu->arch.dcr_is_write)
437 kvmppc_complete_dcr_load(vcpu, run); 435 kvmppc_complete_dcr_load(vcpu, run);
438 vcpu->arch.dcr_needed = 0; 436 vcpu->arch.dcr_needed = 0;
439 } else if (vcpu->arch.osi_needed) { 437 } else if (vcpu->arch.osi_needed) {
440 u64 *gprs = run->osi.gprs; 438 u64 *gprs = run->osi.gprs;
441 int i; 439 int i;
442 440
443 for (i = 0; i < 32; i++) 441 for (i = 0; i < 32; i++)
444 kvmppc_set_gpr(vcpu, i, gprs[i]); 442 kvmppc_set_gpr(vcpu, i, gprs[i]);
445 vcpu->arch.osi_needed = 0; 443 vcpu->arch.osi_needed = 0;
446 } 444 }
447 445
448 kvmppc_core_deliver_interrupts(vcpu); 446 kvmppc_core_deliver_interrupts(vcpu);
449 447
450 local_irq_disable(); 448 local_irq_disable();
451 kvm_guest_enter(); 449 kvm_guest_enter();
452 r = __kvmppc_vcpu_run(run, vcpu); 450 r = __kvmppc_vcpu_run(run, vcpu);
453 kvm_guest_exit(); 451 kvm_guest_exit();
454 local_irq_enable(); 452 local_irq_enable();
455 453
456 if (vcpu->sigset_active) 454 if (vcpu->sigset_active)
457 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 455 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
458
459 vcpu_put(vcpu);
460 456
461 return r; 457 return r;
462 } 458 }
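
For context on the mmio_needed/kvmppc_complete_mmio_load() path in the function above: a read that exits with KVM_EXIT_MMIO is completed by userspace filling run->mmio.data and re-entering KVM_RUN. The following is only a rough userspace sketch under assumptions not in this diff — device_read()/device_write() are hypothetical device-model helpers, and mapping struct kvm_run via mmap() on the vcpu fd is assumed to have happened elsewhere.

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	/* hypothetical device-model helpers, not part of KVM */
	void device_read(__u64 addr, void *data, __u32 len);
	void device_write(__u64 addr, const void *data, __u32 len);

	static void run_vcpu(int vcpu_fd, struct kvm_run *run)
	{
		for (;;) {
			if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
				break;
			if (run->exit_reason != KVM_EXIT_MMIO)
				break;
			if (run->mmio.is_write)
				device_write(run->mmio.phys_addr, run->mmio.data,
					     run->mmio.len);
			else
				/* data written here is consumed by
				 * kvmppc_complete_mmio_load() on the next KVM_RUN */
				device_read(run->mmio.phys_addr, run->mmio.data,
					    run->mmio.len);
		}
	}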
463 459
464 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) 460 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
465 { 461 {
466 if (irq->irq == KVM_INTERRUPT_UNSET) 462 if (irq->irq == KVM_INTERRUPT_UNSET)
467 kvmppc_core_dequeue_external(vcpu, irq); 463 kvmppc_core_dequeue_external(vcpu, irq);
468 else 464 else
469 kvmppc_core_queue_external(vcpu, irq); 465 kvmppc_core_queue_external(vcpu, irq);
470 466
471 if (waitqueue_active(&vcpu->wq)) { 467 if (waitqueue_active(&vcpu->wq)) {
472 wake_up_interruptible(&vcpu->wq); 468 wake_up_interruptible(&vcpu->wq);
473 vcpu->stat.halt_wakeup++; 469 vcpu->stat.halt_wakeup++;
474 } 470 }
475 471
476 return 0; 472 return 0;
477 } 473 }
478 474
479 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 475 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
480 struct kvm_enable_cap *cap) 476 struct kvm_enable_cap *cap)
481 { 477 {
482 int r; 478 int r;
483 479
484 if (cap->flags) 480 if (cap->flags)
485 return -EINVAL; 481 return -EINVAL;
486 482
487 switch (cap->cap) { 483 switch (cap->cap) {
488 case KVM_CAP_PPC_OSI: 484 case KVM_CAP_PPC_OSI:
489 r = 0; 485 r = 0;
490 vcpu->arch.osi_enabled = true; 486 vcpu->arch.osi_enabled = true;
491 break; 487 break;
492 default: 488 default:
493 r = -EINVAL; 489 r = -EINVAL;
494 break; 490 break;
495 } 491 }
496 492
497 return r; 493 return r;
498 } 494 }
499 495
500 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 496 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
501 struct kvm_mp_state *mp_state) 497 struct kvm_mp_state *mp_state)
502 { 498 {
503 return -EINVAL; 499 return -EINVAL;
504 } 500 }
505 501
506 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 502 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
507 struct kvm_mp_state *mp_state) 503 struct kvm_mp_state *mp_state)
508 { 504 {
509 return -EINVAL; 505 return -EINVAL;
510 } 506 }
511 507
512 long kvm_arch_vcpu_ioctl(struct file *filp, 508 long kvm_arch_vcpu_ioctl(struct file *filp,
513 unsigned int ioctl, unsigned long arg) 509 unsigned int ioctl, unsigned long arg)
514 { 510 {
515 struct kvm_vcpu *vcpu = filp->private_data; 511 struct kvm_vcpu *vcpu = filp->private_data;
516 void __user *argp = (void __user *)arg; 512 void __user *argp = (void __user *)arg;
517 long r; 513 long r;
518 514
519 switch (ioctl) { 515 switch (ioctl) {
520 case KVM_INTERRUPT: { 516 case KVM_INTERRUPT: {
521 struct kvm_interrupt irq; 517 struct kvm_interrupt irq;
522 r = -EFAULT; 518 r = -EFAULT;
523 if (copy_from_user(&irq, argp, sizeof(irq))) 519 if (copy_from_user(&irq, argp, sizeof(irq)))
524 goto out; 520 goto out;
525 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); 521 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
526 break; 522 break;
527 } 523 }
528 case KVM_ENABLE_CAP: 524 case KVM_ENABLE_CAP:
529 { 525 {
530 struct kvm_enable_cap cap; 526 struct kvm_enable_cap cap;
531 r = -EFAULT; 527 r = -EFAULT;
532 if (copy_from_user(&cap, argp, sizeof(cap))) 528 if (copy_from_user(&cap, argp, sizeof(cap)))
533 goto out; 529 goto out;
534 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); 530 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
535 break; 531 break;
536 } 532 }
537 default: 533 default:
538 r = -EINVAL; 534 r = -EINVAL;
539 } 535 }
540 536
541 out: 537 out:
542 return r; 538 return r;
543 } 539 }
544 540
545 long kvm_arch_vm_ioctl(struct file *filp, 541 long kvm_arch_vm_ioctl(struct file *filp,
546 unsigned int ioctl, unsigned long arg) 542 unsigned int ioctl, unsigned long arg)
547 { 543 {
548 long r; 544 long r;
549 545
550 switch (ioctl) { 546 switch (ioctl) {
551 default: 547 default:
552 r = -ENOTTY; 548 r = -ENOTTY;
553 } 549 }
554 550
555 return r; 551 return r;
556 } 552 }
557 553
558 int kvm_arch_init(void *opaque) 554 int kvm_arch_init(void *opaque)
559 { 555 {
560 return 0; 556 return 0;
561 } 557 }
562 558
563 void kvm_arch_exit(void) 559 void kvm_arch_exit(void)
564 { 560 {
565 } 561 }
566 562
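
The kvm_arch_vcpu_ioctl_run() hunk above, like the s390 register accessors in the next file, drops its local vcpu_load()/vcpu_put() pair. A minimal sketch of the dispatcher-level locking this relies on is shown below; it is not the verbatim virt/kvm/kvm_main.c change, just an illustration of taking the per-vcpu lock once around the whole ioctl.

	/*
	 * Sketch only: a vcpu ioctl dispatcher that holds the vcpu lock for
	 * the duration of the call, so the per-arch handlers in this diff
	 * no longer lock individually.
	 */
	static long kvm_vcpu_ioctl(struct file *filp,
				   unsigned int ioctl, unsigned long arg)
	{
		struct kvm_vcpu *vcpu = filp->private_data;
		long r;

		vcpu_load(vcpu);		/* lock held across the whole ioctl */
		switch (ioctl) {
		default:
			/* generic and arch handlers now run under the lock */
			r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
			break;
		}
		vcpu_put(vcpu);
		return r;
	}

arch/s390/kvm/kvm-s390.c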
1 /* 1 /*
2 * s390host.c -- hosting zSeries kernel virtual machines 2 * s390host.c -- hosting zSeries kernel virtual machines
3 * 3 *
4 * Copyright IBM Corp. 2008,2009 4 * Copyright IBM Corp. 2008,2009
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only) 7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation. 8 * as published by the Free Software Foundation.
9 * 9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com> 10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com> 11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com> 12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com> 13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 */ 14 */
15 15
16 #include <linux/compiler.h> 16 #include <linux/compiler.h>
17 #include <linux/err.h> 17 #include <linux/err.h>
18 #include <linux/fs.h> 18 #include <linux/fs.h>
19 #include <linux/hrtimer.h> 19 #include <linux/hrtimer.h>
20 #include <linux/init.h> 20 #include <linux/init.h>
21 #include <linux/kvm.h> 21 #include <linux/kvm.h>
22 #include <linux/kvm_host.h> 22 #include <linux/kvm_host.h>
23 #include <linux/module.h> 23 #include <linux/module.h>
24 #include <linux/slab.h> 24 #include <linux/slab.h>
25 #include <linux/timer.h> 25 #include <linux/timer.h>
26 #include <asm/asm-offsets.h> 26 #include <asm/asm-offsets.h>
27 #include <asm/lowcore.h> 27 #include <asm/lowcore.h>
28 #include <asm/pgtable.h> 28 #include <asm/pgtable.h>
29 #include <asm/nmi.h> 29 #include <asm/nmi.h>
30 #include <asm/system.h> 30 #include <asm/system.h>
31 #include "kvm-s390.h" 31 #include "kvm-s390.h"
32 #include "gaccess.h" 32 #include "gaccess.h"
33 33
34 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 34 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
35 35
36 struct kvm_stats_debugfs_item debugfs_entries[] = { 36 struct kvm_stats_debugfs_item debugfs_entries[] = {
37 { "userspace_handled", VCPU_STAT(exit_userspace) }, 37 { "userspace_handled", VCPU_STAT(exit_userspace) },
38 { "exit_null", VCPU_STAT(exit_null) }, 38 { "exit_null", VCPU_STAT(exit_null) },
39 { "exit_validity", VCPU_STAT(exit_validity) }, 39 { "exit_validity", VCPU_STAT(exit_validity) },
40 { "exit_stop_request", VCPU_STAT(exit_stop_request) }, 40 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
41 { "exit_external_request", VCPU_STAT(exit_external_request) }, 41 { "exit_external_request", VCPU_STAT(exit_external_request) },
42 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, 42 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
43 { "exit_instruction", VCPU_STAT(exit_instruction) }, 43 { "exit_instruction", VCPU_STAT(exit_instruction) },
44 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, 44 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
45 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, 45 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
46 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, 46 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
47 { "instruction_lctl", VCPU_STAT(instruction_lctl) }, 47 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
48 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, 48 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
49 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, 49 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
50 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, 50 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
51 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, 51 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
52 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, 52 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
53 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, 53 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
54 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, 54 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
55 { "exit_wait_state", VCPU_STAT(exit_wait_state) }, 55 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
56 { "instruction_stidp", VCPU_STAT(instruction_stidp) }, 56 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
57 { "instruction_spx", VCPU_STAT(instruction_spx) }, 57 { "instruction_spx", VCPU_STAT(instruction_spx) },
58 { "instruction_stpx", VCPU_STAT(instruction_stpx) }, 58 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
59 { "instruction_stap", VCPU_STAT(instruction_stap) }, 59 { "instruction_stap", VCPU_STAT(instruction_stap) },
60 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) }, 60 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
61 { "instruction_stsch", VCPU_STAT(instruction_stsch) }, 61 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
62 { "instruction_chsc", VCPU_STAT(instruction_chsc) }, 62 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
63 { "instruction_stsi", VCPU_STAT(instruction_stsi) }, 63 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
64 { "instruction_stfl", VCPU_STAT(instruction_stfl) }, 64 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
65 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, 65 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
66 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, 66 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
67 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, 67 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
68 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, 68 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
69 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, 69 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
70 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, 70 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
71 { "diagnose_44", VCPU_STAT(diagnose_44) }, 71 { "diagnose_44", VCPU_STAT(diagnose_44) },
72 { NULL } 72 { NULL }
73 }; 73 };
74 74
75 static unsigned long long *facilities; 75 static unsigned long long *facilities;
76 76
77 /* Section: not file related */ 77 /* Section: not file related */
78 int kvm_arch_hardware_enable(void *garbage) 78 int kvm_arch_hardware_enable(void *garbage)
79 { 79 {
80 /* every s390 is virtualization enabled ;-) */ 80 /* every s390 is virtualization enabled ;-) */
81 return 0; 81 return 0;
82 } 82 }
83 83
84 void kvm_arch_hardware_disable(void *garbage) 84 void kvm_arch_hardware_disable(void *garbage)
85 { 85 {
86 } 86 }
87 87
88 int kvm_arch_hardware_setup(void) 88 int kvm_arch_hardware_setup(void)
89 { 89 {
90 return 0; 90 return 0;
91 } 91 }
92 92
93 void kvm_arch_hardware_unsetup(void) 93 void kvm_arch_hardware_unsetup(void)
94 { 94 {
95 } 95 }
96 96
97 void kvm_arch_check_processor_compat(void *rtn) 97 void kvm_arch_check_processor_compat(void *rtn)
98 { 98 {
99 } 99 }
100 100
101 int kvm_arch_init(void *opaque) 101 int kvm_arch_init(void *opaque)
102 { 102 {
103 return 0; 103 return 0;
104 } 104 }
105 105
106 void kvm_arch_exit(void) 106 void kvm_arch_exit(void)
107 { 107 {
108 } 108 }
109 109
110 /* Section: device related */ 110 /* Section: device related */
111 long kvm_arch_dev_ioctl(struct file *filp, 111 long kvm_arch_dev_ioctl(struct file *filp,
112 unsigned int ioctl, unsigned long arg) 112 unsigned int ioctl, unsigned long arg)
113 { 113 {
114 if (ioctl == KVM_S390_ENABLE_SIE) 114 if (ioctl == KVM_S390_ENABLE_SIE)
115 return s390_enable_sie(); 115 return s390_enable_sie();
116 return -EINVAL; 116 return -EINVAL;
117 } 117 }
118 118
119 int kvm_dev_ioctl_check_extension(long ext) 119 int kvm_dev_ioctl_check_extension(long ext)
120 { 120 {
121 int r; 121 int r;
122 122
123 switch (ext) { 123 switch (ext) {
124 case KVM_CAP_S390_PSW: 124 case KVM_CAP_S390_PSW:
125 r = 1; 125 r = 1;
126 break; 126 break;
127 default: 127 default:
128 r = 0; 128 r = 0;
129 } 129 }
130 return r; 130 return r;
131 } 131 }
132 132
133 /* Section: vm related */ 133 /* Section: vm related */
134 /* 134 /*
135 * Get (and clear) the dirty memory log for a memory slot. 135 * Get (and clear) the dirty memory log for a memory slot.
136 */ 136 */
137 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 137 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
138 struct kvm_dirty_log *log) 138 struct kvm_dirty_log *log)
139 { 139 {
140 return 0; 140 return 0;
141 } 141 }
142 142
143 long kvm_arch_vm_ioctl(struct file *filp, 143 long kvm_arch_vm_ioctl(struct file *filp,
144 unsigned int ioctl, unsigned long arg) 144 unsigned int ioctl, unsigned long arg)
145 { 145 {
146 struct kvm *kvm = filp->private_data; 146 struct kvm *kvm = filp->private_data;
147 void __user *argp = (void __user *)arg; 147 void __user *argp = (void __user *)arg;
148 int r; 148 int r;
149 149
150 switch (ioctl) { 150 switch (ioctl) {
151 case KVM_S390_INTERRUPT: { 151 case KVM_S390_INTERRUPT: {
152 struct kvm_s390_interrupt s390int; 152 struct kvm_s390_interrupt s390int;
153 153
154 r = -EFAULT; 154 r = -EFAULT;
155 if (copy_from_user(&s390int, argp, sizeof(s390int))) 155 if (copy_from_user(&s390int, argp, sizeof(s390int)))
156 break; 156 break;
157 r = kvm_s390_inject_vm(kvm, &s390int); 157 r = kvm_s390_inject_vm(kvm, &s390int);
158 break; 158 break;
159 } 159 }
160 default: 160 default:
161 r = -ENOTTY; 161 r = -ENOTTY;
162 } 162 }
163 163
164 return r; 164 return r;
165 } 165 }
166 166
167 struct kvm *kvm_arch_create_vm(void) 167 struct kvm *kvm_arch_create_vm(void)
168 { 168 {
169 struct kvm *kvm; 169 struct kvm *kvm;
170 int rc; 170 int rc;
171 char debug_name[16]; 171 char debug_name[16];
172 172
173 rc = s390_enable_sie(); 173 rc = s390_enable_sie();
174 if (rc) 174 if (rc)
175 goto out_nokvm; 175 goto out_nokvm;
176 176
177 rc = -ENOMEM; 177 rc = -ENOMEM;
178 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); 178 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
179 if (!kvm) 179 if (!kvm)
180 goto out_nokvm; 180 goto out_nokvm;
181 181
182 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); 182 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
183 if (!kvm->arch.sca) 183 if (!kvm->arch.sca)
184 goto out_nosca; 184 goto out_nosca;
185 185
186 sprintf(debug_name, "kvm-%u", current->pid); 186 sprintf(debug_name, "kvm-%u", current->pid);
187 187
188 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long)); 188 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
189 if (!kvm->arch.dbf) 189 if (!kvm->arch.dbf)
190 goto out_nodbf; 190 goto out_nodbf;
191 191
192 spin_lock_init(&kvm->arch.float_int.lock); 192 spin_lock_init(&kvm->arch.float_int.lock);
193 INIT_LIST_HEAD(&kvm->arch.float_int.list); 193 INIT_LIST_HEAD(&kvm->arch.float_int.list);
194 194
195 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 195 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
196 VM_EVENT(kvm, 3, "%s", "vm created"); 196 VM_EVENT(kvm, 3, "%s", "vm created");
197 197
198 return kvm; 198 return kvm;
199 out_nodbf: 199 out_nodbf:
200 free_page((unsigned long)(kvm->arch.sca)); 200 free_page((unsigned long)(kvm->arch.sca));
201 out_nosca: 201 out_nosca:
202 kfree(kvm); 202 kfree(kvm);
203 out_nokvm: 203 out_nokvm:
204 return ERR_PTR(rc); 204 return ERR_PTR(rc);
205 } 205 }
206 206
207 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 207 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
208 { 208 {
209 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 209 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
210 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == 210 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
211 (__u64) vcpu->arch.sie_block) 211 (__u64) vcpu->arch.sie_block)
212 vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; 212 vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
213 smp_mb(); 213 smp_mb();
214 free_page((unsigned long)(vcpu->arch.sie_block)); 214 free_page((unsigned long)(vcpu->arch.sie_block));
215 kvm_vcpu_uninit(vcpu); 215 kvm_vcpu_uninit(vcpu);
216 kfree(vcpu); 216 kfree(vcpu);
217 } 217 }
218 218
219 static void kvm_free_vcpus(struct kvm *kvm) 219 static void kvm_free_vcpus(struct kvm *kvm)
220 { 220 {
221 unsigned int i; 221 unsigned int i;
222 struct kvm_vcpu *vcpu; 222 struct kvm_vcpu *vcpu;
223 223
224 kvm_for_each_vcpu(i, vcpu, kvm) 224 kvm_for_each_vcpu(i, vcpu, kvm)
225 kvm_arch_vcpu_destroy(vcpu); 225 kvm_arch_vcpu_destroy(vcpu);
226 226
227 mutex_lock(&kvm->lock); 227 mutex_lock(&kvm->lock);
228 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) 228 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
229 kvm->vcpus[i] = NULL; 229 kvm->vcpus[i] = NULL;
230 230
231 atomic_set(&kvm->online_vcpus, 0); 231 atomic_set(&kvm->online_vcpus, 0);
232 mutex_unlock(&kvm->lock); 232 mutex_unlock(&kvm->lock);
233 } 233 }
234 234
235 void kvm_arch_sync_events(struct kvm *kvm) 235 void kvm_arch_sync_events(struct kvm *kvm)
236 { 236 {
237 } 237 }
238 238
239 void kvm_arch_destroy_vm(struct kvm *kvm) 239 void kvm_arch_destroy_vm(struct kvm *kvm)
240 { 240 {
241 kvm_free_vcpus(kvm); 241 kvm_free_vcpus(kvm);
242 kvm_free_physmem(kvm); 242 kvm_free_physmem(kvm);
243 free_page((unsigned long)(kvm->arch.sca)); 243 free_page((unsigned long)(kvm->arch.sca));
244 debug_unregister(kvm->arch.dbf); 244 debug_unregister(kvm->arch.dbf);
245 cleanup_srcu_struct(&kvm->srcu); 245 cleanup_srcu_struct(&kvm->srcu);
246 kfree(kvm); 246 kfree(kvm);
247 } 247 }
248 248
249 /* Section: vcpu related */ 249 /* Section: vcpu related */
250 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 250 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
251 { 251 {
252 return 0; 252 return 0;
253 } 253 }
254 254
255 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 255 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
256 { 256 {
257 /* Nothing to do */ 257 /* Nothing to do */
258 } 258 }
259 259
260 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 260 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
261 { 261 {
262 save_fp_regs(&vcpu->arch.host_fpregs); 262 save_fp_regs(&vcpu->arch.host_fpregs);
263 save_access_regs(vcpu->arch.host_acrs); 263 save_access_regs(vcpu->arch.host_acrs);
264 vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK; 264 vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
265 restore_fp_regs(&vcpu->arch.guest_fpregs); 265 restore_fp_regs(&vcpu->arch.guest_fpregs);
266 restore_access_regs(vcpu->arch.guest_acrs); 266 restore_access_regs(vcpu->arch.guest_acrs);
267 } 267 }
268 268
269 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 269 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
270 { 270 {
271 save_fp_regs(&vcpu->arch.guest_fpregs); 271 save_fp_regs(&vcpu->arch.guest_fpregs);
272 save_access_regs(vcpu->arch.guest_acrs); 272 save_access_regs(vcpu->arch.guest_acrs);
273 restore_fp_regs(&vcpu->arch.host_fpregs); 273 restore_fp_regs(&vcpu->arch.host_fpregs);
274 restore_access_regs(vcpu->arch.host_acrs); 274 restore_access_regs(vcpu->arch.host_acrs);
275 } 275 }
276 276
277 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) 277 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
278 { 278 {
279 /* this equals initial cpu reset in pop, but we don't switch to ESA */ 279 /* this equals initial cpu reset in pop, but we don't switch to ESA */
280 vcpu->arch.sie_block->gpsw.mask = 0UL; 280 vcpu->arch.sie_block->gpsw.mask = 0UL;
281 vcpu->arch.sie_block->gpsw.addr = 0UL; 281 vcpu->arch.sie_block->gpsw.addr = 0UL;
282 vcpu->arch.sie_block->prefix = 0UL; 282 vcpu->arch.sie_block->prefix = 0UL;
283 vcpu->arch.sie_block->ihcpu = 0xffff; 283 vcpu->arch.sie_block->ihcpu = 0xffff;
284 vcpu->arch.sie_block->cputm = 0UL; 284 vcpu->arch.sie_block->cputm = 0UL;
285 vcpu->arch.sie_block->ckc = 0UL; 285 vcpu->arch.sie_block->ckc = 0UL;
286 vcpu->arch.sie_block->todpr = 0; 286 vcpu->arch.sie_block->todpr = 0;
287 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); 287 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
288 vcpu->arch.sie_block->gcr[0] = 0xE0UL; 288 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
289 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL; 289 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
290 vcpu->arch.guest_fpregs.fpc = 0; 290 vcpu->arch.guest_fpregs.fpc = 0;
291 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); 291 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
292 vcpu->arch.sie_block->gbea = 1; 292 vcpu->arch.sie_block->gbea = 1;
293 } 293 }
294 294
295 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 295 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
296 { 296 {
297 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH); 297 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
298 set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests); 298 set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests);
299 vcpu->arch.sie_block->ecb = 2; 299 vcpu->arch.sie_block->ecb = 2;
300 vcpu->arch.sie_block->eca = 0xC1002001U; 300 vcpu->arch.sie_block->eca = 0xC1002001U;
301 vcpu->arch.sie_block->fac = (int) (long) facilities; 301 vcpu->arch.sie_block->fac = (int) (long) facilities;
302 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); 302 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
303 tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, 303 tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
304 (unsigned long) vcpu); 304 (unsigned long) vcpu);
305 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 305 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
306 get_cpu_id(&vcpu->arch.cpu_id); 306 get_cpu_id(&vcpu->arch.cpu_id);
307 vcpu->arch.cpu_id.version = 0xff; 307 vcpu->arch.cpu_id.version = 0xff;
308 return 0; 308 return 0;
309 } 309 }
310 310
311 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, 311 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
312 unsigned int id) 312 unsigned int id)
313 { 313 {
314 struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); 314 struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
315 int rc = -ENOMEM; 315 int rc = -ENOMEM;
316 316
317 if (!vcpu) 317 if (!vcpu)
318 goto out_nomem; 318 goto out_nomem;
319 319
320 vcpu->arch.sie_block = (struct kvm_s390_sie_block *) 320 vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
321 get_zeroed_page(GFP_KERNEL); 321 get_zeroed_page(GFP_KERNEL);
322 322
323 if (!vcpu->arch.sie_block) 323 if (!vcpu->arch.sie_block)
324 goto out_free_cpu; 324 goto out_free_cpu;
325 325
326 vcpu->arch.sie_block->icpua = id; 326 vcpu->arch.sie_block->icpua = id;
327 BUG_ON(!kvm->arch.sca); 327 BUG_ON(!kvm->arch.sca);
328 if (!kvm->arch.sca->cpu[id].sda) 328 if (!kvm->arch.sca->cpu[id].sda)
329 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; 329 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
330 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); 330 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
331 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; 331 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
332 332
333 spin_lock_init(&vcpu->arch.local_int.lock); 333 spin_lock_init(&vcpu->arch.local_int.lock);
334 INIT_LIST_HEAD(&vcpu->arch.local_int.list); 334 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
335 vcpu->arch.local_int.float_int = &kvm->arch.float_int; 335 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
336 spin_lock(&kvm->arch.float_int.lock); 336 spin_lock(&kvm->arch.float_int.lock);
337 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; 337 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
338 init_waitqueue_head(&vcpu->arch.local_int.wq); 338 init_waitqueue_head(&vcpu->arch.local_int.wq);
339 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; 339 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
340 spin_unlock(&kvm->arch.float_int.lock); 340 spin_unlock(&kvm->arch.float_int.lock);
341 341
342 rc = kvm_vcpu_init(vcpu, kvm, id); 342 rc = kvm_vcpu_init(vcpu, kvm, id);
343 if (rc) 343 if (rc)
344 goto out_free_sie_block; 344 goto out_free_sie_block;
345 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, 345 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
346 vcpu->arch.sie_block); 346 vcpu->arch.sie_block);
347 347
348 return vcpu; 348 return vcpu;
349 out_free_sie_block: 349 out_free_sie_block:
350 free_page((unsigned long)(vcpu->arch.sie_block)); 350 free_page((unsigned long)(vcpu->arch.sie_block));
351 out_free_cpu: 351 out_free_cpu:
352 kfree(vcpu); 352 kfree(vcpu);
353 out_nomem: 353 out_nomem:
354 return ERR_PTR(rc); 354 return ERR_PTR(rc);
355 } 355 }
356 356
357 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 357 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
358 { 358 {
359 /* kvm common code refers to this, but never calls it */ 359 /* kvm common code refers to this, but never calls it */
360 BUG(); 360 BUG();
361 return 0; 361 return 0;
362 } 362 }
363 363
364 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 364 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
365 { 365 {
366 vcpu_load(vcpu); 366 vcpu_load(vcpu);
367 kvm_s390_vcpu_initial_reset(vcpu); 367 kvm_s390_vcpu_initial_reset(vcpu);
368 vcpu_put(vcpu); 368 vcpu_put(vcpu);
369 return 0; 369 return 0;
370 } 370 }
371 371
372 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 372 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
373 { 373 {
374 vcpu_load(vcpu);
375 memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs)); 374 memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));
376 vcpu_put(vcpu);
377 return 0; 375 return 0;
378 } 376 }
379 377
380 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 378 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
381 { 379 {
382 vcpu_load(vcpu);
383 memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs)); 380 memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
384 vcpu_put(vcpu);
385 return 0; 381 return 0;
386 } 382 }
387 383
388 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 384 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
389 struct kvm_sregs *sregs) 385 struct kvm_sregs *sregs)
390 { 386 {
391 vcpu_load(vcpu);
392 memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); 387 memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
393 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); 388 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
394 vcpu_put(vcpu);
395 return 0; 389 return 0;
396 } 390 }
397 391
398 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 392 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
399 struct kvm_sregs *sregs) 393 struct kvm_sregs *sregs)
400 { 394 {
401 vcpu_load(vcpu);
402 memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs)); 395 memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
403 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); 396 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
404 vcpu_put(vcpu);
405 return 0; 397 return 0;
406 } 398 }
407 399
408 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 400 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
409 { 401 {
410 vcpu_load(vcpu);
411 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); 402 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
412 vcpu->arch.guest_fpregs.fpc = fpu->fpc; 403 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
413 vcpu_put(vcpu);
414 return 0; 404 return 0;
415 } 405 }
416 406
417 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 407 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
418 { 408 {
419 vcpu_load(vcpu);
420 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); 409 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
421 fpu->fpc = vcpu->arch.guest_fpregs.fpc; 410 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
422 vcpu_put(vcpu);
423 return 0; 411 return 0;
424 } 412 }
425 413
426 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) 414 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
427 { 415 {
428 int rc = 0; 416 int rc = 0;
429 417
430 vcpu_load(vcpu); 418 vcpu_load(vcpu);
431 if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) 419 if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
432 rc = -EBUSY; 420 rc = -EBUSY;
433 else { 421 else {
434 vcpu->run->psw_mask = psw.mask; 422 vcpu->run->psw_mask = psw.mask;
435 vcpu->run->psw_addr = psw.addr; 423 vcpu->run->psw_addr = psw.addr;
436 } 424 }
437 vcpu_put(vcpu); 425 vcpu_put(vcpu);
438 return rc; 426 return rc;
439 } 427 }
440 428
441 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 429 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
442 struct kvm_translation *tr) 430 struct kvm_translation *tr)
443 { 431 {
444 return -EINVAL; /* not implemented yet */ 432 return -EINVAL; /* not implemented yet */
445 } 433 }
446 434
447 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 435 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
448 struct kvm_guest_debug *dbg) 436 struct kvm_guest_debug *dbg)
449 { 437 {
450 return -EINVAL; /* not implemented yet */ 438 return -EINVAL; /* not implemented yet */
451 } 439 }
452 440
453 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 441 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
454 struct kvm_mp_state *mp_state) 442 struct kvm_mp_state *mp_state)
455 { 443 {
456 return -EINVAL; /* not implemented yet */ 444 return -EINVAL; /* not implemented yet */
457 } 445 }
458 446
459 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 447 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
460 struct kvm_mp_state *mp_state) 448 struct kvm_mp_state *mp_state)
461 { 449 {
462 return -EINVAL; /* not implemented yet */ 450 return -EINVAL; /* not implemented yet */
463 } 451 }
464 452
465 static void __vcpu_run(struct kvm_vcpu *vcpu) 453 static void __vcpu_run(struct kvm_vcpu *vcpu)
466 { 454 {
467 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16); 455 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
468 456
469 if (need_resched()) 457 if (need_resched())
470 schedule(); 458 schedule();
471 459
472 if (test_thread_flag(TIF_MCCK_PENDING)) 460 if (test_thread_flag(TIF_MCCK_PENDING))
473 s390_handle_mcck(); 461 s390_handle_mcck();
474 462
475 kvm_s390_deliver_pending_interrupts(vcpu); 463 kvm_s390_deliver_pending_interrupts(vcpu);
476 464
477 vcpu->arch.sie_block->icptcode = 0; 465 vcpu->arch.sie_block->icptcode = 0;
478 local_irq_disable(); 466 local_irq_disable();
479 kvm_guest_enter(); 467 kvm_guest_enter();
480 local_irq_enable(); 468 local_irq_enable();
481 VCPU_EVENT(vcpu, 6, "entering sie flags %x", 469 VCPU_EVENT(vcpu, 6, "entering sie flags %x",
482 atomic_read(&vcpu->arch.sie_block->cpuflags)); 470 atomic_read(&vcpu->arch.sie_block->cpuflags));
483 if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) { 471 if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
484 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 472 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
485 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 473 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
486 } 474 }
487 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 475 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
488 vcpu->arch.sie_block->icptcode); 476 vcpu->arch.sie_block->icptcode);
489 local_irq_disable(); 477 local_irq_disable();
490 kvm_guest_exit(); 478 kvm_guest_exit();
491 local_irq_enable(); 479 local_irq_enable();
492 480
493 memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16); 481 memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
494 } 482 }
495 483
496 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 484 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
497 { 485 {
498 int rc; 486 int rc;
499 sigset_t sigsaved; 487 sigset_t sigsaved;
500 488
501 vcpu_load(vcpu);
502
503 rerun_vcpu: 489 rerun_vcpu:
504 if (vcpu->requests) 490 if (vcpu->requests)
505 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) 491 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
506 kvm_s390_vcpu_set_mem(vcpu); 492 kvm_s390_vcpu_set_mem(vcpu);
507 493
508 /* verify that memory has been registered */ 494 /* verify that memory has been registered */
509 if (!vcpu->arch.sie_block->gmslm) { 495 if (!vcpu->arch.sie_block->gmslm) {
510 vcpu_put(vcpu); 496 vcpu_put(vcpu);
511 VCPU_EVENT(vcpu, 3, "%s", "no memory registered to run vcpu"); 497 VCPU_EVENT(vcpu, 3, "%s", "no memory registered to run vcpu");
512 return -EINVAL; 498 return -EINVAL;
513 } 499 }
514 500
515 if (vcpu->sigset_active) 501 if (vcpu->sigset_active)
516 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 502 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
517 503
518 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); 504 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
519 505
520 BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL); 506 BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
521 507
522 switch (kvm_run->exit_reason) { 508 switch (kvm_run->exit_reason) {
523 case KVM_EXIT_S390_SIEIC: 509 case KVM_EXIT_S390_SIEIC:
524 case KVM_EXIT_UNKNOWN: 510 case KVM_EXIT_UNKNOWN:
525 case KVM_EXIT_INTR: 511 case KVM_EXIT_INTR:
526 case KVM_EXIT_S390_RESET: 512 case KVM_EXIT_S390_RESET:
527 break; 513 break;
528 default: 514 default:
529 BUG(); 515 BUG();
530 } 516 }
531 517
532 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 518 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
533 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 519 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
534 520
535 might_fault(); 521 might_fault();
536 522
537 do { 523 do {
538 __vcpu_run(vcpu); 524 __vcpu_run(vcpu);
539 rc = kvm_handle_sie_intercept(vcpu); 525 rc = kvm_handle_sie_intercept(vcpu);
540 } while (!signal_pending(current) && !rc); 526 } while (!signal_pending(current) && !rc);
541 527
542 if (rc == SIE_INTERCEPT_RERUNVCPU) 528 if (rc == SIE_INTERCEPT_RERUNVCPU)
543 goto rerun_vcpu; 529 goto rerun_vcpu;
544 530
545 if (signal_pending(current) && !rc) { 531 if (signal_pending(current) && !rc) {
546 kvm_run->exit_reason = KVM_EXIT_INTR; 532 kvm_run->exit_reason = KVM_EXIT_INTR;
547 rc = -EINTR; 533 rc = -EINTR;
548 } 534 }
549 535
550 if (rc == -EOPNOTSUPP) { 536 if (rc == -EOPNOTSUPP) {
551 /* intercept cannot be handled in-kernel, prepare kvm-run */ 537 /* intercept cannot be handled in-kernel, prepare kvm-run */
552 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; 538 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
553 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 539 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
554 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 540 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
555 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 541 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
556 rc = 0; 542 rc = 0;
557 } 543 }
558 544
559 if (rc == -EREMOTE) { 545 if (rc == -EREMOTE) {
560 /* intercept was handled, but userspace support is needed 546 /* intercept was handled, but userspace support is needed
561 * kvm_run has been prepared by the handler */ 547 * kvm_run has been prepared by the handler */
562 rc = 0; 548 rc = 0;
563 } 549 }
564 550
565 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 551 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
566 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 552 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
567 553
568 if (vcpu->sigset_active) 554 if (vcpu->sigset_active)
569 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 555 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
570
571 vcpu_put(vcpu);
572 556
573 vcpu->stat.exit_userspace++; 557 vcpu->stat.exit_userspace++;
574 return rc; 558 return rc;
575 } 559 }
576 560
577 static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from, 561 static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
578 unsigned long n, int prefix) 562 unsigned long n, int prefix)
579 { 563 {
580 if (prefix) 564 if (prefix)
581 return copy_to_guest(vcpu, guestdest, from, n); 565 return copy_to_guest(vcpu, guestdest, from, n);
582 else 566 else
583 return copy_to_guest_absolute(vcpu, guestdest, from, n); 567 return copy_to_guest_absolute(vcpu, guestdest, from, n);
584 } 568 }
585 569
586 /* 570 /*
587 * store status at address 571 * store status at address
588 * we have two special cases: 572 * we have two special cases:
589 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 573 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
590 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 574 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
591 */ 575 */
592 int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 576 int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
593 { 577 {
594 const unsigned char archmode = 1; 578 const unsigned char archmode = 1;
595 int prefix; 579 int prefix;
596 580
597 if (addr == KVM_S390_STORE_STATUS_NOADDR) { 581 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
598 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1)) 582 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
599 return -EFAULT; 583 return -EFAULT;
600 addr = SAVE_AREA_BASE; 584 addr = SAVE_AREA_BASE;
601 prefix = 0; 585 prefix = 0;
602 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) { 586 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
603 if (copy_to_guest(vcpu, 163ul, &archmode, 1)) 587 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
604 return -EFAULT; 588 return -EFAULT;
605 addr = SAVE_AREA_BASE; 589 addr = SAVE_AREA_BASE;
606 prefix = 1; 590 prefix = 1;
607 } else 591 } else
608 prefix = 0; 592 prefix = 0;
609 593
610 if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs), 594 if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
611 vcpu->arch.guest_fpregs.fprs, 128, prefix)) 595 vcpu->arch.guest_fpregs.fprs, 128, prefix))
612 return -EFAULT; 596 return -EFAULT;
613 597
614 if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs), 598 if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
615 vcpu->arch.guest_gprs, 128, prefix)) 599 vcpu->arch.guest_gprs, 128, prefix))
616 return -EFAULT; 600 return -EFAULT;
617 601
618 if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw), 602 if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
619 &vcpu->arch.sie_block->gpsw, 16, prefix)) 603 &vcpu->arch.sie_block->gpsw, 16, prefix))
620 return -EFAULT; 604 return -EFAULT;
621 605
622 if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg), 606 if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg),
623 &vcpu->arch.sie_block->prefix, 4, prefix)) 607 &vcpu->arch.sie_block->prefix, 4, prefix))
624 return -EFAULT; 608 return -EFAULT;
625 609
626 if (__guestcopy(vcpu, 610 if (__guestcopy(vcpu,
627 addr + offsetof(struct save_area, fp_ctrl_reg), 611 addr + offsetof(struct save_area, fp_ctrl_reg),
628 &vcpu->arch.guest_fpregs.fpc, 4, prefix)) 612 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
629 return -EFAULT; 613 return -EFAULT;
630 614
631 if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg), 615 if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
632 &vcpu->arch.sie_block->todpr, 4, prefix)) 616 &vcpu->arch.sie_block->todpr, 4, prefix))
633 return -EFAULT; 617 return -EFAULT;
634 618
635 if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer), 619 if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
636 &vcpu->arch.sie_block->cputm, 8, prefix)) 620 &vcpu->arch.sie_block->cputm, 8, prefix))
637 return -EFAULT; 621 return -EFAULT;
638 622
639 if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp), 623 if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
640 &vcpu->arch.sie_block->ckc, 8, prefix)) 624 &vcpu->arch.sie_block->ckc, 8, prefix))
641 return -EFAULT; 625 return -EFAULT;
642 626
643 if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs), 627 if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
644 &vcpu->arch.guest_acrs, 64, prefix)) 628 &vcpu->arch.guest_acrs, 64, prefix))
645 return -EFAULT; 629 return -EFAULT;
646 630
647 if (__guestcopy(vcpu, 631 if (__guestcopy(vcpu,
648 addr + offsetof(struct save_area, ctrl_regs), 632 addr + offsetof(struct save_area, ctrl_regs),
649 &vcpu->arch.sie_block->gcr, 128, prefix)) 633 &vcpu->arch.sie_block->gcr, 128, prefix))
650 return -EFAULT; 634 return -EFAULT;
651 return 0; 635 return 0;
652 } 636 }
653 637
654 static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 638 static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
655 { 639 {
656 int rc; 640 int rc;
657 641
658 vcpu_load(vcpu); 642 vcpu_load(vcpu);
659 rc = __kvm_s390_vcpu_store_status(vcpu, addr); 643 rc = __kvm_s390_vcpu_store_status(vcpu, addr);
660 vcpu_put(vcpu); 644 vcpu_put(vcpu);
661 return rc; 645 return rc;
662 } 646 }
663 647
664 long kvm_arch_vcpu_ioctl(struct file *filp, 648 long kvm_arch_vcpu_ioctl(struct file *filp,
665 unsigned int ioctl, unsigned long arg) 649 unsigned int ioctl, unsigned long arg)
666 { 650 {
667 struct kvm_vcpu *vcpu = filp->private_data; 651 struct kvm_vcpu *vcpu = filp->private_data;
668 void __user *argp = (void __user *)arg; 652 void __user *argp = (void __user *)arg;
669 653
670 switch (ioctl) { 654 switch (ioctl) {
671 case KVM_S390_INTERRUPT: { 655 case KVM_S390_INTERRUPT: {
672 struct kvm_s390_interrupt s390int; 656 struct kvm_s390_interrupt s390int;
673 657
674 if (copy_from_user(&s390int, argp, sizeof(s390int))) 658 if (copy_from_user(&s390int, argp, sizeof(s390int)))
675 return -EFAULT; 659 return -EFAULT;
676 return kvm_s390_inject_vcpu(vcpu, &s390int); 660 return kvm_s390_inject_vcpu(vcpu, &s390int);
677 } 661 }
678 case KVM_S390_STORE_STATUS: 662 case KVM_S390_STORE_STATUS:
679 return kvm_s390_vcpu_store_status(vcpu, arg); 663 return kvm_s390_vcpu_store_status(vcpu, arg);
680 case KVM_S390_SET_INITIAL_PSW: { 664 case KVM_S390_SET_INITIAL_PSW: {
681 psw_t psw; 665 psw_t psw;
682 666
683 if (copy_from_user(&psw, argp, sizeof(psw))) 667 if (copy_from_user(&psw, argp, sizeof(psw)))
684 return -EFAULT; 668 return -EFAULT;
685 return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); 669 return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
686 } 670 }
687 case KVM_S390_INITIAL_RESET: 671 case KVM_S390_INITIAL_RESET:
688 return kvm_arch_vcpu_ioctl_initial_reset(vcpu); 672 return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
689 default: 673 default:
690 ; 674 ;
691 } 675 }
692 return -EINVAL; 676 return -EINVAL;
693 } 677 }
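The handler above validates and dispatches the s390-specific vcpu ioctls. A minimal userspace sketch of exercising two of them follows; it is illustrative only, and the vcpu_fd variable, the store address and the choice of interrupt type are assumptions rather than anything taken from this patch.

/* Hypothetical userspace caller for the vcpu ioctls handled above. */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int example_poke_vcpu(int vcpu_fd)
{
	struct kvm_s390_interrupt irq = {
		.type	= KVM_S390_INT_EMERGENCY,	/* example interrupt type */
		.parm	= 0,
		.parm64	= 0,
	};

	if (ioctl(vcpu_fd, KVM_S390_INTERRUPT, &irq) < 0)
		return -1;

	/* store the vcpu state at an assumed guest absolute address */
	return ioctl(vcpu_fd, KVM_S390_STORE_STATUS, 0x1000ul);
}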
694 678
695 /* Section: memory related */ 679 /* Section: memory related */
696 int kvm_arch_prepare_memory_region(struct kvm *kvm, 680 int kvm_arch_prepare_memory_region(struct kvm *kvm,
697 struct kvm_memory_slot *memslot, 681 struct kvm_memory_slot *memslot,
698 struct kvm_memory_slot old, 682 struct kvm_memory_slot old,
699 struct kvm_userspace_memory_region *mem, 683 struct kvm_userspace_memory_region *mem,
700 int user_alloc) 684 int user_alloc)
701 { 685 {
702 /* A few sanity checks. We can have exactly one memory slot which has 686 /* A few sanity checks. We can have exactly one memory slot which has
703 to start at guest virtual zero and which has to be located at a 687 to start at guest virtual zero and which has to be located at a
704 page boundary in userland and which has to end at a page boundary. 688 page boundary in userland and which has to end at a page boundary.
705 The memory in userland is ok to be fragmented into various different 689 The memory in userland is ok to be fragmented into various different
706 vmas. It is okay to mmap() and munmap() stuff in this slot after 690 vmas. It is okay to mmap() and munmap() stuff in this slot after
707 doing this call at any time */ 691 doing this call at any time */
708 692
709 if (mem->slot) 693 if (mem->slot)
710 return -EINVAL; 694 return -EINVAL;
711 695
712 if (mem->guest_phys_addr) 696 if (mem->guest_phys_addr)
713 return -EINVAL; 697 return -EINVAL;
714 698
715 if (mem->userspace_addr & (PAGE_SIZE - 1)) 699 if (mem->userspace_addr & (PAGE_SIZE - 1))
716 return -EINVAL; 700 return -EINVAL;
717 701
718 if (mem->memory_size & (PAGE_SIZE - 1)) 702 if (mem->memory_size & (PAGE_SIZE - 1))
719 return -EINVAL; 703 return -EINVAL;
720 704
721 if (!user_alloc) 705 if (!user_alloc)
722 return -EINVAL; 706 return -EINVAL;
723 707
724 return 0; 708 return 0;
725 } 709 }
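The checks above pin down what a valid slot looks like on s390: slot number zero, guest physical address zero, and a page-aligned size and userspace address. A hedged sketch of a matching KVM_SET_USER_MEMORY_REGION call from userspace (vm_fd, guest_mem and the 256 MiB size are illustrative assumptions):

/* Illustrative only: register the single page-aligned slot at guest address 0. */
struct kvm_userspace_memory_region mem = {
	.slot		 = 0,			/* only slot 0 is accepted */
	.flags		 = 0,
	.guest_phys_addr = 0,			/* must start at guest zero */
	.memory_size	 = 256 << 20,		/* page-aligned size */
	.userspace_addr	 = (unsigned long)guest_mem, /* page-aligned mmap()ed backing */
};

if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &mem) < 0)
	perror("KVM_SET_USER_MEMORY_REGION");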
726 710
727 void kvm_arch_commit_memory_region(struct kvm *kvm, 711 void kvm_arch_commit_memory_region(struct kvm *kvm,
728 struct kvm_userspace_memory_region *mem, 712 struct kvm_userspace_memory_region *mem,
729 struct kvm_memory_slot old, 713 struct kvm_memory_slot old,
730 int user_alloc) 714 int user_alloc)
731 { 715 {
732 int i; 716 int i;
733 struct kvm_vcpu *vcpu; 717 struct kvm_vcpu *vcpu;
734 718
735 /* request update of sie control block for all available vcpus */ 719 /* request update of sie control block for all available vcpus */
736 kvm_for_each_vcpu(i, vcpu, kvm) { 720 kvm_for_each_vcpu(i, vcpu, kvm) {
737 if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) 721 if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
738 continue; 722 continue;
739 kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP); 723 kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
740 } 724 }
741 } 725 }
742 726
743 void kvm_arch_flush_shadow(struct kvm *kvm) 727 void kvm_arch_flush_shadow(struct kvm *kvm)
744 { 728 {
745 } 729 }
746 730
747 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 731 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
748 { 732 {
749 return gfn; 733 return gfn;
750 } 734 }
751 735
752 static int __init kvm_s390_init(void) 736 static int __init kvm_s390_init(void)
753 { 737 {
754 int ret; 738 int ret;
755 ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 739 ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
756 if (ret) 740 if (ret)
757 return ret; 741 return ret;
758 742
759 /* 743 /*
760 * guests can ask for up to 255+1 double words, we need a full page 744 * guests can ask for up to 255+1 double words, we need a full page
761 * to hold the maximum amount of facilities. On the other hand, we 745 * to hold the maximum amount of facilities. On the other hand, we
762 * only set facilities that are known to work in KVM. 746 * only set facilities that are known to work in KVM.
763 */ 747 */
764 facilities = (unsigned long long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); 748 facilities = (unsigned long long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
765 if (!facilities) { 749 if (!facilities) {
766 kvm_exit(); 750 kvm_exit();
767 return -ENOMEM; 751 return -ENOMEM;
768 } 752 }
769 stfle(facilities, 1); 753 stfle(facilities, 1);
770 facilities[0] &= 0xff00fff3f0700000ULL; 754 facilities[0] &= 0xff00fff3f0700000ULL;
771 return 0; 755 return 0;
772 } 756 }
773 757
774 static void __exit kvm_s390_exit(void) 758 static void __exit kvm_s390_exit(void)
775 { 759 {
776 free_page((unsigned long) facilities); 760 free_page((unsigned long) facilities);
777 kvm_exit(); 761 kvm_exit();
778 } 762 }
779 763
780 module_init(kvm_s390_init); 764 module_init(kvm_s390_init);
781 module_exit(kvm_s390_exit); 765 module_exit(kvm_s390_exit);
782 766
1 /* 1 /*
2 * Kernel-based Virtual Machine driver for Linux 2 * Kernel-based Virtual Machine driver for Linux
3 * 3 *
4 * derived from drivers/kvm/kvm_main.c 4 * derived from drivers/kvm/kvm_main.c
5 * 5 *
6 * Copyright (C) 2006 Qumranet, Inc. 6 * Copyright (C) 2006 Qumranet, Inc.
7 * Copyright (C) 2008 Qumranet, Inc. 7 * Copyright (C) 2008 Qumranet, Inc.
8 * Copyright IBM Corporation, 2008 8 * Copyright IBM Corporation, 2008
9 * 9 *
10 * Authors: 10 * Authors:
11 * Avi Kivity <avi@qumranet.com> 11 * Avi Kivity <avi@qumranet.com>
12 * Yaniv Kamay <yaniv@qumranet.com> 12 * Yaniv Kamay <yaniv@qumranet.com>
13 * Amit Shah <amit.shah@qumranet.com> 13 * Amit Shah <amit.shah@qumranet.com>
14 * Ben-Ami Yassour <benami@il.ibm.com> 14 * Ben-Ami Yassour <benami@il.ibm.com>
15 * 15 *
16 * This work is licensed under the terms of the GNU GPL, version 2. See 16 * This work is licensed under the terms of the GNU GPL, version 2. See
17 * the COPYING file in the top-level directory. 17 * the COPYING file in the top-level directory.
18 * 18 *
19 */ 19 */
20 20
21 #include <linux/kvm_host.h> 21 #include <linux/kvm_host.h>
22 #include "irq.h" 22 #include "irq.h"
23 #include "mmu.h" 23 #include "mmu.h"
24 #include "i8254.h" 24 #include "i8254.h"
25 #include "tss.h" 25 #include "tss.h"
26 #include "kvm_cache_regs.h" 26 #include "kvm_cache_regs.h"
27 #include "x86.h" 27 #include "x86.h"
28 28
29 #include <linux/clocksource.h> 29 #include <linux/clocksource.h>
30 #include <linux/interrupt.h> 30 #include <linux/interrupt.h>
31 #include <linux/kvm.h> 31 #include <linux/kvm.h>
32 #include <linux/fs.h> 32 #include <linux/fs.h>
33 #include <linux/vmalloc.h> 33 #include <linux/vmalloc.h>
34 #include <linux/module.h> 34 #include <linux/module.h>
35 #include <linux/mman.h> 35 #include <linux/mman.h>
36 #include <linux/highmem.h> 36 #include <linux/highmem.h>
37 #include <linux/iommu.h> 37 #include <linux/iommu.h>
38 #include <linux/intel-iommu.h> 38 #include <linux/intel-iommu.h>
39 #include <linux/cpufreq.h> 39 #include <linux/cpufreq.h>
40 #include <linux/user-return-notifier.h> 40 #include <linux/user-return-notifier.h>
41 #include <linux/srcu.h> 41 #include <linux/srcu.h>
42 #include <linux/slab.h> 42 #include <linux/slab.h>
43 #include <linux/perf_event.h> 43 #include <linux/perf_event.h>
44 #include <trace/events/kvm.h> 44 #include <trace/events/kvm.h>
45 45
46 #define CREATE_TRACE_POINTS 46 #define CREATE_TRACE_POINTS
47 #include "trace.h" 47 #include "trace.h"
48 48
49 #include <asm/debugreg.h> 49 #include <asm/debugreg.h>
50 #include <asm/uaccess.h> 50 #include <asm/uaccess.h>
51 #include <asm/msr.h> 51 #include <asm/msr.h>
52 #include <asm/desc.h> 52 #include <asm/desc.h>
53 #include <asm/mtrr.h> 53 #include <asm/mtrr.h>
54 #include <asm/mce.h> 54 #include <asm/mce.h>
55 55
56 #define MAX_IO_MSRS 256 56 #define MAX_IO_MSRS 256
57 #define CR0_RESERVED_BITS \ 57 #define CR0_RESERVED_BITS \
58 (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ 58 (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
59 | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ 59 | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
60 | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) 60 | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
61 #define CR4_RESERVED_BITS \ 61 #define CR4_RESERVED_BITS \
62 (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ 62 (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
63 | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ 63 | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
64 | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ 64 | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
65 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) 65 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
66 66
67 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) 67 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
68 68
69 #define KVM_MAX_MCE_BANKS 32 69 #define KVM_MAX_MCE_BANKS 32
70 #define KVM_MCE_CAP_SUPPORTED MCG_CTL_P 70 #define KVM_MCE_CAP_SUPPORTED MCG_CTL_P
71 71
72 /* EFER defaults: 72 /* EFER defaults:
73 * - enable syscall per default because its emulated by KVM 73 * - enable syscall per default because its emulated by KVM
74 * - enable LME and LMA per default on 64 bit KVM 74 * - enable LME and LMA per default on 64 bit KVM
75 */ 75 */
76 #ifdef CONFIG_X86_64 76 #ifdef CONFIG_X86_64
77 static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL; 77 static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL;
78 #else 78 #else
79 static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL; 79 static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
80 #endif 80 #endif
81 81
82 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM 82 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
83 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 83 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
84 84
85 static void update_cr8_intercept(struct kvm_vcpu *vcpu); 85 static void update_cr8_intercept(struct kvm_vcpu *vcpu);
86 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 86 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
87 struct kvm_cpuid_entry2 __user *entries); 87 struct kvm_cpuid_entry2 __user *entries);
88 88
89 struct kvm_x86_ops *kvm_x86_ops; 89 struct kvm_x86_ops *kvm_x86_ops;
90 EXPORT_SYMBOL_GPL(kvm_x86_ops); 90 EXPORT_SYMBOL_GPL(kvm_x86_ops);
91 91
92 int ignore_msrs = 0; 92 int ignore_msrs = 0;
93 module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); 93 module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
94 94
95 #define KVM_NR_SHARED_MSRS 16 95 #define KVM_NR_SHARED_MSRS 16
96 96
97 struct kvm_shared_msrs_global { 97 struct kvm_shared_msrs_global {
98 int nr; 98 int nr;
99 u32 msrs[KVM_NR_SHARED_MSRS]; 99 u32 msrs[KVM_NR_SHARED_MSRS];
100 }; 100 };
101 101
102 struct kvm_shared_msrs { 102 struct kvm_shared_msrs {
103 struct user_return_notifier urn; 103 struct user_return_notifier urn;
104 bool registered; 104 bool registered;
105 struct kvm_shared_msr_values { 105 struct kvm_shared_msr_values {
106 u64 host; 106 u64 host;
107 u64 curr; 107 u64 curr;
108 } values[KVM_NR_SHARED_MSRS]; 108 } values[KVM_NR_SHARED_MSRS];
109 }; 109 };
110 110
111 static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; 111 static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
112 static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs); 112 static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs);
113 113
114 struct kvm_stats_debugfs_item debugfs_entries[] = { 114 struct kvm_stats_debugfs_item debugfs_entries[] = {
115 { "pf_fixed", VCPU_STAT(pf_fixed) }, 115 { "pf_fixed", VCPU_STAT(pf_fixed) },
116 { "pf_guest", VCPU_STAT(pf_guest) }, 116 { "pf_guest", VCPU_STAT(pf_guest) },
117 { "tlb_flush", VCPU_STAT(tlb_flush) }, 117 { "tlb_flush", VCPU_STAT(tlb_flush) },
118 { "invlpg", VCPU_STAT(invlpg) }, 118 { "invlpg", VCPU_STAT(invlpg) },
119 { "exits", VCPU_STAT(exits) }, 119 { "exits", VCPU_STAT(exits) },
120 { "io_exits", VCPU_STAT(io_exits) }, 120 { "io_exits", VCPU_STAT(io_exits) },
121 { "mmio_exits", VCPU_STAT(mmio_exits) }, 121 { "mmio_exits", VCPU_STAT(mmio_exits) },
122 { "signal_exits", VCPU_STAT(signal_exits) }, 122 { "signal_exits", VCPU_STAT(signal_exits) },
123 { "irq_window", VCPU_STAT(irq_window_exits) }, 123 { "irq_window", VCPU_STAT(irq_window_exits) },
124 { "nmi_window", VCPU_STAT(nmi_window_exits) }, 124 { "nmi_window", VCPU_STAT(nmi_window_exits) },
125 { "halt_exits", VCPU_STAT(halt_exits) }, 125 { "halt_exits", VCPU_STAT(halt_exits) },
126 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 126 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
127 { "hypercalls", VCPU_STAT(hypercalls) }, 127 { "hypercalls", VCPU_STAT(hypercalls) },
128 { "request_irq", VCPU_STAT(request_irq_exits) }, 128 { "request_irq", VCPU_STAT(request_irq_exits) },
129 { "irq_exits", VCPU_STAT(irq_exits) }, 129 { "irq_exits", VCPU_STAT(irq_exits) },
130 { "host_state_reload", VCPU_STAT(host_state_reload) }, 130 { "host_state_reload", VCPU_STAT(host_state_reload) },
131 { "efer_reload", VCPU_STAT(efer_reload) }, 131 { "efer_reload", VCPU_STAT(efer_reload) },
132 { "fpu_reload", VCPU_STAT(fpu_reload) }, 132 { "fpu_reload", VCPU_STAT(fpu_reload) },
133 { "insn_emulation", VCPU_STAT(insn_emulation) }, 133 { "insn_emulation", VCPU_STAT(insn_emulation) },
134 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, 134 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
135 { "irq_injections", VCPU_STAT(irq_injections) }, 135 { "irq_injections", VCPU_STAT(irq_injections) },
136 { "nmi_injections", VCPU_STAT(nmi_injections) }, 136 { "nmi_injections", VCPU_STAT(nmi_injections) },
137 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, 137 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
138 { "mmu_pte_write", VM_STAT(mmu_pte_write) }, 138 { "mmu_pte_write", VM_STAT(mmu_pte_write) },
139 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, 139 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
140 { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) }, 140 { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
141 { "mmu_flooded", VM_STAT(mmu_flooded) }, 141 { "mmu_flooded", VM_STAT(mmu_flooded) },
142 { "mmu_recycled", VM_STAT(mmu_recycled) }, 142 { "mmu_recycled", VM_STAT(mmu_recycled) },
143 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, 143 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
144 { "mmu_unsync", VM_STAT(mmu_unsync) }, 144 { "mmu_unsync", VM_STAT(mmu_unsync) },
145 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, 145 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
146 { "largepages", VM_STAT(lpages) }, 146 { "largepages", VM_STAT(lpages) },
147 { NULL } 147 { NULL }
148 }; 148 };
149 149
150 static void kvm_on_user_return(struct user_return_notifier *urn) 150 static void kvm_on_user_return(struct user_return_notifier *urn)
151 { 151 {
152 unsigned slot; 152 unsigned slot;
153 struct kvm_shared_msrs *locals 153 struct kvm_shared_msrs *locals
154 = container_of(urn, struct kvm_shared_msrs, urn); 154 = container_of(urn, struct kvm_shared_msrs, urn);
155 struct kvm_shared_msr_values *values; 155 struct kvm_shared_msr_values *values;
156 156
157 for (slot = 0; slot < shared_msrs_global.nr; ++slot) { 157 for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
158 values = &locals->values[slot]; 158 values = &locals->values[slot];
159 if (values->host != values->curr) { 159 if (values->host != values->curr) {
160 wrmsrl(shared_msrs_global.msrs[slot], values->host); 160 wrmsrl(shared_msrs_global.msrs[slot], values->host);
161 values->curr = values->host; 161 values->curr = values->host;
162 } 162 }
163 } 163 }
164 locals->registered = false; 164 locals->registered = false;
165 user_return_notifier_unregister(urn); 165 user_return_notifier_unregister(urn);
166 } 166 }
167 167
168 static void shared_msr_update(unsigned slot, u32 msr) 168 static void shared_msr_update(unsigned slot, u32 msr)
169 { 169 {
170 struct kvm_shared_msrs *smsr; 170 struct kvm_shared_msrs *smsr;
171 u64 value; 171 u64 value;
172 172
173 smsr = &__get_cpu_var(shared_msrs); 173 smsr = &__get_cpu_var(shared_msrs);
174 /* only read, and nobody should modify it at this time, 174 /* only read, and nobody should modify it at this time,
175 * so no lock is needed */ 175 * so no lock is needed */
176 if (slot >= shared_msrs_global.nr) { 176 if (slot >= shared_msrs_global.nr) {
177 printk(KERN_ERR "kvm: invalid MSR slot!"); 177 printk(KERN_ERR "kvm: invalid MSR slot!");
178 return; 178 return;
179 } 179 }
180 rdmsrl_safe(msr, &value); 180 rdmsrl_safe(msr, &value);
181 smsr->values[slot].host = value; 181 smsr->values[slot].host = value;
182 smsr->values[slot].curr = value; 182 smsr->values[slot].curr = value;
183 } 183 }
184 184
185 void kvm_define_shared_msr(unsigned slot, u32 msr) 185 void kvm_define_shared_msr(unsigned slot, u32 msr)
186 { 186 {
187 if (slot >= shared_msrs_global.nr) 187 if (slot >= shared_msrs_global.nr)
188 shared_msrs_global.nr = slot + 1; 188 shared_msrs_global.nr = slot + 1;
189 shared_msrs_global.msrs[slot] = msr; 189 shared_msrs_global.msrs[slot] = msr;
190 /* ensure shared_msrs_global has been updated before it is read */ 190 /* ensure shared_msrs_global has been updated before it is read */
191 smp_wmb(); 191 smp_wmb();
192 } 192 }
193 EXPORT_SYMBOL_GPL(kvm_define_shared_msr); 193 EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
194 194
195 static void kvm_shared_msr_cpu_online(void) 195 static void kvm_shared_msr_cpu_online(void)
196 { 196 {
197 unsigned i; 197 unsigned i;
198 198
199 for (i = 0; i < shared_msrs_global.nr; ++i) 199 for (i = 0; i < shared_msrs_global.nr; ++i)
200 shared_msr_update(i, shared_msrs_global.msrs[i]); 200 shared_msr_update(i, shared_msrs_global.msrs[i]);
201 } 201 }
202 202
203 void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) 203 void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
204 { 204 {
205 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); 205 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
206 206
207 if (((value ^ smsr->values[slot].curr) & mask) == 0) 207 if (((value ^ smsr->values[slot].curr) & mask) == 0)
208 return; 208 return;
209 smsr->values[slot].curr = value; 209 smsr->values[slot].curr = value;
210 wrmsrl(shared_msrs_global.msrs[slot], value); 210 wrmsrl(shared_msrs_global.msrs[slot], value);
211 if (!smsr->registered) { 211 if (!smsr->registered) {
212 smsr->urn.on_user_return = kvm_on_user_return; 212 smsr->urn.on_user_return = kvm_on_user_return;
213 user_return_notifier_register(&smsr->urn); 213 user_return_notifier_register(&smsr->urn);
214 smsr->registered = true; 214 smsr->registered = true;
215 } 215 }
216 } 216 }
217 EXPORT_SYMBOL_GPL(kvm_set_shared_msr); 217 EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
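kvm_define_shared_msr() publishes a slot in shared_msrs_global at module init, and kvm_set_shared_msr() later writes the guest value and registers the user-return notifier so that kvm_on_user_return() can restore the host value before returning to userspace. A hedged sketch of a caller of this pair; the slot index and the choice of MSR_K6_STAR are assumptions for illustration, not part of the patch:

/* Illustrative sketch of the shared-MSR API defined above. */
#define EXAMPLE_STAR_SLOT	0		/* assumed slot index */

static void example_setup_shared_msrs(void)
{
	/* once, at module init: reserve the slot for this MSR */
	kvm_define_shared_msr(EXAMPLE_STAR_SLOT, MSR_K6_STAR);
}

static void example_load_guest_msrs(u64 guest_star)
{
	/* before entering the guest: only writes the MSR if masked bits change */
	kvm_set_shared_msr(EXAMPLE_STAR_SLOT, guest_star, ~0ULL);
}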
218 218
219 static void drop_user_return_notifiers(void *ignore) 219 static void drop_user_return_notifiers(void *ignore)
220 { 220 {
221 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); 221 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
222 222
223 if (smsr->registered) 223 if (smsr->registered)
224 kvm_on_user_return(&smsr->urn); 224 kvm_on_user_return(&smsr->urn);
225 } 225 }
226 226
227 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) 227 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
228 { 228 {
229 if (irqchip_in_kernel(vcpu->kvm)) 229 if (irqchip_in_kernel(vcpu->kvm))
230 return vcpu->arch.apic_base; 230 return vcpu->arch.apic_base;
231 else 231 else
232 return vcpu->arch.apic_base; 232 return vcpu->arch.apic_base;
233 } 233 }
234 EXPORT_SYMBOL_GPL(kvm_get_apic_base); 234 EXPORT_SYMBOL_GPL(kvm_get_apic_base);
235 235
236 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) 236 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
237 { 237 {
238 /* TODO: reserve bits check */ 238 /* TODO: reserve bits check */
239 if (irqchip_in_kernel(vcpu->kvm)) 239 if (irqchip_in_kernel(vcpu->kvm))
240 kvm_lapic_set_base(vcpu, data); 240 kvm_lapic_set_base(vcpu, data);
241 else 241 else
242 vcpu->arch.apic_base = data; 242 vcpu->arch.apic_base = data;
243 } 243 }
244 EXPORT_SYMBOL_GPL(kvm_set_apic_base); 244 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
245 245
246 #define EXCPT_BENIGN 0 246 #define EXCPT_BENIGN 0
247 #define EXCPT_CONTRIBUTORY 1 247 #define EXCPT_CONTRIBUTORY 1
248 #define EXCPT_PF 2 248 #define EXCPT_PF 2
249 249
250 static int exception_class(int vector) 250 static int exception_class(int vector)
251 { 251 {
252 switch (vector) { 252 switch (vector) {
253 case PF_VECTOR: 253 case PF_VECTOR:
254 return EXCPT_PF; 254 return EXCPT_PF;
255 case DE_VECTOR: 255 case DE_VECTOR:
256 case TS_VECTOR: 256 case TS_VECTOR:
257 case NP_VECTOR: 257 case NP_VECTOR:
258 case SS_VECTOR: 258 case SS_VECTOR:
259 case GP_VECTOR: 259 case GP_VECTOR:
260 return EXCPT_CONTRIBUTORY; 260 return EXCPT_CONTRIBUTORY;
261 default: 261 default:
262 break; 262 break;
263 } 263 }
264 return EXCPT_BENIGN; 264 return EXCPT_BENIGN;
265 } 265 }
266 266
267 static void kvm_multiple_exception(struct kvm_vcpu *vcpu, 267 static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
268 unsigned nr, bool has_error, u32 error_code, 268 unsigned nr, bool has_error, u32 error_code,
269 bool reinject) 269 bool reinject)
270 { 270 {
271 u32 prev_nr; 271 u32 prev_nr;
272 int class1, class2; 272 int class1, class2;
273 273
274 if (!vcpu->arch.exception.pending) { 274 if (!vcpu->arch.exception.pending) {
275 queue: 275 queue:
276 vcpu->arch.exception.pending = true; 276 vcpu->arch.exception.pending = true;
277 vcpu->arch.exception.has_error_code = has_error; 277 vcpu->arch.exception.has_error_code = has_error;
278 vcpu->arch.exception.nr = nr; 278 vcpu->arch.exception.nr = nr;
279 vcpu->arch.exception.error_code = error_code; 279 vcpu->arch.exception.error_code = error_code;
280 vcpu->arch.exception.reinject = reinject; 280 vcpu->arch.exception.reinject = reinject;
281 return; 281 return;
282 } 282 }
283 283
284 /* a second exception is pending; decide how to combine them */ 284 /* a second exception is pending; decide how to combine them */
285 prev_nr = vcpu->arch.exception.nr; 285 prev_nr = vcpu->arch.exception.nr;
286 if (prev_nr == DF_VECTOR) { 286 if (prev_nr == DF_VECTOR) {
287 /* triple fault -> shutdown */ 287 /* triple fault -> shutdown */
288 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); 288 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
289 return; 289 return;
290 } 290 }
291 class1 = exception_class(prev_nr); 291 class1 = exception_class(prev_nr);
292 class2 = exception_class(nr); 292 class2 = exception_class(nr);
293 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) 293 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
294 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) { 294 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
295 /* generate double fault per SDM Table 5-5 */ 295 /* generate double fault per SDM Table 5-5 */
296 vcpu->arch.exception.pending = true; 296 vcpu->arch.exception.pending = true;
297 vcpu->arch.exception.has_error_code = true; 297 vcpu->arch.exception.has_error_code = true;
298 vcpu->arch.exception.nr = DF_VECTOR; 298 vcpu->arch.exception.nr = DF_VECTOR;
299 vcpu->arch.exception.error_code = 0; 299 vcpu->arch.exception.error_code = 0;
300 } else 300 } else
301 /* replace previous exception with a new one in the hope 301 /* replace previous exception with a new one in the hope
302 that instruction re-execution will regenerate the lost 302 that instruction re-execution will regenerate the lost
303 exception */ 303 exception */
304 goto queue; 304 goto queue;
305 } 305 }
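The path above encodes the double-fault promotion rules: a contributory exception raised while a contributory one is pending, or any non-benign exception raised while a page fault is pending, becomes #DF, and a further fault on top of #DF requests a triple-fault shutdown. A standalone restatement of that decision, as a hypothetical helper that is not part of the patch:

/* Hypothetical helper mirroring the promotion rule in kvm_multiple_exception(). */
static bool example_promotes_to_double_fault(unsigned prev_nr, unsigned nr)
{
	int class1 = exception_class(prev_nr);
	int class2 = exception_class(nr);

	/* e.g. #GP while delivering #GP, or #GP while delivering #PF */
	return (class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) ||
	       (class1 == EXCPT_PF && class2 != EXCPT_BENIGN);
}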
306 306
307 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) 307 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
308 { 308 {
309 kvm_multiple_exception(vcpu, nr, false, 0, false); 309 kvm_multiple_exception(vcpu, nr, false, 0, false);
310 } 310 }
311 EXPORT_SYMBOL_GPL(kvm_queue_exception); 311 EXPORT_SYMBOL_GPL(kvm_queue_exception);
312 312
313 void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr) 313 void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
314 { 314 {
315 kvm_multiple_exception(vcpu, nr, false, 0, true); 315 kvm_multiple_exception(vcpu, nr, false, 0, true);
316 } 316 }
317 EXPORT_SYMBOL_GPL(kvm_requeue_exception); 317 EXPORT_SYMBOL_GPL(kvm_requeue_exception);
318 318
319 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, 319 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
320 u32 error_code) 320 u32 error_code)
321 { 321 {
322 ++vcpu->stat.pf_guest; 322 ++vcpu->stat.pf_guest;
323 vcpu->arch.cr2 = addr; 323 vcpu->arch.cr2 = addr;
324 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); 324 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
325 } 325 }
326 326
327 void kvm_inject_nmi(struct kvm_vcpu *vcpu) 327 void kvm_inject_nmi(struct kvm_vcpu *vcpu)
328 { 328 {
329 vcpu->arch.nmi_pending = 1; 329 vcpu->arch.nmi_pending = 1;
330 } 330 }
331 EXPORT_SYMBOL_GPL(kvm_inject_nmi); 331 EXPORT_SYMBOL_GPL(kvm_inject_nmi);
332 332
333 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) 333 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
334 { 334 {
335 kvm_multiple_exception(vcpu, nr, true, error_code, false); 335 kvm_multiple_exception(vcpu, nr, true, error_code, false);
336 } 336 }
337 EXPORT_SYMBOL_GPL(kvm_queue_exception_e); 337 EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
338 338
339 void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) 339 void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
340 { 340 {
341 kvm_multiple_exception(vcpu, nr, true, error_code, true); 341 kvm_multiple_exception(vcpu, nr, true, error_code, true);
342 } 342 }
343 EXPORT_SYMBOL_GPL(kvm_requeue_exception_e); 343 EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
344 344
345 /* 345 /*
346 * Checks if cpl <= required_cpl; if true, return true. Otherwise queue 346 * Checks if cpl <= required_cpl; if true, return true. Otherwise queue
347 * a #GP and return false. 347 * a #GP and return false.
348 */ 348 */
349 bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl) 349 bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
350 { 350 {
351 if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl) 351 if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
352 return true; 352 return true;
353 kvm_queue_exception_e(vcpu, GP_VECTOR, 0); 353 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
354 return false; 354 return false;
355 } 355 }
356 EXPORT_SYMBOL_GPL(kvm_require_cpl); 356 EXPORT_SYMBOL_GPL(kvm_require_cpl);
357 357
358 /* 358 /*
359 * Load the pae pdptrs. Return true if they are all valid. 359 * Load the pae pdptrs. Return true if they are all valid.
360 */ 360 */
361 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) 361 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
362 { 362 {
363 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; 363 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
364 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2; 364 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
365 int i; 365 int i;
366 int ret; 366 int ret;
367 u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; 367 u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
368 368
369 ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, 369 ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
370 offset * sizeof(u64), sizeof(pdpte)); 370 offset * sizeof(u64), sizeof(pdpte));
371 if (ret < 0) { 371 if (ret < 0) {
372 ret = 0; 372 ret = 0;
373 goto out; 373 goto out;
374 } 374 }
375 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { 375 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
376 if (is_present_gpte(pdpte[i]) && 376 if (is_present_gpte(pdpte[i]) &&
377 (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) { 377 (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
378 ret = 0; 378 ret = 0;
379 goto out; 379 goto out;
380 } 380 }
381 } 381 }
382 ret = 1; 382 ret = 1;
383 383
384 memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); 384 memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
385 __set_bit(VCPU_EXREG_PDPTR, 385 __set_bit(VCPU_EXREG_PDPTR,
386 (unsigned long *)&vcpu->arch.regs_avail); 386 (unsigned long *)&vcpu->arch.regs_avail);
387 __set_bit(VCPU_EXREG_PDPTR, 387 __set_bit(VCPU_EXREG_PDPTR,
388 (unsigned long *)&vcpu->arch.regs_dirty); 388 (unsigned long *)&vcpu->arch.regs_dirty);
389 out: 389 out:
390 390
391 return ret; 391 return ret;
392 } 392 }
393 EXPORT_SYMBOL_GPL(load_pdptrs); 393 EXPORT_SYMBOL_GPL(load_pdptrs);
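A note on the offset computation above, offered as a worked example rather than anything stated in the patch: in PAE mode the low five bits of CR3 are not part of the table address, so ((cr3 & (PAGE_SIZE-1)) >> 5) << 2 converts the 32-byte-aligned position of the PDPT within its page into a count of 8-byte entries. For instance, with cr3 = 0x12345ae0 the expression gives (0xae0 >> 5) << 2 = 348, and 348 * sizeof(u64) = 0xae0, which is exactly the byte offset handed to kvm_read_guest_page().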
394 394
395 static bool pdptrs_changed(struct kvm_vcpu *vcpu) 395 static bool pdptrs_changed(struct kvm_vcpu *vcpu)
396 { 396 {
397 u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; 397 u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
398 bool changed = true; 398 bool changed = true;
399 int r; 399 int r;
400 400
401 if (is_long_mode(vcpu) || !is_pae(vcpu)) 401 if (is_long_mode(vcpu) || !is_pae(vcpu))
402 return false; 402 return false;
403 403
404 if (!test_bit(VCPU_EXREG_PDPTR, 404 if (!test_bit(VCPU_EXREG_PDPTR,
405 (unsigned long *)&vcpu->arch.regs_avail)) 405 (unsigned long *)&vcpu->arch.regs_avail))
406 return true; 406 return true;
407 407
408 r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); 408 r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
409 if (r < 0) 409 if (r < 0)
410 goto out; 410 goto out;
411 changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0; 411 changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0;
412 out: 412 out:
413 413
414 return changed; 414 return changed;
415 } 415 }
416 416
417 static int __kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 417 static int __kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
418 { 418 {
419 unsigned long old_cr0 = kvm_read_cr0(vcpu); 419 unsigned long old_cr0 = kvm_read_cr0(vcpu);
420 unsigned long update_bits = X86_CR0_PG | X86_CR0_WP | 420 unsigned long update_bits = X86_CR0_PG | X86_CR0_WP |
421 X86_CR0_CD | X86_CR0_NW; 421 X86_CR0_CD | X86_CR0_NW;
422 422
423 cr0 |= X86_CR0_ET; 423 cr0 |= X86_CR0_ET;
424 424
425 #ifdef CONFIG_X86_64 425 #ifdef CONFIG_X86_64
426 if (cr0 & 0xffffffff00000000UL) 426 if (cr0 & 0xffffffff00000000UL)
427 return 1; 427 return 1;
428 #endif 428 #endif
429 429
430 cr0 &= ~CR0_RESERVED_BITS; 430 cr0 &= ~CR0_RESERVED_BITS;
431 431
432 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) 432 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
433 return 1; 433 return 1;
434 434
435 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) 435 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
436 return 1; 436 return 1;
437 437
438 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { 438 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
439 #ifdef CONFIG_X86_64 439 #ifdef CONFIG_X86_64
440 if ((vcpu->arch.efer & EFER_LME)) { 440 if ((vcpu->arch.efer & EFER_LME)) {
441 int cs_db, cs_l; 441 int cs_db, cs_l;
442 442
443 if (!is_pae(vcpu)) 443 if (!is_pae(vcpu))
444 return 1; 444 return 1;
445 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 445 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
446 if (cs_l) 446 if (cs_l)
447 return 1; 447 return 1;
448 } else 448 } else
449 #endif 449 #endif
450 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) 450 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3))
451 return 1; 451 return 1;
452 } 452 }
453 453
454 kvm_x86_ops->set_cr0(vcpu, cr0); 454 kvm_x86_ops->set_cr0(vcpu, cr0);
455 455
456 if ((cr0 ^ old_cr0) & update_bits) 456 if ((cr0 ^ old_cr0) & update_bits)
457 kvm_mmu_reset_context(vcpu); 457 kvm_mmu_reset_context(vcpu);
458 return 0; 458 return 0;
459 } 459 }
460 460
461 void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 461 void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
462 { 462 {
463 if (__kvm_set_cr0(vcpu, cr0)) 463 if (__kvm_set_cr0(vcpu, cr0))
464 kvm_inject_gp(vcpu, 0); 464 kvm_inject_gp(vcpu, 0);
465 } 465 }
466 EXPORT_SYMBOL_GPL(kvm_set_cr0); 466 EXPORT_SYMBOL_GPL(kvm_set_cr0);
467 467
468 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 468 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
469 { 469 {
470 kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); 470 kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
471 } 471 }
472 EXPORT_SYMBOL_GPL(kvm_lmsw); 472 EXPORT_SYMBOL_GPL(kvm_lmsw);
473 473
474 int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 474 int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
475 { 475 {
476 unsigned long old_cr4 = kvm_read_cr4(vcpu); 476 unsigned long old_cr4 = kvm_read_cr4(vcpu);
477 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; 477 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
478 478
479 if (cr4 & CR4_RESERVED_BITS) 479 if (cr4 & CR4_RESERVED_BITS)
480 return 1; 480 return 1;
481 481
482 if (is_long_mode(vcpu)) { 482 if (is_long_mode(vcpu)) {
483 if (!(cr4 & X86_CR4_PAE)) 483 if (!(cr4 & X86_CR4_PAE))
484 return 1; 484 return 1;
485 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) 485 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
486 && ((cr4 ^ old_cr4) & pdptr_bits) 486 && ((cr4 ^ old_cr4) & pdptr_bits)
487 && !load_pdptrs(vcpu, vcpu->arch.cr3)) 487 && !load_pdptrs(vcpu, vcpu->arch.cr3))
488 return 1; 488 return 1;
489 489
490 if (cr4 & X86_CR4_VMXE) 490 if (cr4 & X86_CR4_VMXE)
491 return 1; 491 return 1;
492 492
493 kvm_x86_ops->set_cr4(vcpu, cr4); 493 kvm_x86_ops->set_cr4(vcpu, cr4);
494 494
495 if ((cr4 ^ old_cr4) & pdptr_bits) 495 if ((cr4 ^ old_cr4) & pdptr_bits)
496 kvm_mmu_reset_context(vcpu); 496 kvm_mmu_reset_context(vcpu);
497 497
498 return 0; 498 return 0;
499 } 499 }
500 500
501 void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 501 void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
502 { 502 {
503 if (__kvm_set_cr4(vcpu, cr4)) 503 if (__kvm_set_cr4(vcpu, cr4))
504 kvm_inject_gp(vcpu, 0); 504 kvm_inject_gp(vcpu, 0);
505 } 505 }
506 EXPORT_SYMBOL_GPL(kvm_set_cr4); 506 EXPORT_SYMBOL_GPL(kvm_set_cr4);
507 507
508 static int __kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 508 static int __kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
509 { 509 {
510 if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { 510 if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
511 kvm_mmu_sync_roots(vcpu); 511 kvm_mmu_sync_roots(vcpu);
512 kvm_mmu_flush_tlb(vcpu); 512 kvm_mmu_flush_tlb(vcpu);
513 return 0; 513 return 0;
514 } 514 }
515 515
516 if (is_long_mode(vcpu)) { 516 if (is_long_mode(vcpu)) {
517 if (cr3 & CR3_L_MODE_RESERVED_BITS) 517 if (cr3 & CR3_L_MODE_RESERVED_BITS)
518 return 1; 518 return 1;
519 } else { 519 } else {
520 if (is_pae(vcpu)) { 520 if (is_pae(vcpu)) {
521 if (cr3 & CR3_PAE_RESERVED_BITS) 521 if (cr3 & CR3_PAE_RESERVED_BITS)
522 return 1; 522 return 1;
523 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) 523 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3))
524 return 1; 524 return 1;
525 } 525 }
526 /* 526 /*
527 * We don't check reserved bits in nonpae mode, because 527 * We don't check reserved bits in nonpae mode, because
528 * this isn't enforced, and VMware depends on this. 528 * this isn't enforced, and VMware depends on this.
529 */ 529 */
530 } 530 }
531 531
532 /* 532 /*
533 * Does the new cr3 value map to physical memory? (Note, we 533 * Does the new cr3 value map to physical memory? (Note, we
534 * catch an invalid cr3 even in real-mode, because it would 534 * catch an invalid cr3 even in real-mode, because it would
535 * cause trouble later on when we turn on paging anyway.) 535 * cause trouble later on when we turn on paging anyway.)
536 * 536 *
537 * A real CPU would silently accept an invalid cr3 and would 537 * A real CPU would silently accept an invalid cr3 and would
538 * attempt to use it - with largely undefined (and often hard 538 * attempt to use it - with largely undefined (and often hard
539 * to debug) behavior on the guest side. 539 * to debug) behavior on the guest side.
540 */ 540 */
541 if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) 541 if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
542 return 1; 542 return 1;
543 vcpu->arch.cr3 = cr3; 543 vcpu->arch.cr3 = cr3;
544 vcpu->arch.mmu.new_cr3(vcpu); 544 vcpu->arch.mmu.new_cr3(vcpu);
545 return 0; 545 return 0;
546 } 546 }
547 547
548 void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 548 void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
549 { 549 {
550 if (__kvm_set_cr3(vcpu, cr3)) 550 if (__kvm_set_cr3(vcpu, cr3))
551 kvm_inject_gp(vcpu, 0); 551 kvm_inject_gp(vcpu, 0);
552 } 552 }
553 EXPORT_SYMBOL_GPL(kvm_set_cr3); 553 EXPORT_SYMBOL_GPL(kvm_set_cr3);
554 554
555 int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) 555 int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
556 { 556 {
557 if (cr8 & CR8_RESERVED_BITS) 557 if (cr8 & CR8_RESERVED_BITS)
558 return 1; 558 return 1;
559 if (irqchip_in_kernel(vcpu->kvm)) 559 if (irqchip_in_kernel(vcpu->kvm))
560 kvm_lapic_set_tpr(vcpu, cr8); 560 kvm_lapic_set_tpr(vcpu, cr8);
561 else 561 else
562 vcpu->arch.cr8 = cr8; 562 vcpu->arch.cr8 = cr8;
563 return 0; 563 return 0;
564 } 564 }
565 565
566 void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) 566 void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
567 { 567 {
568 if (__kvm_set_cr8(vcpu, cr8)) 568 if (__kvm_set_cr8(vcpu, cr8))
569 kvm_inject_gp(vcpu, 0); 569 kvm_inject_gp(vcpu, 0);
570 } 570 }
571 EXPORT_SYMBOL_GPL(kvm_set_cr8); 571 EXPORT_SYMBOL_GPL(kvm_set_cr8);
572 572
573 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) 573 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
574 { 574 {
575 if (irqchip_in_kernel(vcpu->kvm)) 575 if (irqchip_in_kernel(vcpu->kvm))
576 return kvm_lapic_get_cr8(vcpu); 576 return kvm_lapic_get_cr8(vcpu);
577 else 577 else
578 return vcpu->arch.cr8; 578 return vcpu->arch.cr8;
579 } 579 }
580 EXPORT_SYMBOL_GPL(kvm_get_cr8); 580 EXPORT_SYMBOL_GPL(kvm_get_cr8);
581 581
582 static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) 582 static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
583 { 583 {
584 switch (dr) { 584 switch (dr) {
585 case 0 ... 3: 585 case 0 ... 3:
586 vcpu->arch.db[dr] = val; 586 vcpu->arch.db[dr] = val;
587 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) 587 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
588 vcpu->arch.eff_db[dr] = val; 588 vcpu->arch.eff_db[dr] = val;
589 break; 589 break;
590 case 4: 590 case 4:
591 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 591 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
592 return 1; /* #UD */ 592 return 1; /* #UD */
593 /* fall through */ 593 /* fall through */
594 case 6: 594 case 6:
595 if (val & 0xffffffff00000000ULL) 595 if (val & 0xffffffff00000000ULL)
596 return -1; /* #GP */ 596 return -1; /* #GP */
597 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; 597 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
598 break; 598 break;
599 case 5: 599 case 5:
600 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 600 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
601 return 1; /* #UD */ 601 return 1; /* #UD */
602 /* fall through */ 602 /* fall through */
603 default: /* 7 */ 603 default: /* 7 */
604 if (val & 0xffffffff00000000ULL) 604 if (val & 0xffffffff00000000ULL)
605 return -1; /* #GP */ 605 return -1; /* #GP */
606 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; 606 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
607 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { 607 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
608 kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); 608 kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7);
609 vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK); 609 vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK);
610 } 610 }
611 break; 611 break;
612 } 612 }
613 613
614 return 0; 614 return 0;
615 } 615 }
616 616
617 int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) 617 int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
618 { 618 {
619 int res; 619 int res;
620 620
621 res = __kvm_set_dr(vcpu, dr, val); 621 res = __kvm_set_dr(vcpu, dr, val);
622 if (res > 0) 622 if (res > 0)
623 kvm_queue_exception(vcpu, UD_VECTOR); 623 kvm_queue_exception(vcpu, UD_VECTOR);
624 else if (res < 0) 624 else if (res < 0)
625 kvm_inject_gp(vcpu, 0); 625 kvm_inject_gp(vcpu, 0);
626 626
627 return res; 627 return res;
628 } 628 }
629 EXPORT_SYMBOL_GPL(kvm_set_dr); 629 EXPORT_SYMBOL_GPL(kvm_set_dr);
630 630
631 static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) 631 static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
632 { 632 {
633 switch (dr) { 633 switch (dr) {
634 case 0 ... 3: 634 case 0 ... 3:
635 *val = vcpu->arch.db[dr]; 635 *val = vcpu->arch.db[dr];
636 break; 636 break;
637 case 4: 637 case 4:
638 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 638 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
639 return 1; 639 return 1;
640 /* fall through */ 640 /* fall through */
641 case 6: 641 case 6:
642 *val = vcpu->arch.dr6; 642 *val = vcpu->arch.dr6;
643 break; 643 break;
644 case 5: 644 case 5:
645 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 645 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
646 return 1; 646 return 1;
647 /* fall through */ 647 /* fall through */
648 default: /* 7 */ 648 default: /* 7 */
649 *val = vcpu->arch.dr7; 649 *val = vcpu->arch.dr7;
650 break; 650 break;
651 } 651 }
652 652
653 return 0; 653 return 0;
654 } 654 }
655 655
656 int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) 656 int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
657 { 657 {
658 if (_kvm_get_dr(vcpu, dr, val)) { 658 if (_kvm_get_dr(vcpu, dr, val)) {
659 kvm_queue_exception(vcpu, UD_VECTOR); 659 kvm_queue_exception(vcpu, UD_VECTOR);
660 return 1; 660 return 1;
661 } 661 }
662 return 0; 662 return 0;
663 } 663 }
664 EXPORT_SYMBOL_GPL(kvm_get_dr); 664 EXPORT_SYMBOL_GPL(kvm_get_dr);
665 665
666 static inline u32 bit(int bitno) 666 static inline u32 bit(int bitno)
667 { 667 {
668 return 1 << (bitno & 31); 668 return 1 << (bitno & 31);
669 } 669 }
670 670
671 /* 671 /*
672 * List of msr numbers which we expose to userspace through KVM_GET_MSRS 672 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
673 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. 673 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
674 * 674 *
675 * This list is modified at module load time to reflect the 675 * This list is modified at module load time to reflect the
676 * capabilities of the host cpu. This capabilities test skips MSRs that are 676 * capabilities of the host cpu. This capabilities test skips MSRs that are
677 * kvm-specific. Those are put in the beginning of the list. 677 * kvm-specific. Those are put in the beginning of the list.
678 */ 678 */
679 679
680 #define KVM_SAVE_MSRS_BEGIN 7 680 #define KVM_SAVE_MSRS_BEGIN 7
681 static u32 msrs_to_save[] = { 681 static u32 msrs_to_save[] = {
682 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 682 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
683 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, 683 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
684 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, 684 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
685 HV_X64_MSR_APIC_ASSIST_PAGE, 685 HV_X64_MSR_APIC_ASSIST_PAGE,
686 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 686 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
687 MSR_K6_STAR, 687 MSR_K6_STAR,
688 #ifdef CONFIG_X86_64 688 #ifdef CONFIG_X86_64
689 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 689 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
690 #endif 690 #endif
691 MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA 691 MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
692 }; 692 };
693 693
694 static unsigned num_msrs_to_save; 694 static unsigned num_msrs_to_save;
695 695
696 static u32 emulated_msrs[] = { 696 static u32 emulated_msrs[] = {
697 MSR_IA32_MISC_ENABLE, 697 MSR_IA32_MISC_ENABLE,
698 }; 698 };
699 699
700 static int set_efer(struct kvm_vcpu *vcpu, u64 efer) 700 static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
701 { 701 {
702 u64 old_efer = vcpu->arch.efer; 702 u64 old_efer = vcpu->arch.efer;
703 703
704 if (efer & efer_reserved_bits) 704 if (efer & efer_reserved_bits)
705 return 1; 705 return 1;
706 706
707 if (is_paging(vcpu) 707 if (is_paging(vcpu)
708 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) 708 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
709 return 1; 709 return 1;
710 710
711 if (efer & EFER_FFXSR) { 711 if (efer & EFER_FFXSR) {
712 struct kvm_cpuid_entry2 *feat; 712 struct kvm_cpuid_entry2 *feat;
713 713
714 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 714 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
715 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) 715 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
716 return 1; 716 return 1;
717 } 717 }
718 718
719 if (efer & EFER_SVME) { 719 if (efer & EFER_SVME) {
720 struct kvm_cpuid_entry2 *feat; 720 struct kvm_cpuid_entry2 *feat;
721 721
722 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 722 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
723 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) 723 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
724 return 1; 724 return 1;
725 } 725 }
726 726
727 efer &= ~EFER_LMA; 727 efer &= ~EFER_LMA;
728 efer |= vcpu->arch.efer & EFER_LMA; 728 efer |= vcpu->arch.efer & EFER_LMA;
729 729
730 kvm_x86_ops->set_efer(vcpu, efer); 730 kvm_x86_ops->set_efer(vcpu, efer);
731 731
732 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; 732 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
733 kvm_mmu_reset_context(vcpu); 733 kvm_mmu_reset_context(vcpu);
734 734
735 /* Update reserved bits */ 735 /* Update reserved bits */
736 if ((efer ^ old_efer) & EFER_NX) 736 if ((efer ^ old_efer) & EFER_NX)
737 kvm_mmu_reset_context(vcpu); 737 kvm_mmu_reset_context(vcpu);
738 738
739 return 0; 739 return 0;
740 } 740 }
741 741
742 void kvm_enable_efer_bits(u64 mask) 742 void kvm_enable_efer_bits(u64 mask)
743 { 743 {
744 efer_reserved_bits &= ~mask; 744 efer_reserved_bits &= ~mask;
745 } 745 }
746 EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); 746 EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
747 747
748 748
749 /* 749 /*
750 * Writes msr value into the appropriate "register". 750 * Writes msr value into the appropriate "register".
751 * Returns 0 on success, non-0 otherwise. 751 * Returns 0 on success, non-0 otherwise.
752 * Assumes vcpu_load() was already called. 752 * Assumes vcpu_load() was already called.
753 */ 753 */
754 int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) 754 int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
755 { 755 {
756 return kvm_x86_ops->set_msr(vcpu, msr_index, data); 756 return kvm_x86_ops->set_msr(vcpu, msr_index, data);
757 } 757 }
758 758
759 /* 759 /*
760 * Adapt set_msr() to msr_io()'s calling convention 760 * Adapt set_msr() to msr_io()'s calling convention
761 */ 761 */
762 static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) 762 static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
763 { 763 {
764 return kvm_set_msr(vcpu, index, *data); 764 return kvm_set_msr(vcpu, index, *data);
765 } 765 }
766 766
767 static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) 767 static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
768 { 768 {
769 int version; 769 int version;
770 int r; 770 int r;
771 struct pvclock_wall_clock wc; 771 struct pvclock_wall_clock wc;
772 struct timespec boot; 772 struct timespec boot;
773 773
774 if (!wall_clock) 774 if (!wall_clock)
775 return; 775 return;
776 776
777 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version)); 777 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
778 if (r) 778 if (r)
779 return; 779 return;
780 780
781 if (version & 1) 781 if (version & 1)
782 ++version; /* first time write, random junk */ 782 ++version; /* first time write, random junk */
783 783
784 ++version; 784 ++version;
785 785
786 kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); 786 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
787 787
788 /* 788 /*
789 * The guest calculates current wall clock time by adding 789 * The guest calculates current wall clock time by adding
790 * system time (updated by kvm_write_guest_time below) to the 790 * system time (updated by kvm_write_guest_time below) to the
791 * wall clock specified here. guest system time equals host 791 * wall clock specified here. guest system time equals host
792 * system time for us, thus we must fill in host boot time here. 792 * system time for us, thus we must fill in host boot time here.
793 */ 793 */
794 getboottime(&boot); 794 getboottime(&boot);
795 795
796 wc.sec = boot.tv_sec; 796 wc.sec = boot.tv_sec;
797 wc.nsec = boot.tv_nsec; 797 wc.nsec = boot.tv_nsec;
798 wc.version = version; 798 wc.version = version;
799 799
800 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); 800 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
801 801
802 version++; 802 version++;
803 kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); 803 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
804 } 804 }
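The version field above follows the usual odd/even convention: it is made odd before the payload is rewritten and even again afterwards, so a reader can detect a torn update. A hedged sketch of the matching guest-side read loop; the helper name is an assumption, and the barriers stand in for whatever ordering primitive the guest actually uses:

/* Illustrative reader for the odd/even wall-clock version protocol. */
static void example_read_wall_clock(volatile struct pvclock_wall_clock *wc,
				    u32 *sec, u32 *nsec)
{
	u32 version;

	do {
		version = wc->version;
		smp_rmb();		/* read the payload after the version */
		*sec  = wc->sec;
		*nsec = wc->nsec;
		smp_rmb();		/* then re-check the version */
	} while ((version & 1) || version != wc->version);
}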
805 805
806 static uint32_t div_frac(uint32_t dividend, uint32_t divisor) 806 static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
807 { 807 {
808 uint32_t quotient, remainder; 808 uint32_t quotient, remainder;
809 809
810 /* Don't try to replace with do_div(), this one calculates 810 /* Don't try to replace with do_div(), this one calculates
811 * "(dividend << 32) / divisor" */ 811 * "(dividend << 32) / divisor" */
812 __asm__ ( "divl %4" 812 __asm__ ( "divl %4"
813 : "=a" (quotient), "=d" (remainder) 813 : "=a" (quotient), "=d" (remainder)
814 : "0" (0), "1" (dividend), "r" (divisor) ); 814 : "0" (0), "1" (dividend), "r" (divisor) );
815 return quotient; 815 return quotient;
816 } 816 }
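As the comment says, the divl computes something do_div() cannot express directly: the dividend is placed in the high 32 bits of a 64-bit value before dividing, so the result is dividend/divisor as a 32-bit binary fraction. A portable equivalent, shown only to clarify the arithmetic and valid whenever the quotient fits in 32 bits:

/* Same result as the inline divl above when the quotient fits in 32 bits. */
static uint32_t example_div_frac(uint32_t dividend, uint32_t divisor)
{
	return (uint32_t)(((uint64_t)dividend << 32) / divisor);
}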
817 817
818 static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *hv_clock) 818 static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *hv_clock)
819 { 819 {
820 uint64_t nsecs = 1000000000LL; 820 uint64_t nsecs = 1000000000LL;
821 int32_t shift = 0; 821 int32_t shift = 0;
822 uint64_t tps64; 822 uint64_t tps64;
823 uint32_t tps32; 823 uint32_t tps32;
824 824
825 tps64 = tsc_khz * 1000LL; 825 tps64 = tsc_khz * 1000LL;
826 while (tps64 > nsecs*2) { 826 while (tps64 > nsecs*2) {
827 tps64 >>= 1; 827 tps64 >>= 1;
828 shift--; 828 shift--;
829 } 829 }
830 830
831 tps32 = (uint32_t)tps64; 831 tps32 = (uint32_t)tps64;
832 while (tps32 <= (uint32_t)nsecs) { 832 while (tps32 <= (uint32_t)nsecs) {
833 tps32 <<= 1; 833 tps32 <<= 1;
834 shift++; 834 shift++;
835 } 835 }
836 836
837 hv_clock->tsc_shift = shift; 837 hv_clock->tsc_shift = shift;
838 hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32); 838 hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
839 839
840 pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n", 840 pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
841 __func__, tsc_khz, hv_clock->tsc_shift, 841 __func__, tsc_khz, hv_clock->tsc_shift,
842 hv_clock->tsc_to_system_mul); 842 hv_clock->tsc_to_system_mul);
843 } 843 }
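The shift/multiplier pair computed above lets the guest convert a TSC delta to nanoseconds with one shift and one 32.32 fixed-point multiply. A sketch of the consuming side of that contract, following the pvclock convention; the helper name and the use of a 128-bit intermediate are assumptions for the sketch:

/* delta in TSC ticks -> nanoseconds, using the pair computed above. */
static uint64_t example_scale_delta(uint64_t delta, int8_t tsc_shift,
				    uint32_t tsc_to_system_mul)
{
	if (tsc_shift >= 0)
		delta <<= tsc_shift;
	else
		delta >>= -tsc_shift;

	/* 64x32 product, keep the integer part of the 32.32 result */
	return (uint64_t)(((unsigned __int128)delta * tsc_to_system_mul) >> 32);
}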
844 844
845 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); 845 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
846 846
847 static void kvm_write_guest_time(struct kvm_vcpu *v) 847 static void kvm_write_guest_time(struct kvm_vcpu *v)
848 { 848 {
849 struct timespec ts; 849 struct timespec ts;
850 unsigned long flags; 850 unsigned long flags;
851 struct kvm_vcpu_arch *vcpu = &v->arch; 851 struct kvm_vcpu_arch *vcpu = &v->arch;
852 void *shared_kaddr; 852 void *shared_kaddr;
853 unsigned long this_tsc_khz; 853 unsigned long this_tsc_khz;
854 854
855 if ((!vcpu->time_page)) 855 if ((!vcpu->time_page))
856 return; 856 return;
857 857
858 this_tsc_khz = get_cpu_var(cpu_tsc_khz); 858 this_tsc_khz = get_cpu_var(cpu_tsc_khz);
859 if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) { 859 if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) {
860 kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock); 860 kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock);
861 vcpu->hv_clock_tsc_khz = this_tsc_khz; 861 vcpu->hv_clock_tsc_khz = this_tsc_khz;
862 } 862 }
863 put_cpu_var(cpu_tsc_khz); 863 put_cpu_var(cpu_tsc_khz);
864 864
865 /* Keep irq disabled to prevent changes to the clock */ 865 /* Keep irq disabled to prevent changes to the clock */
866 local_irq_save(flags); 866 local_irq_save(flags);
867 kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp); 867 kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
868 ktime_get_ts(&ts); 868 ktime_get_ts(&ts);
869 monotonic_to_bootbased(&ts); 869 monotonic_to_bootbased(&ts);
870 local_irq_restore(flags); 870 local_irq_restore(flags);
871 871
872 /* With all the info we got, fill in the values */ 872 /* With all the info we got, fill in the values */
873 873
874 vcpu->hv_clock.system_time = ts.tv_nsec + 874 vcpu->hv_clock.system_time = ts.tv_nsec +
875 (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; 875 (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
876 876
877 vcpu->hv_clock.flags = 0; 877 vcpu->hv_clock.flags = 0;
878 878
879 /* 879 /*
880 * The interface expects us to write an even number signaling that the 880 * The interface expects us to write an even number signaling that the
881 * update is finished. Since the guest won't see the intermediate 881 * update is finished. Since the guest won't see the intermediate
882 * state, we just increase by 2 at the end. 882 * state, we just increase by 2 at the end.
883 */ 883 */
884 vcpu->hv_clock.version += 2; 884 vcpu->hv_clock.version += 2;
885 885
886 shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0); 886 shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
887 887
888 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, 888 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
889 sizeof(vcpu->hv_clock)); 889 sizeof(vcpu->hv_clock));
890 890
891 kunmap_atomic(shared_kaddr, KM_USER0); 891 kunmap_atomic(shared_kaddr, KM_USER0);
892 892
893 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); 893 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
894 } 894 }
895 895
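The version bump above is one half of a seqlock-style protocol: the pvclock ABI reserves odd version values for an update in progress, and since this function copies the whole structure in one go the guest only ever observes even values. A hedged guest-side sketch of the matching reader loop (field layout trimmed; the real guest code additionally issues read barriers around the accesses):

#include <stdint.h>

struct pvclock_sketch {
	volatile uint32_t version;
	volatile uint64_t tsc_timestamp;
	volatile uint64_t system_time;
	/* tsc_to_system_mul, tsc_shift, flags omitted */
};

static uint64_t read_system_time(struct pvclock_sketch *clk)
{
	uint32_t v;
	uint64_t t;

	do {
		v = clk->version;	/* odd: update in progress */
		t = clk->system_time;
	} while ((v & 1) || v != clk->version);

	return t;
}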
896 static int kvm_request_guest_time_update(struct kvm_vcpu *v) 896 static int kvm_request_guest_time_update(struct kvm_vcpu *v)
897 { 897 {
898 struct kvm_vcpu_arch *vcpu = &v->arch; 898 struct kvm_vcpu_arch *vcpu = &v->arch;
899 899
900 if (!vcpu->time_page) 900 if (!vcpu->time_page)
901 return 0; 901 return 0;
902 set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests); 902 set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests);
903 return 1; 903 return 1;
904 } 904 }
905 905
906 static bool msr_mtrr_valid(unsigned msr) 906 static bool msr_mtrr_valid(unsigned msr)
907 { 907 {
908 switch (msr) { 908 switch (msr) {
909 case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1: 909 case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
910 case MSR_MTRRfix64K_00000: 910 case MSR_MTRRfix64K_00000:
911 case MSR_MTRRfix16K_80000: 911 case MSR_MTRRfix16K_80000:
912 case MSR_MTRRfix16K_A0000: 912 case MSR_MTRRfix16K_A0000:
913 case MSR_MTRRfix4K_C0000: 913 case MSR_MTRRfix4K_C0000:
914 case MSR_MTRRfix4K_C8000: 914 case MSR_MTRRfix4K_C8000:
915 case MSR_MTRRfix4K_D0000: 915 case MSR_MTRRfix4K_D0000:
916 case MSR_MTRRfix4K_D8000: 916 case MSR_MTRRfix4K_D8000:
917 case MSR_MTRRfix4K_E0000: 917 case MSR_MTRRfix4K_E0000:
918 case MSR_MTRRfix4K_E8000: 918 case MSR_MTRRfix4K_E8000:
919 case MSR_MTRRfix4K_F0000: 919 case MSR_MTRRfix4K_F0000:
920 case MSR_MTRRfix4K_F8000: 920 case MSR_MTRRfix4K_F8000:
921 case MSR_MTRRdefType: 921 case MSR_MTRRdefType:
922 case MSR_IA32_CR_PAT: 922 case MSR_IA32_CR_PAT:
923 return true; 923 return true;
924 case 0x2f8: 924 case 0x2f8:
925 return true; 925 return true;
926 } 926 }
927 return false; 927 return false;
928 } 928 }
929 929
930 static bool valid_pat_type(unsigned t) 930 static bool valid_pat_type(unsigned t)
931 { 931 {
932 return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */ 932 return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
933 } 933 }
934 934
935 static bool valid_mtrr_type(unsigned t) 935 static bool valid_mtrr_type(unsigned t)
936 { 936 {
937 return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ 937 return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
938 } 938 }
939 939
940 static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) 940 static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
941 { 941 {
942 int i; 942 int i;
943 943
944 if (!msr_mtrr_valid(msr)) 944 if (!msr_mtrr_valid(msr))
945 return false; 945 return false;
946 946
947 if (msr == MSR_IA32_CR_PAT) { 947 if (msr == MSR_IA32_CR_PAT) {
948 for (i = 0; i < 8; i++) 948 for (i = 0; i < 8; i++)
949 if (!valid_pat_type((data >> (i * 8)) & 0xff)) 949 if (!valid_pat_type((data >> (i * 8)) & 0xff))
950 return false; 950 return false;
951 return true; 951 return true;
952 } else if (msr == MSR_MTRRdefType) { 952 } else if (msr == MSR_MTRRdefType) {
953 if (data & ~0xcff) 953 if (data & ~0xcff)
954 return false; 954 return false;
955 return valid_mtrr_type(data & 0xff); 955 return valid_mtrr_type(data & 0xff);
956 } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) { 956 } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
957 for (i = 0; i < 8 ; i++) 957 for (i = 0; i < 8 ; i++)
958 if (!valid_mtrr_type((data >> (i * 8)) & 0xff)) 958 if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
959 return false; 959 return false;
960 return true; 960 return true;
961 } 961 }
962 962
963 /* variable MTRRs */ 963 /* variable MTRRs */
964 return valid_mtrr_type(data & 0xff); 964 return valid_mtrr_type(data & 0xff);
965 } 965 }
966 966
967 static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) 967 static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
968 { 968 {
969 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; 969 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
970 970
971 if (!mtrr_valid(vcpu, msr, data)) 971 if (!mtrr_valid(vcpu, msr, data))
972 return 1; 972 return 1;
973 973
974 if (msr == MSR_MTRRdefType) { 974 if (msr == MSR_MTRRdefType) {
975 vcpu->arch.mtrr_state.def_type = data; 975 vcpu->arch.mtrr_state.def_type = data;
976 vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10; 976 vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
977 } else if (msr == MSR_MTRRfix64K_00000) 977 } else if (msr == MSR_MTRRfix64K_00000)
978 p[0] = data; 978 p[0] = data;
979 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) 979 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
980 p[1 + msr - MSR_MTRRfix16K_80000] = data; 980 p[1 + msr - MSR_MTRRfix16K_80000] = data;
981 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) 981 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
982 p[3 + msr - MSR_MTRRfix4K_C0000] = data; 982 p[3 + msr - MSR_MTRRfix4K_C0000] = data;
983 else if (msr == MSR_IA32_CR_PAT) 983 else if (msr == MSR_IA32_CR_PAT)
984 vcpu->arch.pat = data; 984 vcpu->arch.pat = data;
985 else { /* Variable MTRRs */ 985 else { /* Variable MTRRs */
986 int idx, is_mtrr_mask; 986 int idx, is_mtrr_mask;
987 u64 *pt; 987 u64 *pt;
988 988
989 idx = (msr - 0x200) / 2; 989 idx = (msr - 0x200) / 2;
990 is_mtrr_mask = msr - 0x200 - 2 * idx; 990 is_mtrr_mask = msr - 0x200 - 2 * idx;
991 if (!is_mtrr_mask) 991 if (!is_mtrr_mask)
992 pt = 992 pt =
993 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; 993 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
994 else 994 else
995 pt = 995 pt =
996 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; 996 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
997 *pt = data; 997 *pt = data;
998 } 998 }
999 999
1000 kvm_mmu_reset_context(vcpu); 1000 kvm_mmu_reset_context(vcpu);
1001 return 0; 1001 return 0;
1002 } 1002 }
1003 1003
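The (msr - 0x200) / 2 arithmetic in the variable-MTRR branch relies on the architectural layout: MTRRphysBase0/MTRRphysMask0 start at MSR 0x200 and every further range adds another base/mask pair. A small stand-alone illustration of the decoding (nothing KVM-specific, just the index math):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t msr;

	for (msr = 0x200; msr < 0x208; msr++) {
		int idx = (msr - 0x200) / 2;
		int is_mask = msr - 0x200 - 2 * idx;	/* same as (msr - 0x200) & 1 */

		printf("MSR 0x%x -> variable range %d, %s register\n",
		       msr, idx, is_mask ? "mask" : "base");
	}
	return 0;
}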
1004 static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1004 static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1005 { 1005 {
1006 u64 mcg_cap = vcpu->arch.mcg_cap; 1006 u64 mcg_cap = vcpu->arch.mcg_cap;
1007 unsigned bank_num = mcg_cap & 0xff; 1007 unsigned bank_num = mcg_cap & 0xff;
1008 1008
1009 switch (msr) { 1009 switch (msr) {
1010 case MSR_IA32_MCG_STATUS: 1010 case MSR_IA32_MCG_STATUS:
1011 vcpu->arch.mcg_status = data; 1011 vcpu->arch.mcg_status = data;
1012 break; 1012 break;
1013 case MSR_IA32_MCG_CTL: 1013 case MSR_IA32_MCG_CTL:
1014 if (!(mcg_cap & MCG_CTL_P)) 1014 if (!(mcg_cap & MCG_CTL_P))
1015 return 1; 1015 return 1;
1016 if (data != 0 && data != ~(u64)0) 1016 if (data != 0 && data != ~(u64)0)
1017 return -1; 1017 return -1;
1018 vcpu->arch.mcg_ctl = data; 1018 vcpu->arch.mcg_ctl = data;
1019 break; 1019 break;
1020 default: 1020 default:
1021 if (msr >= MSR_IA32_MC0_CTL && 1021 if (msr >= MSR_IA32_MC0_CTL &&
1022 msr < MSR_IA32_MC0_CTL + 4 * bank_num) { 1022 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
1023 u32 offset = msr - MSR_IA32_MC0_CTL; 1023 u32 offset = msr - MSR_IA32_MC0_CTL;
1024 /* only 0 or all 1s can be written to IA32_MCi_CTL 1024 /* only 0 or all 1s can be written to IA32_MCi_CTL
1025 * some Linux kernels though clear bit 10 in bank 4 to 1025 * some Linux kernels though clear bit 10 in bank 4 to
1026 * work around a BIOS/GART TBL issue on AMD K8s, ignore 1026 * work around a BIOS/GART TBL issue on AMD K8s, ignore
1027 * this to avoid an uncaught #GP in the guest 1027 * this to avoid an uncaught #GP in the guest
1028 */ 1028 */
1029 if ((offset & 0x3) == 0 && 1029 if ((offset & 0x3) == 0 &&
1030 data != 0 && (data | (1 << 10)) != ~(u64)0) 1030 data != 0 && (data | (1 << 10)) != ~(u64)0)
1031 return -1; 1031 return -1;
1032 vcpu->arch.mce_banks[offset] = data; 1032 vcpu->arch.mce_banks[offset] = data;
1033 break; 1033 break;
1034 } 1034 }
1035 return 1; 1035 return 1;
1036 } 1036 }
1037 return 0; 1037 return 0;
1038 } 1038 }
1039 1039
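The default branch above exploits the fixed machine-check layout: each bank owns four consecutive MSRs (CTL, STATUS, ADDR, MISC) starting at MSR_IA32_MC0_CTL, so msr - MSR_IA32_MC0_CTL indexes mce_banks[] directly and (offset & 3) == 0 picks out the CTL register that gets the zero/all-ones check. A small illustration of the mapping (0x400 is the architectural value of MC0_CTL, hard-coded here only for the example):

#include <stdint.h>
#include <stdio.h>

#define MC0_CTL 0x400	/* MSR_IA32_MC0_CTL */

int main(void)
{
	static const char *names[4] = { "CTL", "STATUS", "ADDR", "MISC" };
	uint32_t msr;

	for (msr = MC0_CTL; msr < MC0_CTL + 8; msr++) {
		uint32_t offset = msr - MC0_CTL;

		printf("MSR 0x%x -> bank %u %s\n",
		       msr, offset / 4, names[offset & 3]);
	}
	return 0;
}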
1040 static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data) 1040 static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
1041 { 1041 {
1042 struct kvm *kvm = vcpu->kvm; 1042 struct kvm *kvm = vcpu->kvm;
1043 int lm = is_long_mode(vcpu); 1043 int lm = is_long_mode(vcpu);
1044 u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64 1044 u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
1045 : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32; 1045 : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
1046 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64 1046 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
1047 : kvm->arch.xen_hvm_config.blob_size_32; 1047 : kvm->arch.xen_hvm_config.blob_size_32;
1048 u32 page_num = data & ~PAGE_MASK; 1048 u32 page_num = data & ~PAGE_MASK;
1049 u64 page_addr = data & PAGE_MASK; 1049 u64 page_addr = data & PAGE_MASK;
1050 u8 *page; 1050 u8 *page;
1051 int r; 1051 int r;
1052 1052
1053 r = -E2BIG; 1053 r = -E2BIG;
1054 if (page_num >= blob_size) 1054 if (page_num >= blob_size)
1055 goto out; 1055 goto out;
1056 r = -ENOMEM; 1056 r = -ENOMEM;
1057 page = kzalloc(PAGE_SIZE, GFP_KERNEL); 1057 page = kzalloc(PAGE_SIZE, GFP_KERNEL);
1058 if (!page) 1058 if (!page)
1059 goto out; 1059 goto out;
1060 r = -EFAULT; 1060 r = -EFAULT;
1061 if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE)) 1061 if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE))
1062 goto out_free; 1062 goto out_free;
1063 if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE)) 1063 if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
1064 goto out_free; 1064 goto out_free;
1065 r = 0; 1065 r = 0;
1066 out_free: 1066 out_free:
1067 kfree(page); 1067 kfree(page);
1068 out: 1068 out:
1069 return r; 1069 return r;
1070 } 1070 }
1071 1071
1072 static bool kvm_hv_hypercall_enabled(struct kvm *kvm) 1072 static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
1073 { 1073 {
1074 return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; 1074 return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
1075 } 1075 }
1076 1076
1077 static bool kvm_hv_msr_partition_wide(u32 msr) 1077 static bool kvm_hv_msr_partition_wide(u32 msr)
1078 { 1078 {
1079 bool r = false; 1079 bool r = false;
1080 switch (msr) { 1080 switch (msr) {
1081 case HV_X64_MSR_GUEST_OS_ID: 1081 case HV_X64_MSR_GUEST_OS_ID:
1082 case HV_X64_MSR_HYPERCALL: 1082 case HV_X64_MSR_HYPERCALL:
1083 r = true; 1083 r = true;
1084 break; 1084 break;
1085 } 1085 }
1086 1086
1087 return r; 1087 return r;
1088 } 1088 }
1089 1089
1090 static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1090 static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1091 { 1091 {
1092 struct kvm *kvm = vcpu->kvm; 1092 struct kvm *kvm = vcpu->kvm;
1093 1093
1094 switch (msr) { 1094 switch (msr) {
1095 case HV_X64_MSR_GUEST_OS_ID: 1095 case HV_X64_MSR_GUEST_OS_ID:
1096 kvm->arch.hv_guest_os_id = data; 1096 kvm->arch.hv_guest_os_id = data;
1097 /* setting guest os id to zero disables hypercall page */ 1097 /* setting guest os id to zero disables hypercall page */
1098 if (!kvm->arch.hv_guest_os_id) 1098 if (!kvm->arch.hv_guest_os_id)
1099 kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE; 1099 kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
1100 break; 1100 break;
1101 case HV_X64_MSR_HYPERCALL: { 1101 case HV_X64_MSR_HYPERCALL: {
1102 u64 gfn; 1102 u64 gfn;
1103 unsigned long addr; 1103 unsigned long addr;
1104 u8 instructions[4]; 1104 u8 instructions[4];
1105 1105
1106 /* if guest os id is not set hypercall should remain disabled */ 1106 /* if guest os id is not set hypercall should remain disabled */
1107 if (!kvm->arch.hv_guest_os_id) 1107 if (!kvm->arch.hv_guest_os_id)
1108 break; 1108 break;
1109 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) { 1109 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
1110 kvm->arch.hv_hypercall = data; 1110 kvm->arch.hv_hypercall = data;
1111 break; 1111 break;
1112 } 1112 }
1113 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT; 1113 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
1114 addr = gfn_to_hva(kvm, gfn); 1114 addr = gfn_to_hva(kvm, gfn);
1115 if (kvm_is_error_hva(addr)) 1115 if (kvm_is_error_hva(addr))
1116 return 1; 1116 return 1;
1117 kvm_x86_ops->patch_hypercall(vcpu, instructions); 1117 kvm_x86_ops->patch_hypercall(vcpu, instructions);
1118 ((unsigned char *)instructions)[3] = 0xc3; /* ret */ 1118 ((unsigned char *)instructions)[3] = 0xc3; /* ret */
1119 if (copy_to_user((void __user *)addr, instructions, 4)) 1119 if (copy_to_user((void __user *)addr, instructions, 4))
1120 return 1; 1120 return 1;
1121 kvm->arch.hv_hypercall = data; 1121 kvm->arch.hv_hypercall = data;
1122 break; 1122 break;
1123 } 1123 }
1124 default: 1124 default:
1125 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " 1125 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1126 "data 0x%llx\n", msr, data); 1126 "data 0x%llx\n", msr, data);
1127 return 1; 1127 return 1;
1128 } 1128 }
1129 return 0; 1129 return 0;
1130 } 1130 }
1131 1131
1132 static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1132 static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1133 { 1133 {
1134 switch (msr) { 1134 switch (msr) {
1135 case HV_X64_MSR_APIC_ASSIST_PAGE: { 1135 case HV_X64_MSR_APIC_ASSIST_PAGE: {
1136 unsigned long addr; 1136 unsigned long addr;
1137 1137
1138 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { 1138 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
1139 vcpu->arch.hv_vapic = data; 1139 vcpu->arch.hv_vapic = data;
1140 break; 1140 break;
1141 } 1141 }
1142 addr = gfn_to_hva(vcpu->kvm, data >> 1142 addr = gfn_to_hva(vcpu->kvm, data >>
1143 HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT); 1143 HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
1144 if (kvm_is_error_hva(addr)) 1144 if (kvm_is_error_hva(addr))
1145 return 1; 1145 return 1;
1146 if (clear_user((void __user *)addr, PAGE_SIZE)) 1146 if (clear_user((void __user *)addr, PAGE_SIZE))
1147 return 1; 1147 return 1;
1148 vcpu->arch.hv_vapic = data; 1148 vcpu->arch.hv_vapic = data;
1149 break; 1149 break;
1150 } 1150 }
1151 case HV_X64_MSR_EOI: 1151 case HV_X64_MSR_EOI:
1152 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data); 1152 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
1153 case HV_X64_MSR_ICR: 1153 case HV_X64_MSR_ICR:
1154 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); 1154 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
1155 case HV_X64_MSR_TPR: 1155 case HV_X64_MSR_TPR:
1156 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); 1156 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
1157 default: 1157 default:
1158 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " 1158 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1159 "data 0x%llx\n", msr, data); 1159 "data 0x%llx\n", msr, data);
1160 return 1; 1160 return 1;
1161 } 1161 }
1162 1162
1163 return 0; 1163 return 0;
1164 } 1164 }
1165 1165
1166 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1166 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1167 { 1167 {
1168 switch (msr) { 1168 switch (msr) {
1169 case MSR_EFER: 1169 case MSR_EFER:
1170 return set_efer(vcpu, data); 1170 return set_efer(vcpu, data);
1171 case MSR_K7_HWCR: 1171 case MSR_K7_HWCR:
1172 data &= ~(u64)0x40; /* ignore flush filter disable */ 1172 data &= ~(u64)0x40; /* ignore flush filter disable */
1173 data &= ~(u64)0x100; /* ignore ignne emulation enable */ 1173 data &= ~(u64)0x100; /* ignore ignne emulation enable */
1174 if (data != 0) { 1174 if (data != 0) {
1175 pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", 1175 pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
1176 data); 1176 data);
1177 return 1; 1177 return 1;
1178 } 1178 }
1179 break; 1179 break;
1180 case MSR_FAM10H_MMIO_CONF_BASE: 1180 case MSR_FAM10H_MMIO_CONF_BASE:
1181 if (data != 0) { 1181 if (data != 0) {
1182 pr_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: " 1182 pr_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
1183 "0x%llx\n", data); 1183 "0x%llx\n", data);
1184 return 1; 1184 return 1;
1185 } 1185 }
1186 break; 1186 break;
1187 case MSR_AMD64_NB_CFG: 1187 case MSR_AMD64_NB_CFG:
1188 break; 1188 break;
1189 case MSR_IA32_DEBUGCTLMSR: 1189 case MSR_IA32_DEBUGCTLMSR:
1190 if (!data) { 1190 if (!data) {
1191 /* We support the non-activated case already */ 1191 /* We support the non-activated case already */
1192 break; 1192 break;
1193 } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) { 1193 } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
1194 /* Values other than LBR and BTF are vendor-specific, 1194 /* Values other than LBR and BTF are vendor-specific,
1195 thus reserved and should throw a #GP */ 1195 thus reserved and should throw a #GP */
1196 return 1; 1196 return 1;
1197 } 1197 }
1198 pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", 1198 pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
1199 __func__, data); 1199 __func__, data);
1200 break; 1200 break;
1201 case MSR_IA32_UCODE_REV: 1201 case MSR_IA32_UCODE_REV:
1202 case MSR_IA32_UCODE_WRITE: 1202 case MSR_IA32_UCODE_WRITE:
1203 case MSR_VM_HSAVE_PA: 1203 case MSR_VM_HSAVE_PA:
1204 case MSR_AMD64_PATCH_LOADER: 1204 case MSR_AMD64_PATCH_LOADER:
1205 break; 1205 break;
1206 case 0x200 ... 0x2ff: 1206 case 0x200 ... 0x2ff:
1207 return set_msr_mtrr(vcpu, msr, data); 1207 return set_msr_mtrr(vcpu, msr, data);
1208 case MSR_IA32_APICBASE: 1208 case MSR_IA32_APICBASE:
1209 kvm_set_apic_base(vcpu, data); 1209 kvm_set_apic_base(vcpu, data);
1210 break; 1210 break;
1211 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: 1211 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
1212 return kvm_x2apic_msr_write(vcpu, msr, data); 1212 return kvm_x2apic_msr_write(vcpu, msr, data);
1213 case MSR_IA32_MISC_ENABLE: 1213 case MSR_IA32_MISC_ENABLE:
1214 vcpu->arch.ia32_misc_enable_msr = data; 1214 vcpu->arch.ia32_misc_enable_msr = data;
1215 break; 1215 break;
1216 case MSR_KVM_WALL_CLOCK_NEW: 1216 case MSR_KVM_WALL_CLOCK_NEW:
1217 case MSR_KVM_WALL_CLOCK: 1217 case MSR_KVM_WALL_CLOCK:
1218 vcpu->kvm->arch.wall_clock = data; 1218 vcpu->kvm->arch.wall_clock = data;
1219 kvm_write_wall_clock(vcpu->kvm, data); 1219 kvm_write_wall_clock(vcpu->kvm, data);
1220 break; 1220 break;
1221 case MSR_KVM_SYSTEM_TIME_NEW: 1221 case MSR_KVM_SYSTEM_TIME_NEW:
1222 case MSR_KVM_SYSTEM_TIME: { 1222 case MSR_KVM_SYSTEM_TIME: {
1223 if (vcpu->arch.time_page) { 1223 if (vcpu->arch.time_page) {
1224 kvm_release_page_dirty(vcpu->arch.time_page); 1224 kvm_release_page_dirty(vcpu->arch.time_page);
1225 vcpu->arch.time_page = NULL; 1225 vcpu->arch.time_page = NULL;
1226 } 1226 }
1227 1227
1228 vcpu->arch.time = data; 1228 vcpu->arch.time = data;
1229 1229
1230 /* we verify if the enable bit is set... */ 1230 /* we verify if the enable bit is set... */
1231 if (!(data & 1)) 1231 if (!(data & 1))
1232 break; 1232 break;
1233 1233
1234 /* ...but clean it before doing the actual write */ 1234 /* ...but clean it before doing the actual write */
1235 vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); 1235 vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
1236 1236
1237 vcpu->arch.time_page = 1237 vcpu->arch.time_page =
1238 gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); 1238 gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
1239 1239
1240 if (is_error_page(vcpu->arch.time_page)) { 1240 if (is_error_page(vcpu->arch.time_page)) {
1241 kvm_release_page_clean(vcpu->arch.time_page); 1241 kvm_release_page_clean(vcpu->arch.time_page);
1242 vcpu->arch.time_page = NULL; 1242 vcpu->arch.time_page = NULL;
1243 } 1243 }
1244 1244
1245 kvm_request_guest_time_update(vcpu); 1245 kvm_request_guest_time_update(vcpu);
1246 break; 1246 break;
1247 } 1247 }
1248 case MSR_IA32_MCG_CTL: 1248 case MSR_IA32_MCG_CTL:
1249 case MSR_IA32_MCG_STATUS: 1249 case MSR_IA32_MCG_STATUS:
1250 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: 1250 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
1251 return set_msr_mce(vcpu, msr, data); 1251 return set_msr_mce(vcpu, msr, data);
1252 1252
1253 /* Performance counters are not protected by a CPUID bit, 1253 /* Performance counters are not protected by a CPUID bit,
1254 * so we should check all of them in the generic path for the sake of 1254 * so we should check all of them in the generic path for the sake of
1255 * cross vendor migration. 1255 * cross vendor migration.
1256 * Writing a zero into the event select MSRs disables them, 1256 * Writing a zero into the event select MSRs disables them,
1257 * which we perfectly emulate ;-). Any other value should be at least 1257 * which we perfectly emulate ;-). Any other value should be at least
1258 * reported, some guests depend on them. 1258 * reported, some guests depend on them.
1259 */ 1259 */
1260 case MSR_P6_EVNTSEL0: 1260 case MSR_P6_EVNTSEL0:
1261 case MSR_P6_EVNTSEL1: 1261 case MSR_P6_EVNTSEL1:
1262 case MSR_K7_EVNTSEL0: 1262 case MSR_K7_EVNTSEL0:
1263 case MSR_K7_EVNTSEL1: 1263 case MSR_K7_EVNTSEL1:
1264 case MSR_K7_EVNTSEL2: 1264 case MSR_K7_EVNTSEL2:
1265 case MSR_K7_EVNTSEL3: 1265 case MSR_K7_EVNTSEL3:
1266 if (data != 0) 1266 if (data != 0)
1267 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " 1267 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
1268 "0x%x data 0x%llx\n", msr, data); 1268 "0x%x data 0x%llx\n", msr, data);
1269 break; 1269 break;
1270 /* at least RHEL 4 unconditionally writes to the perfctr registers, 1270 /* at least RHEL 4 unconditionally writes to the perfctr registers,
1271 * so we ignore writes to make it happy. 1271 * so we ignore writes to make it happy.
1272 */ 1272 */
1273 case MSR_P6_PERFCTR0: 1273 case MSR_P6_PERFCTR0:
1274 case MSR_P6_PERFCTR1: 1274 case MSR_P6_PERFCTR1:
1275 case MSR_K7_PERFCTR0: 1275 case MSR_K7_PERFCTR0:
1276 case MSR_K7_PERFCTR1: 1276 case MSR_K7_PERFCTR1:
1277 case MSR_K7_PERFCTR2: 1277 case MSR_K7_PERFCTR2:
1278 case MSR_K7_PERFCTR3: 1278 case MSR_K7_PERFCTR3:
1279 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " 1279 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
1280 "0x%x data 0x%llx\n", msr, data); 1280 "0x%x data 0x%llx\n", msr, data);
1281 break; 1281 break;
1282 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: 1282 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
1283 if (kvm_hv_msr_partition_wide(msr)) { 1283 if (kvm_hv_msr_partition_wide(msr)) {
1284 int r; 1284 int r;
1285 mutex_lock(&vcpu->kvm->lock); 1285 mutex_lock(&vcpu->kvm->lock);
1286 r = set_msr_hyperv_pw(vcpu, msr, data); 1286 r = set_msr_hyperv_pw(vcpu, msr, data);
1287 mutex_unlock(&vcpu->kvm->lock); 1287 mutex_unlock(&vcpu->kvm->lock);
1288 return r; 1288 return r;
1289 } else 1289 } else
1290 return set_msr_hyperv(vcpu, msr, data); 1290 return set_msr_hyperv(vcpu, msr, data);
1291 break; 1291 break;
1292 default: 1292 default:
1293 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) 1293 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
1294 return xen_hvm_config(vcpu, data); 1294 return xen_hvm_config(vcpu, data);
1295 if (!ignore_msrs) { 1295 if (!ignore_msrs) {
1296 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", 1296 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
1297 msr, data); 1297 msr, data);
1298 return 1; 1298 return 1;
1299 } else { 1299 } else {
1300 pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", 1300 pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
1301 msr, data); 1301 msr, data);
1302 break; 1302 break;
1303 } 1303 }
1304 } 1304 }
1305 return 0; 1305 return 0;
1306 } 1306 }
1307 EXPORT_SYMBOL_GPL(kvm_set_msr_common); 1307 EXPORT_SYMBOL_GPL(kvm_set_msr_common);
1308 1308
1309 1309
1310 /* 1310 /*
1311 * Reads an msr value (of 'msr_index') into 'pdata'. 1311 * Reads an msr value (of 'msr_index') into 'pdata'.
1312 * Returns 0 on success, non-0 otherwise. 1312 * Returns 0 on success, non-0 otherwise.
1313 * Assumes vcpu_load() was already called. 1313 * Assumes vcpu_load() was already called.
1314 */ 1314 */
1315 int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) 1315 int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
1316 { 1316 {
1317 return kvm_x86_ops->get_msr(vcpu, msr_index, pdata); 1317 return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
1318 } 1318 }
1319 1319
1320 static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1320 static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1321 { 1321 {
1322 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; 1322 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
1323 1323
1324 if (!msr_mtrr_valid(msr)) 1324 if (!msr_mtrr_valid(msr))
1325 return 1; 1325 return 1;
1326 1326
1327 if (msr == MSR_MTRRdefType) 1327 if (msr == MSR_MTRRdefType)
1328 *pdata = vcpu->arch.mtrr_state.def_type + 1328 *pdata = vcpu->arch.mtrr_state.def_type +
1329 (vcpu->arch.mtrr_state.enabled << 10); 1329 (vcpu->arch.mtrr_state.enabled << 10);
1330 else if (msr == MSR_MTRRfix64K_00000) 1330 else if (msr == MSR_MTRRfix64K_00000)
1331 *pdata = p[0]; 1331 *pdata = p[0];
1332 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) 1332 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
1333 *pdata = p[1 + msr - MSR_MTRRfix16K_80000]; 1333 *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
1334 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) 1334 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
1335 *pdata = p[3 + msr - MSR_MTRRfix4K_C0000]; 1335 *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
1336 else if (msr == MSR_IA32_CR_PAT) 1336 else if (msr == MSR_IA32_CR_PAT)
1337 *pdata = vcpu->arch.pat; 1337 *pdata = vcpu->arch.pat;
1338 else { /* Variable MTRRs */ 1338 else { /* Variable MTRRs */
1339 int idx, is_mtrr_mask; 1339 int idx, is_mtrr_mask;
1340 u64 *pt; 1340 u64 *pt;
1341 1341
1342 idx = (msr - 0x200) / 2; 1342 idx = (msr - 0x200) / 2;
1343 is_mtrr_mask = msr - 0x200 - 2 * idx; 1343 is_mtrr_mask = msr - 0x200 - 2 * idx;
1344 if (!is_mtrr_mask) 1344 if (!is_mtrr_mask)
1345 pt = 1345 pt =
1346 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; 1346 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
1347 else 1347 else
1348 pt = 1348 pt =
1349 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; 1349 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
1350 *pdata = *pt; 1350 *pdata = *pt;
1351 } 1351 }
1352 1352
1353 return 0; 1353 return 0;
1354 } 1354 }
1355 1355
1356 static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1356 static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1357 { 1357 {
1358 u64 data; 1358 u64 data;
1359 u64 mcg_cap = vcpu->arch.mcg_cap; 1359 u64 mcg_cap = vcpu->arch.mcg_cap;
1360 unsigned bank_num = mcg_cap & 0xff; 1360 unsigned bank_num = mcg_cap & 0xff;
1361 1361
1362 switch (msr) { 1362 switch (msr) {
1363 case MSR_IA32_P5_MC_ADDR: 1363 case MSR_IA32_P5_MC_ADDR:
1364 case MSR_IA32_P5_MC_TYPE: 1364 case MSR_IA32_P5_MC_TYPE:
1365 data = 0; 1365 data = 0;
1366 break; 1366 break;
1367 case MSR_IA32_MCG_CAP: 1367 case MSR_IA32_MCG_CAP:
1368 data = vcpu->arch.mcg_cap; 1368 data = vcpu->arch.mcg_cap;
1369 break; 1369 break;
1370 case MSR_IA32_MCG_CTL: 1370 case MSR_IA32_MCG_CTL:
1371 if (!(mcg_cap & MCG_CTL_P)) 1371 if (!(mcg_cap & MCG_CTL_P))
1372 return 1; 1372 return 1;
1373 data = vcpu->arch.mcg_ctl; 1373 data = vcpu->arch.mcg_ctl;
1374 break; 1374 break;
1375 case MSR_IA32_MCG_STATUS: 1375 case MSR_IA32_MCG_STATUS:
1376 data = vcpu->arch.mcg_status; 1376 data = vcpu->arch.mcg_status;
1377 break; 1377 break;
1378 default: 1378 default:
1379 if (msr >= MSR_IA32_MC0_CTL && 1379 if (msr >= MSR_IA32_MC0_CTL &&
1380 msr < MSR_IA32_MC0_CTL + 4 * bank_num) { 1380 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
1381 u32 offset = msr - MSR_IA32_MC0_CTL; 1381 u32 offset = msr - MSR_IA32_MC0_CTL;
1382 data = vcpu->arch.mce_banks[offset]; 1382 data = vcpu->arch.mce_banks[offset];
1383 break; 1383 break;
1384 } 1384 }
1385 return 1; 1385 return 1;
1386 } 1386 }
1387 *pdata = data; 1387 *pdata = data;
1388 return 0; 1388 return 0;
1389 } 1389 }
1390 1390
1391 static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1391 static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1392 { 1392 {
1393 u64 data = 0; 1393 u64 data = 0;
1394 struct kvm *kvm = vcpu->kvm; 1394 struct kvm *kvm = vcpu->kvm;
1395 1395
1396 switch (msr) { 1396 switch (msr) {
1397 case HV_X64_MSR_GUEST_OS_ID: 1397 case HV_X64_MSR_GUEST_OS_ID:
1398 data = kvm->arch.hv_guest_os_id; 1398 data = kvm->arch.hv_guest_os_id;
1399 break; 1399 break;
1400 case HV_X64_MSR_HYPERCALL: 1400 case HV_X64_MSR_HYPERCALL:
1401 data = kvm->arch.hv_hypercall; 1401 data = kvm->arch.hv_hypercall;
1402 break; 1402 break;
1403 default: 1403 default:
1404 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); 1404 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1405 return 1; 1405 return 1;
1406 } 1406 }
1407 1407
1408 *pdata = data; 1408 *pdata = data;
1409 return 0; 1409 return 0;
1410 } 1410 }
1411 1411
1412 static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1412 static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1413 { 1413 {
1414 u64 data = 0; 1414 u64 data = 0;
1415 1415
1416 switch (msr) { 1416 switch (msr) {
1417 case HV_X64_MSR_VP_INDEX: { 1417 case HV_X64_MSR_VP_INDEX: {
1418 int r; 1418 int r;
1419 struct kvm_vcpu *v; 1419 struct kvm_vcpu *v;
1420 kvm_for_each_vcpu(r, v, vcpu->kvm) 1420 kvm_for_each_vcpu(r, v, vcpu->kvm)
1421 if (v == vcpu) 1421 if (v == vcpu)
1422 data = r; 1422 data = r;
1423 break; 1423 break;
1424 } 1424 }
1425 case HV_X64_MSR_EOI: 1425 case HV_X64_MSR_EOI:
1426 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); 1426 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
1427 case HV_X64_MSR_ICR: 1427 case HV_X64_MSR_ICR:
1428 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); 1428 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
1429 case HV_X64_MSR_TPR: 1429 case HV_X64_MSR_TPR:
1430 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); 1430 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
1431 default: 1431 default:
1432 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); 1432 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1433 return 1; 1433 return 1;
1434 } 1434 }
1435 *pdata = data; 1435 *pdata = data;
1436 return 0; 1436 return 0;
1437 } 1437 }
1438 1438
1439 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1439 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1440 { 1440 {
1441 u64 data; 1441 u64 data;
1442 1442
1443 switch (msr) { 1443 switch (msr) {
1444 case MSR_IA32_PLATFORM_ID: 1444 case MSR_IA32_PLATFORM_ID:
1445 case MSR_IA32_UCODE_REV: 1445 case MSR_IA32_UCODE_REV:
1446 case MSR_IA32_EBL_CR_POWERON: 1446 case MSR_IA32_EBL_CR_POWERON:
1447 case MSR_IA32_DEBUGCTLMSR: 1447 case MSR_IA32_DEBUGCTLMSR:
1448 case MSR_IA32_LASTBRANCHFROMIP: 1448 case MSR_IA32_LASTBRANCHFROMIP:
1449 case MSR_IA32_LASTBRANCHTOIP: 1449 case MSR_IA32_LASTBRANCHTOIP:
1450 case MSR_IA32_LASTINTFROMIP: 1450 case MSR_IA32_LASTINTFROMIP:
1451 case MSR_IA32_LASTINTTOIP: 1451 case MSR_IA32_LASTINTTOIP:
1452 case MSR_K8_SYSCFG: 1452 case MSR_K8_SYSCFG:
1453 case MSR_K7_HWCR: 1453 case MSR_K7_HWCR:
1454 case MSR_VM_HSAVE_PA: 1454 case MSR_VM_HSAVE_PA:
1455 case MSR_P6_PERFCTR0: 1455 case MSR_P6_PERFCTR0:
1456 case MSR_P6_PERFCTR1: 1456 case MSR_P6_PERFCTR1:
1457 case MSR_P6_EVNTSEL0: 1457 case MSR_P6_EVNTSEL0:
1458 case MSR_P6_EVNTSEL1: 1458 case MSR_P6_EVNTSEL1:
1459 case MSR_K7_EVNTSEL0: 1459 case MSR_K7_EVNTSEL0:
1460 case MSR_K7_PERFCTR0: 1460 case MSR_K7_PERFCTR0:
1461 case MSR_K8_INT_PENDING_MSG: 1461 case MSR_K8_INT_PENDING_MSG:
1462 case MSR_AMD64_NB_CFG: 1462 case MSR_AMD64_NB_CFG:
1463 case MSR_FAM10H_MMIO_CONF_BASE: 1463 case MSR_FAM10H_MMIO_CONF_BASE:
1464 data = 0; 1464 data = 0;
1465 break; 1465 break;
1466 case MSR_MTRRcap: 1466 case MSR_MTRRcap:
1467 data = 0x500 | KVM_NR_VAR_MTRR; 1467 data = 0x500 | KVM_NR_VAR_MTRR;
1468 break; 1468 break;
1469 case 0x200 ... 0x2ff: 1469 case 0x200 ... 0x2ff:
1470 return get_msr_mtrr(vcpu, msr, pdata); 1470 return get_msr_mtrr(vcpu, msr, pdata);
1471 case 0xcd: /* fsb frequency */ 1471 case 0xcd: /* fsb frequency */
1472 data = 3; 1472 data = 3;
1473 break; 1473 break;
1474 case MSR_IA32_APICBASE: 1474 case MSR_IA32_APICBASE:
1475 data = kvm_get_apic_base(vcpu); 1475 data = kvm_get_apic_base(vcpu);
1476 break; 1476 break;
1477 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: 1477 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
1478 return kvm_x2apic_msr_read(vcpu, msr, pdata); 1478 return kvm_x2apic_msr_read(vcpu, msr, pdata);
1479 break; 1479 break;
1480 case MSR_IA32_MISC_ENABLE: 1480 case MSR_IA32_MISC_ENABLE:
1481 data = vcpu->arch.ia32_misc_enable_msr; 1481 data = vcpu->arch.ia32_misc_enable_msr;
1482 break; 1482 break;
1483 case MSR_IA32_PERF_STATUS: 1483 case MSR_IA32_PERF_STATUS:
1484 /* TSC increment by tick */ 1484 /* TSC increment by tick */
1485 data = 1000ULL; 1485 data = 1000ULL;
1486 /* CPU multiplier */ 1486 /* CPU multiplier */
1487 data |= (((uint64_t)4ULL) << 40); 1487 data |= (((uint64_t)4ULL) << 40);
1488 break; 1488 break;
1489 case MSR_EFER: 1489 case MSR_EFER:
1490 data = vcpu->arch.efer; 1490 data = vcpu->arch.efer;
1491 break; 1491 break;
1492 case MSR_KVM_WALL_CLOCK: 1492 case MSR_KVM_WALL_CLOCK:
1493 case MSR_KVM_WALL_CLOCK_NEW: 1493 case MSR_KVM_WALL_CLOCK_NEW:
1494 data = vcpu->kvm->arch.wall_clock; 1494 data = vcpu->kvm->arch.wall_clock;
1495 break; 1495 break;
1496 case MSR_KVM_SYSTEM_TIME: 1496 case MSR_KVM_SYSTEM_TIME:
1497 case MSR_KVM_SYSTEM_TIME_NEW: 1497 case MSR_KVM_SYSTEM_TIME_NEW:
1498 data = vcpu->arch.time; 1498 data = vcpu->arch.time;
1499 break; 1499 break;
1500 case MSR_IA32_P5_MC_ADDR: 1500 case MSR_IA32_P5_MC_ADDR:
1501 case MSR_IA32_P5_MC_TYPE: 1501 case MSR_IA32_P5_MC_TYPE:
1502 case MSR_IA32_MCG_CAP: 1502 case MSR_IA32_MCG_CAP:
1503 case MSR_IA32_MCG_CTL: 1503 case MSR_IA32_MCG_CTL:
1504 case MSR_IA32_MCG_STATUS: 1504 case MSR_IA32_MCG_STATUS:
1505 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: 1505 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
1506 return get_msr_mce(vcpu, msr, pdata); 1506 return get_msr_mce(vcpu, msr, pdata);
1507 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: 1507 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
1508 if (kvm_hv_msr_partition_wide(msr)) { 1508 if (kvm_hv_msr_partition_wide(msr)) {
1509 int r; 1509 int r;
1510 mutex_lock(&vcpu->kvm->lock); 1510 mutex_lock(&vcpu->kvm->lock);
1511 r = get_msr_hyperv_pw(vcpu, msr, pdata); 1511 r = get_msr_hyperv_pw(vcpu, msr, pdata);
1512 mutex_unlock(&vcpu->kvm->lock); 1512 mutex_unlock(&vcpu->kvm->lock);
1513 return r; 1513 return r;
1514 } else 1514 } else
1515 return get_msr_hyperv(vcpu, msr, pdata); 1515 return get_msr_hyperv(vcpu, msr, pdata);
1516 break; 1516 break;
1517 default: 1517 default:
1518 if (!ignore_msrs) { 1518 if (!ignore_msrs) {
1519 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); 1519 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
1520 return 1; 1520 return 1;
1521 } else { 1521 } else {
1522 pr_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr); 1522 pr_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
1523 data = 0; 1523 data = 0;
1524 } 1524 }
1525 break; 1525 break;
1526 } 1526 }
1527 *pdata = data; 1527 *pdata = data;
1528 return 0; 1528 return 0;
1529 } 1529 }
1530 EXPORT_SYMBOL_GPL(kvm_get_msr_common); 1530 EXPORT_SYMBOL_GPL(kvm_get_msr_common);
1531 1531
1532 /* 1532 /*
1533 * Read or write a bunch of msrs. All parameters are kernel addresses. 1533 * Read or write a bunch of msrs. All parameters are kernel addresses.
1534 * 1534 *
1535 * @return number of msrs set successfully. 1535 * @return number of msrs set successfully.
1536 */ 1536 */
1537 static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, 1537 static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
1538 struct kvm_msr_entry *entries, 1538 struct kvm_msr_entry *entries,
1539 int (*do_msr)(struct kvm_vcpu *vcpu, 1539 int (*do_msr)(struct kvm_vcpu *vcpu,
1540 unsigned index, u64 *data)) 1540 unsigned index, u64 *data))
1541 { 1541 {
1542 int i, idx; 1542 int i, idx;
1543 1543
1544 vcpu_load(vcpu); 1544 vcpu_load(vcpu);
1545 1545
1546 idx = srcu_read_lock(&vcpu->kvm->srcu); 1546 idx = srcu_read_lock(&vcpu->kvm->srcu);
1547 for (i = 0; i < msrs->nmsrs; ++i) 1547 for (i = 0; i < msrs->nmsrs; ++i)
1548 if (do_msr(vcpu, entries[i].index, &entries[i].data)) 1548 if (do_msr(vcpu, entries[i].index, &entries[i].data))
1549 break; 1549 break;
1550 srcu_read_unlock(&vcpu->kvm->srcu, idx); 1550 srcu_read_unlock(&vcpu->kvm->srcu, idx);
1551 1551
1552 vcpu_put(vcpu); 1552 vcpu_put(vcpu);
1553 1553
1554 return i; 1554 return i;
1555 } 1555 }
1556 1556
1557 /* 1557 /*
1558 * Read or write a bunch of msrs. Parameters are user addresses. 1558 * Read or write a bunch of msrs. Parameters are user addresses.
1559 * 1559 *
1560 * @return number of msrs set successfully. 1560 * @return number of msrs set successfully.
1561 */ 1561 */
1562 static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, 1562 static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
1563 int (*do_msr)(struct kvm_vcpu *vcpu, 1563 int (*do_msr)(struct kvm_vcpu *vcpu,
1564 unsigned index, u64 *data), 1564 unsigned index, u64 *data),
1565 int writeback) 1565 int writeback)
1566 { 1566 {
1567 struct kvm_msrs msrs; 1567 struct kvm_msrs msrs;
1568 struct kvm_msr_entry *entries; 1568 struct kvm_msr_entry *entries;
1569 int r, n; 1569 int r, n;
1570 unsigned size; 1570 unsigned size;
1571 1571
1572 r = -EFAULT; 1572 r = -EFAULT;
1573 if (copy_from_user(&msrs, user_msrs, sizeof msrs)) 1573 if (copy_from_user(&msrs, user_msrs, sizeof msrs))
1574 goto out; 1574 goto out;
1575 1575
1576 r = -E2BIG; 1576 r = -E2BIG;
1577 if (msrs.nmsrs >= MAX_IO_MSRS) 1577 if (msrs.nmsrs >= MAX_IO_MSRS)
1578 goto out; 1578 goto out;
1579 1579
1580 r = -ENOMEM; 1580 r = -ENOMEM;
1581 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; 1581 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
1582 entries = kmalloc(size, GFP_KERNEL); 1582 entries = kmalloc(size, GFP_KERNEL);
1583 if (!entries) 1583 if (!entries)
1584 goto out; 1584 goto out;
1585 1585
1586 r = -EFAULT; 1586 r = -EFAULT;
1587 if (copy_from_user(entries, user_msrs->entries, size)) 1587 if (copy_from_user(entries, user_msrs->entries, size))
1588 goto out_free; 1588 goto out_free;
1589 1589
1590 r = n = __msr_io(vcpu, &msrs, entries, do_msr); 1590 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
1591 if (r < 0) 1591 if (r < 0)
1592 goto out_free; 1592 goto out_free;
1593 1593
1594 r = -EFAULT; 1594 r = -EFAULT;
1595 if (writeback && copy_to_user(user_msrs->entries, entries, size)) 1595 if (writeback && copy_to_user(user_msrs->entries, entries, size))
1596 goto out_free; 1596 goto out_free;
1597 1597
1598 r = n; 1598 r = n;
1599 1599
1600 out_free: 1600 out_free:
1601 kfree(entries); 1601 kfree(entries);
1602 out: 1602 out:
1603 return r; 1603 return r;
1604 } 1604 }
1605 1605
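msr_io() is what backs the KVM_GET_MSRS/KVM_SET_MSRS vcpu ioctls: userspace passes a kvm_msrs header immediately followed by the entries array, and the return value is the number of MSRs processed rather than 0/-errno. A hedged user-space sketch of the read side, assuming vcpu_fd is an already-created vcpu file descriptor:

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int read_two_msrs(int vcpu_fd, uint64_t *tsc, uint64_t *apic_base)
{
	struct kvm_msrs *req;
	int n, ret = -1;

	req = calloc(1, sizeof(*req) + 2 * sizeof(struct kvm_msr_entry));
	if (!req)
		return -1;
	req->nmsrs = 2;
	req->entries[0].index = 0x10;	/* MSR_IA32_TSC */
	req->entries[1].index = 0x1b;	/* MSR_IA32_APICBASE */

	n = ioctl(vcpu_fd, KVM_GET_MSRS, req);
	if (n == 2) {			/* return value = entries handled */
		*tsc = req->entries[0].data;
		*apic_base = req->entries[1].data;
		ret = 0;
	}
	free(req);
	return ret;
}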
1606 int kvm_dev_ioctl_check_extension(long ext) 1606 int kvm_dev_ioctl_check_extension(long ext)
1607 { 1607 {
1608 int r; 1608 int r;
1609 1609
1610 switch (ext) { 1610 switch (ext) {
1611 case KVM_CAP_IRQCHIP: 1611 case KVM_CAP_IRQCHIP:
1612 case KVM_CAP_HLT: 1612 case KVM_CAP_HLT:
1613 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 1613 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
1614 case KVM_CAP_SET_TSS_ADDR: 1614 case KVM_CAP_SET_TSS_ADDR:
1615 case KVM_CAP_EXT_CPUID: 1615 case KVM_CAP_EXT_CPUID:
1616 case KVM_CAP_CLOCKSOURCE: 1616 case KVM_CAP_CLOCKSOURCE:
1617 case KVM_CAP_PIT: 1617 case KVM_CAP_PIT:
1618 case KVM_CAP_NOP_IO_DELAY: 1618 case KVM_CAP_NOP_IO_DELAY:
1619 case KVM_CAP_MP_STATE: 1619 case KVM_CAP_MP_STATE:
1620 case KVM_CAP_SYNC_MMU: 1620 case KVM_CAP_SYNC_MMU:
1621 case KVM_CAP_REINJECT_CONTROL: 1621 case KVM_CAP_REINJECT_CONTROL:
1622 case KVM_CAP_IRQ_INJECT_STATUS: 1622 case KVM_CAP_IRQ_INJECT_STATUS:
1623 case KVM_CAP_ASSIGN_DEV_IRQ: 1623 case KVM_CAP_ASSIGN_DEV_IRQ:
1624 case KVM_CAP_IRQFD: 1624 case KVM_CAP_IRQFD:
1625 case KVM_CAP_IOEVENTFD: 1625 case KVM_CAP_IOEVENTFD:
1626 case KVM_CAP_PIT2: 1626 case KVM_CAP_PIT2:
1627 case KVM_CAP_PIT_STATE2: 1627 case KVM_CAP_PIT_STATE2:
1628 case KVM_CAP_SET_IDENTITY_MAP_ADDR: 1628 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
1629 case KVM_CAP_XEN_HVM: 1629 case KVM_CAP_XEN_HVM:
1630 case KVM_CAP_ADJUST_CLOCK: 1630 case KVM_CAP_ADJUST_CLOCK:
1631 case KVM_CAP_VCPU_EVENTS: 1631 case KVM_CAP_VCPU_EVENTS:
1632 case KVM_CAP_HYPERV: 1632 case KVM_CAP_HYPERV:
1633 case KVM_CAP_HYPERV_VAPIC: 1633 case KVM_CAP_HYPERV_VAPIC:
1634 case KVM_CAP_HYPERV_SPIN: 1634 case KVM_CAP_HYPERV_SPIN:
1635 case KVM_CAP_PCI_SEGMENT: 1635 case KVM_CAP_PCI_SEGMENT:
1636 case KVM_CAP_DEBUGREGS: 1636 case KVM_CAP_DEBUGREGS:
1637 case KVM_CAP_X86_ROBUST_SINGLESTEP: 1637 case KVM_CAP_X86_ROBUST_SINGLESTEP:
1638 r = 1; 1638 r = 1;
1639 break; 1639 break;
1640 case KVM_CAP_COALESCED_MMIO: 1640 case KVM_CAP_COALESCED_MMIO:
1641 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 1641 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
1642 break; 1642 break;
1643 case KVM_CAP_VAPIC: 1643 case KVM_CAP_VAPIC:
1644 r = !kvm_x86_ops->cpu_has_accelerated_tpr(); 1644 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
1645 break; 1645 break;
1646 case KVM_CAP_NR_VCPUS: 1646 case KVM_CAP_NR_VCPUS:
1647 r = KVM_MAX_VCPUS; 1647 r = KVM_MAX_VCPUS;
1648 break; 1648 break;
1649 case KVM_CAP_NR_MEMSLOTS: 1649 case KVM_CAP_NR_MEMSLOTS:
1650 r = KVM_MEMORY_SLOTS; 1650 r = KVM_MEMORY_SLOTS;
1651 break; 1651 break;
1652 case KVM_CAP_PV_MMU: /* obsolete */ 1652 case KVM_CAP_PV_MMU: /* obsolete */
1653 r = 0; 1653 r = 0;
1654 break; 1654 break;
1655 case KVM_CAP_IOMMU: 1655 case KVM_CAP_IOMMU:
1656 r = iommu_found(); 1656 r = iommu_found();
1657 break; 1657 break;
1658 case KVM_CAP_MCE: 1658 case KVM_CAP_MCE:
1659 r = KVM_MAX_MCE_BANKS; 1659 r = KVM_MAX_MCE_BANKS;
1660 break; 1660 break;
1661 default: 1661 default:
1662 r = 0; 1662 r = 0;
1663 break; 1663 break;
1664 } 1664 }
1665 return r; 1665 return r;
1666 1666
1667 } 1667 }
1668 1668
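Each case above answers a KVM_CHECK_EXTENSION query from userspace: 0 means unsupported, and several capabilities return a meaningful positive value rather than just 1 (the vcpu limit, the memory-slot count, the coalesced-MMIO page offset, the MCE bank count). A hedged sketch of how a VMM probes them against /dev/kvm:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);

	if (kvm < 0)
		return 1;
	printf("irqchip   : %d\n", ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP));
	printf("max vcpus : %d\n", ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS));
	printf("mce banks : %d\n", ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_MCE));
	close(kvm);
	return 0;
}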
1669 long kvm_arch_dev_ioctl(struct file *filp, 1669 long kvm_arch_dev_ioctl(struct file *filp,
1670 unsigned int ioctl, unsigned long arg) 1670 unsigned int ioctl, unsigned long arg)
1671 { 1671 {
1672 void __user *argp = (void __user *)arg; 1672 void __user *argp = (void __user *)arg;
1673 long r; 1673 long r;
1674 1674
1675 switch (ioctl) { 1675 switch (ioctl) {
1676 case KVM_GET_MSR_INDEX_LIST: { 1676 case KVM_GET_MSR_INDEX_LIST: {
1677 struct kvm_msr_list __user *user_msr_list = argp; 1677 struct kvm_msr_list __user *user_msr_list = argp;
1678 struct kvm_msr_list msr_list; 1678 struct kvm_msr_list msr_list;
1679 unsigned n; 1679 unsigned n;
1680 1680
1681 r = -EFAULT; 1681 r = -EFAULT;
1682 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list)) 1682 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
1683 goto out; 1683 goto out;
1684 n = msr_list.nmsrs; 1684 n = msr_list.nmsrs;
1685 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs); 1685 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
1686 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) 1686 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
1687 goto out; 1687 goto out;
1688 r = -E2BIG; 1688 r = -E2BIG;
1689 if (n < msr_list.nmsrs) 1689 if (n < msr_list.nmsrs)
1690 goto out; 1690 goto out;
1691 r = -EFAULT; 1691 r = -EFAULT;
1692 if (copy_to_user(user_msr_list->indices, &msrs_to_save, 1692 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
1693 num_msrs_to_save * sizeof(u32))) 1693 num_msrs_to_save * sizeof(u32)))
1694 goto out; 1694 goto out;
1695 if (copy_to_user(user_msr_list->indices + num_msrs_to_save, 1695 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
1696 &emulated_msrs, 1696 &emulated_msrs,
1697 ARRAY_SIZE(emulated_msrs) * sizeof(u32))) 1697 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
1698 goto out; 1698 goto out;
1699 r = 0; 1699 r = 0;
1700 break; 1700 break;
1701 } 1701 }
1702 case KVM_GET_SUPPORTED_CPUID: { 1702 case KVM_GET_SUPPORTED_CPUID: {
1703 struct kvm_cpuid2 __user *cpuid_arg = argp; 1703 struct kvm_cpuid2 __user *cpuid_arg = argp;
1704 struct kvm_cpuid2 cpuid; 1704 struct kvm_cpuid2 cpuid;
1705 1705
1706 r = -EFAULT; 1706 r = -EFAULT;
1707 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 1707 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
1708 goto out; 1708 goto out;
1709 r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, 1709 r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
1710 cpuid_arg->entries); 1710 cpuid_arg->entries);
1711 if (r) 1711 if (r)
1712 goto out; 1712 goto out;
1713 1713
1714 r = -EFAULT; 1714 r = -EFAULT;
1715 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) 1715 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
1716 goto out; 1716 goto out;
1717 r = 0; 1717 r = 0;
1718 break; 1718 break;
1719 } 1719 }
1720 case KVM_X86_GET_MCE_CAP_SUPPORTED: { 1720 case KVM_X86_GET_MCE_CAP_SUPPORTED: {
1721 u64 mce_cap; 1721 u64 mce_cap;
1722 1722
1723 mce_cap = KVM_MCE_CAP_SUPPORTED; 1723 mce_cap = KVM_MCE_CAP_SUPPORTED;
1724 r = -EFAULT; 1724 r = -EFAULT;
1725 if (copy_to_user(argp, &mce_cap, sizeof mce_cap)) 1725 if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
1726 goto out; 1726 goto out;
1727 r = 0; 1727 r = 0;
1728 break; 1728 break;
1729 } 1729 }
1730 default: 1730 default:
1731 r = -EINVAL; 1731 r = -EINVAL;
1732 } 1732 }
1733 out: 1733 out:
1734 return r; 1734 return r;
1735 } 1735 }
1736 1736
1737 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1737 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1738 { 1738 {
1739 kvm_x86_ops->vcpu_load(vcpu, cpu); 1739 kvm_x86_ops->vcpu_load(vcpu, cpu);
1740 if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) { 1740 if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
1741 unsigned long khz = cpufreq_quick_get(cpu); 1741 unsigned long khz = cpufreq_quick_get(cpu);
1742 if (!khz) 1742 if (!khz)
1743 khz = tsc_khz; 1743 khz = tsc_khz;
1744 per_cpu(cpu_tsc_khz, cpu) = khz; 1744 per_cpu(cpu_tsc_khz, cpu) = khz;
1745 } 1745 }
1746 kvm_request_guest_time_update(vcpu); 1746 kvm_request_guest_time_update(vcpu);
1747 } 1747 }
1748 1748
1749 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 1749 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1750 { 1750 {
1751 kvm_x86_ops->vcpu_put(vcpu); 1751 kvm_x86_ops->vcpu_put(vcpu);
1752 kvm_put_guest_fpu(vcpu); 1752 kvm_put_guest_fpu(vcpu);
1753 } 1753 }
1754 1754
1755 static int is_efer_nx(void) 1755 static int is_efer_nx(void)
1756 { 1756 {
1757 unsigned long long efer = 0; 1757 unsigned long long efer = 0;
1758 1758
1759 rdmsrl_safe(MSR_EFER, &efer); 1759 rdmsrl_safe(MSR_EFER, &efer);
1760 return efer & EFER_NX; 1760 return efer & EFER_NX;
1761 } 1761 }
1762 1762
1763 static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) 1763 static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
1764 { 1764 {
1765 int i; 1765 int i;
1766 struct kvm_cpuid_entry2 *e, *entry; 1766 struct kvm_cpuid_entry2 *e, *entry;
1767 1767
1768 entry = NULL; 1768 entry = NULL;
1769 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { 1769 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
1770 e = &vcpu->arch.cpuid_entries[i]; 1770 e = &vcpu->arch.cpuid_entries[i];
1771 if (e->function == 0x80000001) { 1771 if (e->function == 0x80000001) {
1772 entry = e; 1772 entry = e;
1773 break; 1773 break;
1774 } 1774 }
1775 } 1775 }
1776 if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) { 1776 if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
1777 entry->edx &= ~(1 << 20); 1777 entry->edx &= ~(1 << 20);
1778 printk(KERN_INFO "kvm: guest NX capability removed\n"); 1778 printk(KERN_INFO "kvm: guest NX capability removed\n");
1779 } 1779 }
1780 } 1780 }
1781 1781
1782 /* when an old userspace process fills a new kernel module */ 1782 /* when an old userspace process fills a new kernel module */
1783 static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, 1783 static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
1784 struct kvm_cpuid *cpuid, 1784 struct kvm_cpuid *cpuid,
1785 struct kvm_cpuid_entry __user *entries) 1785 struct kvm_cpuid_entry __user *entries)
1786 { 1786 {
1787 int r, i; 1787 int r, i;
1788 struct kvm_cpuid_entry *cpuid_entries; 1788 struct kvm_cpuid_entry *cpuid_entries;
1789 1789
1790 r = -E2BIG; 1790 r = -E2BIG;
1791 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) 1791 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
1792 goto out; 1792 goto out;
1793 r = -ENOMEM; 1793 r = -ENOMEM;
1794 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent); 1794 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
1795 if (!cpuid_entries) 1795 if (!cpuid_entries)
1796 goto out; 1796 goto out;
1797 r = -EFAULT; 1797 r = -EFAULT;
1798 if (copy_from_user(cpuid_entries, entries, 1798 if (copy_from_user(cpuid_entries, entries,
1799 cpuid->nent * sizeof(struct kvm_cpuid_entry))) 1799 cpuid->nent * sizeof(struct kvm_cpuid_entry)))
1800 goto out_free; 1800 goto out_free;
1801 vcpu_load(vcpu); 1801 vcpu_load(vcpu);
1802 for (i = 0; i < cpuid->nent; i++) { 1802 for (i = 0; i < cpuid->nent; i++) {
1803 vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; 1803 vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
1804 vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; 1804 vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
1805 vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx; 1805 vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
1806 vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx; 1806 vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
1807 vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx; 1807 vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
1808 vcpu->arch.cpuid_entries[i].index = 0; 1808 vcpu->arch.cpuid_entries[i].index = 0;
1809 vcpu->arch.cpuid_entries[i].flags = 0; 1809 vcpu->arch.cpuid_entries[i].flags = 0;
1810 vcpu->arch.cpuid_entries[i].padding[0] = 0; 1810 vcpu->arch.cpuid_entries[i].padding[0] = 0;
1811 vcpu->arch.cpuid_entries[i].padding[1] = 0; 1811 vcpu->arch.cpuid_entries[i].padding[1] = 0;
1812 vcpu->arch.cpuid_entries[i].padding[2] = 0; 1812 vcpu->arch.cpuid_entries[i].padding[2] = 0;
1813 } 1813 }
1814 vcpu->arch.cpuid_nent = cpuid->nent; 1814 vcpu->arch.cpuid_nent = cpuid->nent;
1815 cpuid_fix_nx_cap(vcpu); 1815 cpuid_fix_nx_cap(vcpu);
1816 r = 0; 1816 r = 0;
1817 kvm_apic_set_version(vcpu); 1817 kvm_apic_set_version(vcpu);
1818 kvm_x86_ops->cpuid_update(vcpu); 1818 kvm_x86_ops->cpuid_update(vcpu);
1819 vcpu_put(vcpu); 1819 vcpu_put(vcpu);
1820 1820
1821 out_free: 1821 out_free:
1822 vfree(cpuid_entries); 1822 vfree(cpuid_entries);
1823 out: 1823 out:
1824 return r; 1824 return r;
1825 } 1825 }
1826 1826
1827 static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, 1827 static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
1828 struct kvm_cpuid2 *cpuid, 1828 struct kvm_cpuid2 *cpuid,
1829 struct kvm_cpuid_entry2 __user *entries) 1829 struct kvm_cpuid_entry2 __user *entries)
1830 { 1830 {
1831 int r; 1831 int r;
1832 1832
1833 r = -E2BIG; 1833 r = -E2BIG;
1834 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) 1834 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
1835 goto out; 1835 goto out;
1836 r = -EFAULT; 1836 r = -EFAULT;
1837 if (copy_from_user(&vcpu->arch.cpuid_entries, entries, 1837 if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
1838 cpuid->nent * sizeof(struct kvm_cpuid_entry2))) 1838 cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
1839 goto out; 1839 goto out;
1840 vcpu_load(vcpu); 1840 vcpu_load(vcpu);
1841 vcpu->arch.cpuid_nent = cpuid->nent; 1841 vcpu->arch.cpuid_nent = cpuid->nent;
1842 kvm_apic_set_version(vcpu); 1842 kvm_apic_set_version(vcpu);
1843 kvm_x86_ops->cpuid_update(vcpu); 1843 kvm_x86_ops->cpuid_update(vcpu);
1844 vcpu_put(vcpu); 1844 vcpu_put(vcpu);
1845 return 0; 1845 return 0;
1846 1846
1847 out: 1847 out:
1848 return r; 1848 return r;
1849 } 1849 }
1850 1850
1851 static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, 1851 static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
1852 struct kvm_cpuid2 *cpuid, 1852 struct kvm_cpuid2 *cpuid,
1853 struct kvm_cpuid_entry2 __user *entries) 1853 struct kvm_cpuid_entry2 __user *entries)
1854 { 1854 {
1855 int r; 1855 int r;
1856 1856
1857 vcpu_load(vcpu); 1857 vcpu_load(vcpu);
1858 r = -E2BIG; 1858 r = -E2BIG;
1859 if (cpuid->nent < vcpu->arch.cpuid_nent) 1859 if (cpuid->nent < vcpu->arch.cpuid_nent)
1860 goto out; 1860 goto out;
1861 r = -EFAULT; 1861 r = -EFAULT;
1862 if (copy_to_user(entries, &vcpu->arch.cpuid_entries, 1862 if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
1863 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) 1863 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
1864 goto out; 1864 goto out;
1865 return 0; 1865 return 0;
1866 1866
1867 out: 1867 out:
1868 cpuid->nent = vcpu->arch.cpuid_nent; 1868 cpuid->nent = vcpu->arch.cpuid_nent;
1869 vcpu_put(vcpu); 1869 vcpu_put(vcpu);
1870 return r; 1870 return r;
1871 } 1871 }
1872 1872
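kvm_vcpu_ioctl_set_cpuid2()/get_cpuid2() back the KVM_SET_CPUID2 and KVM_GET_CPUID2 vcpu ioctls: a kvm_cpuid2 header followed by nent kvm_cpuid_entry2 records. A hedged user-space sketch that installs a single leaf; a real VMM would start from KVM_GET_SUPPORTED_CPUID and filter it rather than hand-roll entries:

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_one_cpuid_leaf(int vcpu_fd)
{
	struct kvm_cpuid2 *cpuid;
	int ret;

	cpuid = calloc(1, sizeof(*cpuid) + sizeof(struct kvm_cpuid_entry2));
	if (!cpuid)
		return -1;
	cpuid->nent = 1;
	cpuid->entries[0].function = 0;		/* leaf 0: max leaf + vendor id */
	cpuid->entries[0].eax = 0xb;		/* highest standard leaf */
	cpuid->entries[0].ebx = 0x756e6547;	/* "Genu" */
	cpuid->entries[0].edx = 0x49656e69;	/* "ineI" */
	cpuid->entries[0].ecx = 0x6c65746e;	/* "ntel" */

	ret = ioctl(vcpu_fd, KVM_SET_CPUID2, cpuid);
	free(cpuid);
	return ret;
}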
1873 static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, 1873 static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1874 u32 index) 1874 u32 index)
1875 { 1875 {
1876 entry->function = function; 1876 entry->function = function;
1877 entry->index = index; 1877 entry->index = index;
1878 cpuid_count(entry->function, entry->index, 1878 cpuid_count(entry->function, entry->index,
1879 &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); 1879 &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
1880 entry->flags = 0; 1880 entry->flags = 0;
1881 } 1881 }
1882 1882
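do_cpuid_1_ent() just snapshots the host's answer: cpuid_count() executes CPUID with EAX set to the function and ECX to the sub-leaf index, before do_cpuid_ent() below masks the result with the supported-feature words. A hedged user-space equivalent (64-bit build assumed), handy for seeing what the host itself reports:

#include <stdint.h>
#include <stdio.h>

static void cpuid_count_sketch(uint32_t function, uint32_t index,
			       uint32_t *eax, uint32_t *ebx,
			       uint32_t *ecx, uint32_t *edx)
{
	__asm__ volatile("cpuid"
			 : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
			 : "0" (function), "2" (index));
}

int main(void)
{
	uint32_t a, b, c, d;

	cpuid_count_sketch(0, 0, &a, &b, &c, &d);
	/* Vendor string is spread across EBX, EDX, ECX in that order. */
	printf("max leaf %u, vendor %.4s%.4s%.4s\n", a,
	       (char *)&b, (char *)&d, (char *)&c);
	return 0;
}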
1883 #define F(x) bit(X86_FEATURE_##x) 1883 #define F(x) bit(X86_FEATURE_##x)
1884 1884
1885 static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, 1885 static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1886 u32 index, int *nent, int maxnent) 1886 u32 index, int *nent, int maxnent)
1887 { 1887 {
1888 unsigned f_nx = is_efer_nx() ? F(NX) : 0; 1888 unsigned f_nx = is_efer_nx() ? F(NX) : 0;
1889 #ifdef CONFIG_X86_64 1889 #ifdef CONFIG_X86_64
1890 unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL) 1890 unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
1891 ? F(GBPAGES) : 0; 1891 ? F(GBPAGES) : 0;
1892 unsigned f_lm = F(LM); 1892 unsigned f_lm = F(LM);
1893 #else 1893 #else
1894 unsigned f_gbpages = 0; 1894 unsigned f_gbpages = 0;
1895 unsigned f_lm = 0; 1895 unsigned f_lm = 0;
1896 #endif 1896 #endif
1897 unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; 1897 unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
1898 1898
1899 /* cpuid 1.edx */ 1899 /* cpuid 1.edx */
1900 const u32 kvm_supported_word0_x86_features = 1900 const u32 kvm_supported_word0_x86_features =
1901 F(FPU) | F(VME) | F(DE) | F(PSE) | 1901 F(FPU) | F(VME) | F(DE) | F(PSE) |
1902 F(TSC) | F(MSR) | F(PAE) | F(MCE) | 1902 F(TSC) | F(MSR) | F(PAE) | F(MCE) |
1903 F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | 1903 F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
1904 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | 1904 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
1905 F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) | 1905 F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) |
1906 0 /* Reserved, DS, ACPI */ | F(MMX) | 1906 0 /* Reserved, DS, ACPI */ | F(MMX) |
1907 F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | 1907 F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
1908 0 /* HTT, TM, Reserved, PBE */; 1908 0 /* HTT, TM, Reserved, PBE */;
1909 /* cpuid 0x80000001.edx */ 1909 /* cpuid 0x80000001.edx */
1910 const u32 kvm_supported_word1_x86_features = 1910 const u32 kvm_supported_word1_x86_features =
1911 F(FPU) | F(VME) | F(DE) | F(PSE) | 1911 F(FPU) | F(VME) | F(DE) | F(PSE) |
1912 F(TSC) | F(MSR) | F(PAE) | F(MCE) | 1912 F(TSC) | F(MSR) | F(PAE) | F(MCE) |
1913 F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | 1913 F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
1914 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | 1914 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
1915 F(PAT) | F(PSE36) | 0 /* Reserved */ | 1915 F(PAT) | F(PSE36) | 0 /* Reserved */ |
1916 f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | 1916 f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
1917 F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp | 1917 F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
1918 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); 1918 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
1919 /* cpuid 1.ecx */ 1919 /* cpuid 1.ecx */
1920 const u32 kvm_supported_word4_x86_features = 1920 const u32 kvm_supported_word4_x86_features =
1921 F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ | 1921 F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ |
1922 0 /* DS-CPL, VMX, SMX, EST */ | 1922 0 /* DS-CPL, VMX, SMX, EST */ |
1923 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | 1923 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
1924 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | 1924 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
1925 0 /* Reserved, DCA */ | F(XMM4_1) | 1925 0 /* Reserved, DCA */ | F(XMM4_1) |
1926 F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | 1926 F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
1927 0 /* Reserved, XSAVE, OSXSAVE */; 1927 0 /* Reserved, XSAVE, OSXSAVE */;
1928 /* cpuid 0x80000001.ecx */ 1928 /* cpuid 0x80000001.ecx */
1929 const u32 kvm_supported_word6_x86_features = 1929 const u32 kvm_supported_word6_x86_features =
1930 F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | 1930 F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
1931 F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | 1931 F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
1932 F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) | 1932 F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) |
1933 0 /* SKINIT */ | 0 /* WDT */; 1933 0 /* SKINIT */ | 0 /* WDT */;
1934 1934
1935 /* all calls to cpuid_count() should be made on the same cpu */ 1935 /* all calls to cpuid_count() should be made on the same cpu */
1936 get_cpu(); 1936 get_cpu();
1937 do_cpuid_1_ent(entry, function, index); 1937 do_cpuid_1_ent(entry, function, index);
1938 ++*nent; 1938 ++*nent;
1939 1939
1940 switch (function) { 1940 switch (function) {
1941 case 0: 1941 case 0:
1942 entry->eax = min(entry->eax, (u32)0xb); 1942 entry->eax = min(entry->eax, (u32)0xb);
1943 break; 1943 break;
1944 case 1: 1944 case 1:
1945 entry->edx &= kvm_supported_word0_x86_features; 1945 entry->edx &= kvm_supported_word0_x86_features;
1946 entry->ecx &= kvm_supported_word4_x86_features; 1946 entry->ecx &= kvm_supported_word4_x86_features;
1947 /* we support x2apic emulation even if host does not support 1947 /* we support x2apic emulation even if host does not support
1948 * it since we emulate x2apic in software */ 1948 * it since we emulate x2apic in software */
1949 entry->ecx |= F(X2APIC); 1949 entry->ecx |= F(X2APIC);
1950 break; 1950 break;
1951 /* function 2 entries are STATEFUL. That is, repeated cpuid commands 1951 /* function 2 entries are STATEFUL. That is, repeated cpuid commands
1952 * may return different values. This forces us to get_cpu() before 1952 * may return different values. This forces us to get_cpu() before
1953 * issuing the first command, and also to emulate this annoying behavior 1953 * issuing the first command, and also to emulate this annoying behavior
1954 * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */ 1954 * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
1955 case 2: { 1955 case 2: {
1956 int t, times = entry->eax & 0xff; 1956 int t, times = entry->eax & 0xff;
1957 1957
1958 entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; 1958 entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
1959 entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; 1959 entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
1960 for (t = 1; t < times && *nent < maxnent; ++t) { 1960 for (t = 1; t < times && *nent < maxnent; ++t) {
1961 do_cpuid_1_ent(&entry[t], function, 0); 1961 do_cpuid_1_ent(&entry[t], function, 0);
1962 entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; 1962 entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
1963 ++*nent; 1963 ++*nent;
1964 } 1964 }
1965 break; 1965 break;
1966 } 1966 }
1967 /* function 4 and 0xb have additional index. */ 1967 /* function 4 and 0xb have additional index. */
1968 case 4: { 1968 case 4: {
1969 int i, cache_type; 1969 int i, cache_type;
1970 1970
1971 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1971 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
1972 /* read more entries until cache_type is zero */ 1972 /* read more entries until cache_type is zero */
1973 for (i = 1; *nent < maxnent; ++i) { 1973 for (i = 1; *nent < maxnent; ++i) {
1974 cache_type = entry[i - 1].eax & 0x1f; 1974 cache_type = entry[i - 1].eax & 0x1f;
1975 if (!cache_type) 1975 if (!cache_type)
1976 break; 1976 break;
1977 do_cpuid_1_ent(&entry[i], function, i); 1977 do_cpuid_1_ent(&entry[i], function, i);
1978 entry[i].flags |= 1978 entry[i].flags |=
1979 KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1979 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
1980 ++*nent; 1980 ++*nent;
1981 } 1981 }
1982 break; 1982 break;
1983 } 1983 }
1984 case 0xb: { 1984 case 0xb: {
1985 int i, level_type; 1985 int i, level_type;
1986 1986
1987 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1987 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
1988 /* read more entries until level_type is zero */ 1988 /* read more entries until level_type is zero */
1989 for (i = 1; *nent < maxnent; ++i) { 1989 for (i = 1; *nent < maxnent; ++i) {
1990 level_type = entry[i - 1].ecx & 0xff00; 1990 level_type = entry[i - 1].ecx & 0xff00;
1991 if (!level_type) 1991 if (!level_type)
1992 break; 1992 break;
1993 do_cpuid_1_ent(&entry[i], function, i); 1993 do_cpuid_1_ent(&entry[i], function, i);
1994 entry[i].flags |= 1994 entry[i].flags |=
1995 KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1995 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
1996 ++*nent; 1996 ++*nent;
1997 } 1997 }
1998 break; 1998 break;
1999 } 1999 }
2000 case KVM_CPUID_SIGNATURE: { 2000 case KVM_CPUID_SIGNATURE: {
2001 char signature[12] = "KVMKVMKVM\0\0"; 2001 char signature[12] = "KVMKVMKVM\0\0";
2002 u32 *sigptr = (u32 *)signature; 2002 u32 *sigptr = (u32 *)signature;
2003 entry->eax = 0; 2003 entry->eax = 0;
2004 entry->ebx = sigptr[0]; 2004 entry->ebx = sigptr[0];
2005 entry->ecx = sigptr[1]; 2005 entry->ecx = sigptr[1];
2006 entry->edx = sigptr[2]; 2006 entry->edx = sigptr[2];
2007 break; 2007 break;
2008 } 2008 }
2009 case KVM_CPUID_FEATURES: 2009 case KVM_CPUID_FEATURES:
2010 entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | 2010 entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
2011 (1 << KVM_FEATURE_NOP_IO_DELAY) | 2011 (1 << KVM_FEATURE_NOP_IO_DELAY) |
2012 (1 << KVM_FEATURE_CLOCKSOURCE2) | 2012 (1 << KVM_FEATURE_CLOCKSOURCE2) |
2013 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); 2013 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
2014 entry->ebx = 0; 2014 entry->ebx = 0;
2015 entry->ecx = 0; 2015 entry->ecx = 0;
2016 entry->edx = 0; 2016 entry->edx = 0;
2017 break; 2017 break;
2018 case 0x80000000: 2018 case 0x80000000:
2019 entry->eax = min(entry->eax, 0x8000001a); 2019 entry->eax = min(entry->eax, 0x8000001a);
2020 break; 2020 break;
2021 case 0x80000001: 2021 case 0x80000001:
2022 entry->edx &= kvm_supported_word1_x86_features; 2022 entry->edx &= kvm_supported_word1_x86_features;
2023 entry->ecx &= kvm_supported_word6_x86_features; 2023 entry->ecx &= kvm_supported_word6_x86_features;
2024 break; 2024 break;
2025 } 2025 }
2026 2026
2027 kvm_x86_ops->set_supported_cpuid(function, entry); 2027 kvm_x86_ops->set_supported_cpuid(function, entry);
2028 2028
2029 put_cpu(); 2029 put_cpu();
2030 } 2030 }
2031 2031
2032 #undef F 2032 #undef F
2033 2033
2034 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 2034 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
2035 struct kvm_cpuid_entry2 __user *entries) 2035 struct kvm_cpuid_entry2 __user *entries)
2036 { 2036 {
2037 struct kvm_cpuid_entry2 *cpuid_entries; 2037 struct kvm_cpuid_entry2 *cpuid_entries;
2038 int limit, nent = 0, r = -E2BIG; 2038 int limit, nent = 0, r = -E2BIG;
2039 u32 func; 2039 u32 func;
2040 2040
2041 if (cpuid->nent < 1) 2041 if (cpuid->nent < 1)
2042 goto out; 2042 goto out;
2043 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) 2043 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
2044 cpuid->nent = KVM_MAX_CPUID_ENTRIES; 2044 cpuid->nent = KVM_MAX_CPUID_ENTRIES;
2045 r = -ENOMEM; 2045 r = -ENOMEM;
2046 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); 2046 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
2047 if (!cpuid_entries) 2047 if (!cpuid_entries)
2048 goto out; 2048 goto out;
2049 2049
2050 do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent); 2050 do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent);
2051 limit = cpuid_entries[0].eax; 2051 limit = cpuid_entries[0].eax;
2052 for (func = 1; func <= limit && nent < cpuid->nent; ++func) 2052 for (func = 1; func <= limit && nent < cpuid->nent; ++func)
2053 do_cpuid_ent(&cpuid_entries[nent], func, 0, 2053 do_cpuid_ent(&cpuid_entries[nent], func, 0,
2054 &nent, cpuid->nent); 2054 &nent, cpuid->nent);
2055 r = -E2BIG; 2055 r = -E2BIG;
2056 if (nent >= cpuid->nent) 2056 if (nent >= cpuid->nent)
2057 goto out_free; 2057 goto out_free;
2058 2058
2059 do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent); 2059 do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent);
2060 limit = cpuid_entries[nent - 1].eax; 2060 limit = cpuid_entries[nent - 1].eax;
2061 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) 2061 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
2062 do_cpuid_ent(&cpuid_entries[nent], func, 0, 2062 do_cpuid_ent(&cpuid_entries[nent], func, 0,
2063 &nent, cpuid->nent); 2063 &nent, cpuid->nent);
2064 2064
2065 2065
2066 2066
2067 r = -E2BIG; 2067 r = -E2BIG;
2068 if (nent >= cpuid->nent) 2068 if (nent >= cpuid->nent)
2069 goto out_free; 2069 goto out_free;
2070 2070
2071 do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, 2071 do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent,
2072 cpuid->nent); 2072 cpuid->nent);
2073 2073
2074 r = -E2BIG; 2074 r = -E2BIG;
2075 if (nent >= cpuid->nent) 2075 if (nent >= cpuid->nent)
2076 goto out_free; 2076 goto out_free;
2077 2077
2078 do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_FEATURES, 0, &nent, 2078 do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_FEATURES, 0, &nent,
2079 cpuid->nent); 2079 cpuid->nent);
2080 2080
2081 r = -E2BIG; 2081 r = -E2BIG;
2082 if (nent >= cpuid->nent) 2082 if (nent >= cpuid->nent)
2083 goto out_free; 2083 goto out_free;
2084 2084
2085 r = -EFAULT; 2085 r = -EFAULT;
2086 if (copy_to_user(entries, cpuid_entries, 2086 if (copy_to_user(entries, cpuid_entries,
2087 nent * sizeof(struct kvm_cpuid_entry2))) 2087 nent * sizeof(struct kvm_cpuid_entry2)))
2088 goto out_free; 2088 goto out_free;
2089 cpuid->nent = nent; 2089 cpuid->nent = nent;
2090 r = 0; 2090 r = 0;
2091 2091
2092 out_free: 2092 out_free:
2093 vfree(cpuid_entries); 2093 vfree(cpuid_entries);
2094 out: 2094 out:
2095 return r; 2095 return r;
2096 } 2096 }
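
For context, the helper above backs the KVM_GET_SUPPORTED_CPUID ioctl on the /dev/kvm device fd. A minimal userspace sketch of driving it is below; the retry-on-E2BIG loop and buffer sizes are illustrative assumptions, not part of this patch.

/* Illustrative only: query the CPUID bits KVM can expose to a guest.
 * Assumes an open /dev/kvm fd; grows the buffer and retries when the
 * kernel reports E2BIG (nent too small to hold every entry). */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int nent = 32;
	struct kvm_cpuid2 *cpuid;

	if (kvm < 0)
		return 1;
	for (;;) {
		cpuid = calloc(1, sizeof(*cpuid) +
			       nent * sizeof(struct kvm_cpuid_entry2));
		if (!cpuid)
			return 1;
		cpuid->nent = nent;
		if (ioctl(kvm, KVM_GET_SUPPORTED_CPUID, cpuid) == 0)
			break;
		free(cpuid);
		if (errno != E2BIG)
			return 1;
		nent *= 2;	/* buffer too small: retry with more room */
	}
	for (unsigned i = 0; i < cpuid->nent; i++)
		printf("func %#x idx %u: eax=%#x\n",
		       cpuid->entries[i].function,
		       cpuid->entries[i].index,
		       cpuid->entries[i].eax);
	free(cpuid);
	return 0;
}
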
2097 2097
2098 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, 2098 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2099 struct kvm_lapic_state *s) 2099 struct kvm_lapic_state *s)
2100 { 2100 {
2101 vcpu_load(vcpu); 2101 vcpu_load(vcpu);
2102 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); 2102 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
2103 vcpu_put(vcpu); 2103 vcpu_put(vcpu);
2104 2104
2105 return 0; 2105 return 0;
2106 } 2106 }
2107 2107
2108 static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, 2108 static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
2109 struct kvm_lapic_state *s) 2109 struct kvm_lapic_state *s)
2110 { 2110 {
2111 vcpu_load(vcpu); 2111 vcpu_load(vcpu);
2112 memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); 2112 memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
2113 kvm_apic_post_state_restore(vcpu); 2113 kvm_apic_post_state_restore(vcpu);
2114 update_cr8_intercept(vcpu); 2114 update_cr8_intercept(vcpu);
2115 vcpu_put(vcpu); 2115 vcpu_put(vcpu);
2116 2116
2117 return 0; 2117 return 0;
2118 } 2118 }
2119 2119
2120 static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, 2120 static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2121 struct kvm_interrupt *irq) 2121 struct kvm_interrupt *irq)
2122 { 2122 {
2123 if (irq->irq < 0 || irq->irq >= 256) 2123 if (irq->irq < 0 || irq->irq >= 256)
2124 return -EINVAL; 2124 return -EINVAL;
2125 if (irqchip_in_kernel(vcpu->kvm)) 2125 if (irqchip_in_kernel(vcpu->kvm))
2126 return -ENXIO; 2126 return -ENXIO;
2127 vcpu_load(vcpu); 2127 vcpu_load(vcpu);
2128 2128
2129 kvm_queue_interrupt(vcpu, irq->irq, false); 2129 kvm_queue_interrupt(vcpu, irq->irq, false);
2130 2130
2131 vcpu_put(vcpu); 2131 vcpu_put(vcpu);
2132 2132
2133 return 0; 2133 return 0;
2134 } 2134 }
2135 2135
2136 static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) 2136 static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
2137 { 2137 {
2138 vcpu_load(vcpu); 2138 vcpu_load(vcpu);
2139 kvm_inject_nmi(vcpu); 2139 kvm_inject_nmi(vcpu);
2140 vcpu_put(vcpu); 2140 vcpu_put(vcpu);
2141 2141
2142 return 0; 2142 return 0;
2143 } 2143 }
2144 2144
2145 static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, 2145 static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
2146 struct kvm_tpr_access_ctl *tac) 2146 struct kvm_tpr_access_ctl *tac)
2147 { 2147 {
2148 if (tac->flags) 2148 if (tac->flags)
2149 return -EINVAL; 2149 return -EINVAL;
2150 vcpu->arch.tpr_access_reporting = !!tac->enabled; 2150 vcpu->arch.tpr_access_reporting = !!tac->enabled;
2151 return 0; 2151 return 0;
2152 } 2152 }
2153 2153
2154 static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, 2154 static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
2155 u64 mcg_cap) 2155 u64 mcg_cap)
2156 { 2156 {
2157 int r; 2157 int r;
2158 unsigned bank_num = mcg_cap & 0xff, bank; 2158 unsigned bank_num = mcg_cap & 0xff, bank;
2159 2159
2160 vcpu_load(vcpu); 2160 vcpu_load(vcpu);
2161 r = -EINVAL; 2161 r = -EINVAL;
2162 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) 2162 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
2163 goto out; 2163 goto out;
2164 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000)) 2164 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
2165 goto out; 2165 goto out;
2166 r = 0; 2166 r = 0;
2167 vcpu->arch.mcg_cap = mcg_cap; 2167 vcpu->arch.mcg_cap = mcg_cap;
2168 /* Init IA32_MCG_CTL to all 1s */ 2168 /* Init IA32_MCG_CTL to all 1s */
2169 if (mcg_cap & MCG_CTL_P) 2169 if (mcg_cap & MCG_CTL_P)
2170 vcpu->arch.mcg_ctl = ~(u64)0; 2170 vcpu->arch.mcg_ctl = ~(u64)0;
2171 /* Init IA32_MCi_CTL to all 1s */ 2171 /* Init IA32_MCi_CTL to all 1s */
2172 for (bank = 0; bank < bank_num; bank++) 2172 for (bank = 0; bank < bank_num; bank++)
2173 vcpu->arch.mce_banks[bank*4] = ~(u64)0; 2173 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
2174 out: 2174 out:
2175 vcpu_put(vcpu); 2175 vcpu_put(vcpu);
2176 return r; 2176 return r;
2177 } 2177 }
2178 2178
2179 static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, 2179 static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
2180 struct kvm_x86_mce *mce) 2180 struct kvm_x86_mce *mce)
2181 { 2181 {
2182 u64 mcg_cap = vcpu->arch.mcg_cap; 2182 u64 mcg_cap = vcpu->arch.mcg_cap;
2183 unsigned bank_num = mcg_cap & 0xff; 2183 unsigned bank_num = mcg_cap & 0xff;
2184 u64 *banks = vcpu->arch.mce_banks; 2184 u64 *banks = vcpu->arch.mce_banks;
2185 2185
2186 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL)) 2186 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
2187 return -EINVAL; 2187 return -EINVAL;
2188 /* 2188 /*
2189 * if IA32_MCG_CTL is not all 1s, the uncorrected error 2189 * if IA32_MCG_CTL is not all 1s, the uncorrected error
2190 * reporting is disabled 2190 * reporting is disabled
2191 */ 2191 */
2192 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) && 2192 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
2193 vcpu->arch.mcg_ctl != ~(u64)0) 2193 vcpu->arch.mcg_ctl != ~(u64)0)
2194 return 0; 2194 return 0;
2195 banks += 4 * mce->bank; 2195 banks += 4 * mce->bank;
2196 /* 2196 /*
2197 * if IA32_MCi_CTL is not all 1s, the uncorrected error 2197 * if IA32_MCi_CTL is not all 1s, the uncorrected error
2198 * reporting is disabled for the bank 2198 * reporting is disabled for the bank
2199 */ 2199 */
2200 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0) 2200 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
2201 return 0; 2201 return 0;
2202 if (mce->status & MCI_STATUS_UC) { 2202 if (mce->status & MCI_STATUS_UC) {
2203 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || 2203 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
2204 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) { 2204 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
2205 printk(KERN_DEBUG "kvm: set_mce: " 2205 printk(KERN_DEBUG "kvm: set_mce: "
2206 "injects mce exception while " 2206 "injects mce exception while "
2207 "previous one is in progress!\n"); 2207 "previous one is in progress!\n");
2208 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); 2208 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
2209 return 0; 2209 return 0;
2210 } 2210 }
2211 if (banks[1] & MCI_STATUS_VAL) 2211 if (banks[1] & MCI_STATUS_VAL)
2212 mce->status |= MCI_STATUS_OVER; 2212 mce->status |= MCI_STATUS_OVER;
2213 banks[2] = mce->addr; 2213 banks[2] = mce->addr;
2214 banks[3] = mce->misc; 2214 banks[3] = mce->misc;
2215 vcpu->arch.mcg_status = mce->mcg_status; 2215 vcpu->arch.mcg_status = mce->mcg_status;
2216 banks[1] = mce->status; 2216 banks[1] = mce->status;
2217 kvm_queue_exception(vcpu, MC_VECTOR); 2217 kvm_queue_exception(vcpu, MC_VECTOR);
2218 } else if (!(banks[1] & MCI_STATUS_VAL) 2218 } else if (!(banks[1] & MCI_STATUS_VAL)
2219 || !(banks[1] & MCI_STATUS_UC)) { 2219 || !(banks[1] & MCI_STATUS_UC)) {
2220 if (banks[1] & MCI_STATUS_VAL) 2220 if (banks[1] & MCI_STATUS_VAL)
2221 mce->status |= MCI_STATUS_OVER; 2221 mce->status |= MCI_STATUS_OVER;
2222 banks[2] = mce->addr; 2222 banks[2] = mce->addr;
2223 banks[3] = mce->misc; 2223 banks[3] = mce->misc;
2224 banks[1] = mce->status; 2224 banks[1] = mce->status;
2225 } else 2225 } else
2226 banks[1] |= MCI_STATUS_OVER; 2226 banks[1] |= MCI_STATUS_OVER;
2227 return 0; 2227 return 0;
2228 } 2228 }
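
kvm_vcpu_ioctl_x86_setup_mce() and kvm_vcpu_ioctl_x86_set_mce() above back the KVM_X86_SETUP_MCE and KVM_X86_SET_MCE vcpu ioctls. A rough userspace sketch of the setup half follows; the fd names are placeholders, and querying the host-supported capability with KVM_X86_GET_MCE_CAP_SUPPORTED on the /dev/kvm fd is my reading of the API rather than anything shown in this hunk.

/* Rough sketch: enable MCE emulation for one vcpu. kvm_fd is the
 * /dev/kvm fd, vcpu_fd a vcpu fd; both are assumed to exist already. */
#include <sys/ioctl.h>
#include <linux/kvm.h>

int setup_mce(int kvm_fd, int vcpu_fd)
{
	__u64 mcg_cap;

	/* Ask which MCE capability bits the kernel can emulate. */
	if (ioctl(kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED, &mcg_cap) < 0)
		return -1;
	/* Keep the supported bits and request, say, 8 banks in the low
	 * byte; setup_mce() above rejects 0 or >= KVM_MAX_MCE_BANKS. */
	mcg_cap = (mcg_cap & ~0xffULL) | 8;
	return ioctl(vcpu_fd, KVM_X86_SETUP_MCE, &mcg_cap);
}

Injection would then go through KVM_X86_SET_MCE with a filled-in struct kvm_x86_mce, which the set_mce handler above routes into the selected bank.
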
2229 2229
2230 static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, 2230 static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2231 struct kvm_vcpu_events *events) 2231 struct kvm_vcpu_events *events)
2232 { 2232 {
2233 vcpu_load(vcpu); 2233 vcpu_load(vcpu);
2234 2234
2235 events->exception.injected = 2235 events->exception.injected =
2236 vcpu->arch.exception.pending && 2236 vcpu->arch.exception.pending &&
2237 !kvm_exception_is_soft(vcpu->arch.exception.nr); 2237 !kvm_exception_is_soft(vcpu->arch.exception.nr);
2238 events->exception.nr = vcpu->arch.exception.nr; 2238 events->exception.nr = vcpu->arch.exception.nr;
2239 events->exception.has_error_code = vcpu->arch.exception.has_error_code; 2239 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
2240 events->exception.error_code = vcpu->arch.exception.error_code; 2240 events->exception.error_code = vcpu->arch.exception.error_code;
2241 2241
2242 events->interrupt.injected = 2242 events->interrupt.injected =
2243 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; 2243 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
2244 events->interrupt.nr = vcpu->arch.interrupt.nr; 2244 events->interrupt.nr = vcpu->arch.interrupt.nr;
2245 events->interrupt.soft = 0; 2245 events->interrupt.soft = 0;
2246 events->interrupt.shadow = 2246 events->interrupt.shadow =
2247 kvm_x86_ops->get_interrupt_shadow(vcpu, 2247 kvm_x86_ops->get_interrupt_shadow(vcpu,
2248 KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); 2248 KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
2249 2249
2250 events->nmi.injected = vcpu->arch.nmi_injected; 2250 events->nmi.injected = vcpu->arch.nmi_injected;
2251 events->nmi.pending = vcpu->arch.nmi_pending; 2251 events->nmi.pending = vcpu->arch.nmi_pending;
2252 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); 2252 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
2253 2253
2254 events->sipi_vector = vcpu->arch.sipi_vector; 2254 events->sipi_vector = vcpu->arch.sipi_vector;
2255 2255
2256 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING 2256 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
2257 | KVM_VCPUEVENT_VALID_SIPI_VECTOR 2257 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2258 | KVM_VCPUEVENT_VALID_SHADOW); 2258 | KVM_VCPUEVENT_VALID_SHADOW);
2259 2259
2260 vcpu_put(vcpu); 2260 vcpu_put(vcpu);
2261 } 2261 }
2262 2262
2263 static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, 2263 static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2264 struct kvm_vcpu_events *events) 2264 struct kvm_vcpu_events *events)
2265 { 2265 {
2266 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING 2266 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
2267 | KVM_VCPUEVENT_VALID_SIPI_VECTOR 2267 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2268 | KVM_VCPUEVENT_VALID_SHADOW)) 2268 | KVM_VCPUEVENT_VALID_SHADOW))
2269 return -EINVAL; 2269 return -EINVAL;
2270 2270
2271 vcpu_load(vcpu); 2271 vcpu_load(vcpu);
2272 2272
2273 vcpu->arch.exception.pending = events->exception.injected; 2273 vcpu->arch.exception.pending = events->exception.injected;
2274 vcpu->arch.exception.nr = events->exception.nr; 2274 vcpu->arch.exception.nr = events->exception.nr;
2275 vcpu->arch.exception.has_error_code = events->exception.has_error_code; 2275 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
2276 vcpu->arch.exception.error_code = events->exception.error_code; 2276 vcpu->arch.exception.error_code = events->exception.error_code;
2277 2277
2278 vcpu->arch.interrupt.pending = events->interrupt.injected; 2278 vcpu->arch.interrupt.pending = events->interrupt.injected;
2279 vcpu->arch.interrupt.nr = events->interrupt.nr; 2279 vcpu->arch.interrupt.nr = events->interrupt.nr;
2280 vcpu->arch.interrupt.soft = events->interrupt.soft; 2280 vcpu->arch.interrupt.soft = events->interrupt.soft;
2281 if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) 2281 if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm))
2282 kvm_pic_clear_isr_ack(vcpu->kvm); 2282 kvm_pic_clear_isr_ack(vcpu->kvm);
2283 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) 2283 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
2284 kvm_x86_ops->set_interrupt_shadow(vcpu, 2284 kvm_x86_ops->set_interrupt_shadow(vcpu,
2285 events->interrupt.shadow); 2285 events->interrupt.shadow);
2286 2286
2287 vcpu->arch.nmi_injected = events->nmi.injected; 2287 vcpu->arch.nmi_injected = events->nmi.injected;
2288 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) 2288 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
2289 vcpu->arch.nmi_pending = events->nmi.pending; 2289 vcpu->arch.nmi_pending = events->nmi.pending;
2290 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); 2290 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
2291 2291
2292 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) 2292 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
2293 vcpu->arch.sipi_vector = events->sipi_vector; 2293 vcpu->arch.sipi_vector = events->sipi_vector;
2294 2294
2295 vcpu_put(vcpu); 2295 vcpu_put(vcpu);
2296 2296
2297 return 0; 2297 return 0;
2298 } 2298 }
2299 2299
2300 static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, 2300 static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
2301 struct kvm_debugregs *dbgregs) 2301 struct kvm_debugregs *dbgregs)
2302 { 2302 {
2303 vcpu_load(vcpu); 2303 vcpu_load(vcpu);
2304 2304
2305 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); 2305 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
2306 dbgregs->dr6 = vcpu->arch.dr6; 2306 dbgregs->dr6 = vcpu->arch.dr6;
2307 dbgregs->dr7 = vcpu->arch.dr7; 2307 dbgregs->dr7 = vcpu->arch.dr7;
2308 dbgregs->flags = 0; 2308 dbgregs->flags = 0;
2309 2309
2310 vcpu_put(vcpu); 2310 vcpu_put(vcpu);
2311 } 2311 }
2312 2312
2313 static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, 2313 static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2314 struct kvm_debugregs *dbgregs) 2314 struct kvm_debugregs *dbgregs)
2315 { 2315 {
2316 if (dbgregs->flags) 2316 if (dbgregs->flags)
2317 return -EINVAL; 2317 return -EINVAL;
2318 2318
2319 vcpu_load(vcpu); 2319 vcpu_load(vcpu);
2320 2320
2321 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); 2321 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
2322 vcpu->arch.dr6 = dbgregs->dr6; 2322 vcpu->arch.dr6 = dbgregs->dr6;
2323 vcpu->arch.dr7 = dbgregs->dr7; 2323 vcpu->arch.dr7 = dbgregs->dr7;
2324 2324
2325 vcpu_put(vcpu); 2325 vcpu_put(vcpu);
2326 2326
2327 return 0; 2327 return 0;
2328 } 2328 }
2329 2329
2330 long kvm_arch_vcpu_ioctl(struct file *filp, 2330 long kvm_arch_vcpu_ioctl(struct file *filp,
2331 unsigned int ioctl, unsigned long arg) 2331 unsigned int ioctl, unsigned long arg)
2332 { 2332 {
2333 struct kvm_vcpu *vcpu = filp->private_data; 2333 struct kvm_vcpu *vcpu = filp->private_data;
2334 void __user *argp = (void __user *)arg; 2334 void __user *argp = (void __user *)arg;
2335 int r; 2335 int r;
2336 struct kvm_lapic_state *lapic = NULL; 2336 struct kvm_lapic_state *lapic = NULL;
2337 2337
2338 switch (ioctl) { 2338 switch (ioctl) {
2339 case KVM_GET_LAPIC: { 2339 case KVM_GET_LAPIC: {
2340 r = -EINVAL; 2340 r = -EINVAL;
2341 if (!vcpu->arch.apic) 2341 if (!vcpu->arch.apic)
2342 goto out; 2342 goto out;
2343 lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); 2343 lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
2344 2344
2345 r = -ENOMEM; 2345 r = -ENOMEM;
2346 if (!lapic) 2346 if (!lapic)
2347 goto out; 2347 goto out;
2348 r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic); 2348 r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic);
2349 if (r) 2349 if (r)
2350 goto out; 2350 goto out;
2351 r = -EFAULT; 2351 r = -EFAULT;
2352 if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state))) 2352 if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state)))
2353 goto out; 2353 goto out;
2354 r = 0; 2354 r = 0;
2355 break; 2355 break;
2356 } 2356 }
2357 case KVM_SET_LAPIC: { 2357 case KVM_SET_LAPIC: {
2358 r = -EINVAL; 2358 r = -EINVAL;
2359 if (!vcpu->arch.apic) 2359 if (!vcpu->arch.apic)
2360 goto out; 2360 goto out;
2361 lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); 2361 lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
2362 r = -ENOMEM; 2362 r = -ENOMEM;
2363 if (!lapic) 2363 if (!lapic)
2364 goto out; 2364 goto out;
2365 r = -EFAULT; 2365 r = -EFAULT;
2366 if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state))) 2366 if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state)))
2367 goto out; 2367 goto out;
2368 r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic); 2368 r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic);
2369 if (r) 2369 if (r)
2370 goto out; 2370 goto out;
2371 r = 0; 2371 r = 0;
2372 break; 2372 break;
2373 } 2373 }
2374 case KVM_INTERRUPT: { 2374 case KVM_INTERRUPT: {
2375 struct kvm_interrupt irq; 2375 struct kvm_interrupt irq;
2376 2376
2377 r = -EFAULT; 2377 r = -EFAULT;
2378 if (copy_from_user(&irq, argp, sizeof irq)) 2378 if (copy_from_user(&irq, argp, sizeof irq))
2379 goto out; 2379 goto out;
2380 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); 2380 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
2381 if (r) 2381 if (r)
2382 goto out; 2382 goto out;
2383 r = 0; 2383 r = 0;
2384 break; 2384 break;
2385 } 2385 }
2386 case KVM_NMI: { 2386 case KVM_NMI: {
2387 r = kvm_vcpu_ioctl_nmi(vcpu); 2387 r = kvm_vcpu_ioctl_nmi(vcpu);
2388 if (r) 2388 if (r)
2389 goto out; 2389 goto out;
2390 r = 0; 2390 r = 0;
2391 break; 2391 break;
2392 } 2392 }
2393 case KVM_SET_CPUID: { 2393 case KVM_SET_CPUID: {
2394 struct kvm_cpuid __user *cpuid_arg = argp; 2394 struct kvm_cpuid __user *cpuid_arg = argp;
2395 struct kvm_cpuid cpuid; 2395 struct kvm_cpuid cpuid;
2396 2396
2397 r = -EFAULT; 2397 r = -EFAULT;
2398 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 2398 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2399 goto out; 2399 goto out;
2400 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries); 2400 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
2401 if (r) 2401 if (r)
2402 goto out; 2402 goto out;
2403 break; 2403 break;
2404 } 2404 }
2405 case KVM_SET_CPUID2: { 2405 case KVM_SET_CPUID2: {
2406 struct kvm_cpuid2 __user *cpuid_arg = argp; 2406 struct kvm_cpuid2 __user *cpuid_arg = argp;
2407 struct kvm_cpuid2 cpuid; 2407 struct kvm_cpuid2 cpuid;
2408 2408
2409 r = -EFAULT; 2409 r = -EFAULT;
2410 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 2410 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2411 goto out; 2411 goto out;
2412 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid, 2412 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
2413 cpuid_arg->entries); 2413 cpuid_arg->entries);
2414 if (r) 2414 if (r)
2415 goto out; 2415 goto out;
2416 break; 2416 break;
2417 } 2417 }
2418 case KVM_GET_CPUID2: { 2418 case KVM_GET_CPUID2: {
2419 struct kvm_cpuid2 __user *cpuid_arg = argp; 2419 struct kvm_cpuid2 __user *cpuid_arg = argp;
2420 struct kvm_cpuid2 cpuid; 2420 struct kvm_cpuid2 cpuid;
2421 2421
2422 r = -EFAULT; 2422 r = -EFAULT;
2423 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 2423 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2424 goto out; 2424 goto out;
2425 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid, 2425 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
2426 cpuid_arg->entries); 2426 cpuid_arg->entries);
2427 if (r) 2427 if (r)
2428 goto out; 2428 goto out;
2429 r = -EFAULT; 2429 r = -EFAULT;
2430 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) 2430 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
2431 goto out; 2431 goto out;
2432 r = 0; 2432 r = 0;
2433 break; 2433 break;
2434 } 2434 }
2435 case KVM_GET_MSRS: 2435 case KVM_GET_MSRS:
2436 r = msr_io(vcpu, argp, kvm_get_msr, 1); 2436 r = msr_io(vcpu, argp, kvm_get_msr, 1);
2437 break; 2437 break;
2438 case KVM_SET_MSRS: 2438 case KVM_SET_MSRS:
2439 r = msr_io(vcpu, argp, do_set_msr, 0); 2439 r = msr_io(vcpu, argp, do_set_msr, 0);
2440 break; 2440 break;
2441 case KVM_TPR_ACCESS_REPORTING: { 2441 case KVM_TPR_ACCESS_REPORTING: {
2442 struct kvm_tpr_access_ctl tac; 2442 struct kvm_tpr_access_ctl tac;
2443 2443
2444 r = -EFAULT; 2444 r = -EFAULT;
2445 if (copy_from_user(&tac, argp, sizeof tac)) 2445 if (copy_from_user(&tac, argp, sizeof tac))
2446 goto out; 2446 goto out;
2447 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac); 2447 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
2448 if (r) 2448 if (r)
2449 goto out; 2449 goto out;
2450 r = -EFAULT; 2450 r = -EFAULT;
2451 if (copy_to_user(argp, &tac, sizeof tac)) 2451 if (copy_to_user(argp, &tac, sizeof tac))
2452 goto out; 2452 goto out;
2453 r = 0; 2453 r = 0;
2454 break; 2454 break;
2455 }; 2455 };
2456 case KVM_SET_VAPIC_ADDR: { 2456 case KVM_SET_VAPIC_ADDR: {
2457 struct kvm_vapic_addr va; 2457 struct kvm_vapic_addr va;
2458 2458
2459 r = -EINVAL; 2459 r = -EINVAL;
2460 if (!irqchip_in_kernel(vcpu->kvm)) 2460 if (!irqchip_in_kernel(vcpu->kvm))
2461 goto out; 2461 goto out;
2462 r = -EFAULT; 2462 r = -EFAULT;
2463 if (copy_from_user(&va, argp, sizeof va)) 2463 if (copy_from_user(&va, argp, sizeof va))
2464 goto out; 2464 goto out;
2465 r = 0; 2465 r = 0;
2466 kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr); 2466 kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
2467 break; 2467 break;
2468 } 2468 }
2469 case KVM_X86_SETUP_MCE: { 2469 case KVM_X86_SETUP_MCE: {
2470 u64 mcg_cap; 2470 u64 mcg_cap;
2471 2471
2472 r = -EFAULT; 2472 r = -EFAULT;
2473 if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap)) 2473 if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
2474 goto out; 2474 goto out;
2475 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap); 2475 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
2476 break; 2476 break;
2477 } 2477 }
2478 case KVM_X86_SET_MCE: { 2478 case KVM_X86_SET_MCE: {
2479 struct kvm_x86_mce mce; 2479 struct kvm_x86_mce mce;
2480 2480
2481 r = -EFAULT; 2481 r = -EFAULT;
2482 if (copy_from_user(&mce, argp, sizeof mce)) 2482 if (copy_from_user(&mce, argp, sizeof mce))
2483 goto out; 2483 goto out;
2484 vcpu_load(vcpu); 2484 vcpu_load(vcpu);
2485 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); 2485 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
2486 vcpu_put(vcpu); 2486 vcpu_put(vcpu);
2487 break; 2487 break;
2488 } 2488 }
2489 case KVM_GET_VCPU_EVENTS: { 2489 case KVM_GET_VCPU_EVENTS: {
2490 struct kvm_vcpu_events events; 2490 struct kvm_vcpu_events events;
2491 2491
2492 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events); 2492 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
2493 2493
2494 r = -EFAULT; 2494 r = -EFAULT;
2495 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events))) 2495 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
2496 break; 2496 break;
2497 r = 0; 2497 r = 0;
2498 break; 2498 break;
2499 } 2499 }
2500 case KVM_SET_VCPU_EVENTS: { 2500 case KVM_SET_VCPU_EVENTS: {
2501 struct kvm_vcpu_events events; 2501 struct kvm_vcpu_events events;
2502 2502
2503 r = -EFAULT; 2503 r = -EFAULT;
2504 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events))) 2504 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
2505 break; 2505 break;
2506 2506
2507 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); 2507 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
2508 break; 2508 break;
2509 } 2509 }
2510 case KVM_GET_DEBUGREGS: { 2510 case KVM_GET_DEBUGREGS: {
2511 struct kvm_debugregs dbgregs; 2511 struct kvm_debugregs dbgregs;
2512 2512
2513 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs); 2513 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
2514 2514
2515 r = -EFAULT; 2515 r = -EFAULT;
2516 if (copy_to_user(argp, &dbgregs, 2516 if (copy_to_user(argp, &dbgregs,
2517 sizeof(struct kvm_debugregs))) 2517 sizeof(struct kvm_debugregs)))
2518 break; 2518 break;
2519 r = 0; 2519 r = 0;
2520 break; 2520 break;
2521 } 2521 }
2522 case KVM_SET_DEBUGREGS: { 2522 case KVM_SET_DEBUGREGS: {
2523 struct kvm_debugregs dbgregs; 2523 struct kvm_debugregs dbgregs;
2524 2524
2525 r = -EFAULT; 2525 r = -EFAULT;
2526 if (copy_from_user(&dbgregs, argp, 2526 if (copy_from_user(&dbgregs, argp,
2527 sizeof(struct kvm_debugregs))) 2527 sizeof(struct kvm_debugregs)))
2528 break; 2528 break;
2529 2529
2530 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); 2530 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
2531 break; 2531 break;
2532 } 2532 }
2533 default: 2533 default:
2534 r = -EINVAL; 2534 r = -EINVAL;
2535 } 2535 }
2536 out: 2536 out:
2537 kfree(lapic); 2537 kfree(lapic);
2538 return r; 2538 return r;
2539 } 2539 }
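
Note that the arch-specific handlers in kvm_arch_vcpu_ioctl() above still take vcpu_load()/vcpu_put() themselves; this patch deliberately leaves them alone. The point of the change is to hoist that locking into the common vcpu ioctl dispatcher for the generic ioctls. A rough sketch of the pattern the commit message describes (not the literal kvm_main.c hunk):

/* Sketch only: generic vcpu ioctl dispatch with the locking hoisted.
 * vcpu_load() takes vcpu->mutex and loads the vcpu on this cpu;
 * vcpu_put() undoes both, so the generic handlers called from the
 * switch no longer need their own vcpu_load()/vcpu_put() pair. */
static long kvm_vcpu_ioctl(struct file *filp,
			   unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	long r;

	vcpu_load(vcpu);
	switch (ioctl) {
	/* ... generic vcpu ioctls, per-handler locking dropped ... */
	default:
		r = -EINVAL;
	}
	vcpu_put(vcpu);
	return r;
}
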
2540 2540
2541 static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) 2541 static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
2542 { 2542 {
2543 int ret; 2543 int ret;
2544 2544
2545 if (addr > (unsigned int)(-3 * PAGE_SIZE)) 2545 if (addr > (unsigned int)(-3 * PAGE_SIZE))
2546 return -1; 2546 return -1;
2547 ret = kvm_x86_ops->set_tss_addr(kvm, addr); 2547 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
2548 return ret; 2548 return ret;
2549 } 2549 }
2550 2550
2551 static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm, 2551 static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
2552 u64 ident_addr) 2552 u64 ident_addr)
2553 { 2553 {
2554 kvm->arch.ept_identity_map_addr = ident_addr; 2554 kvm->arch.ept_identity_map_addr = ident_addr;
2555 return 0; 2555 return 0;
2556 } 2556 }
2557 2557
2558 static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, 2558 static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
2559 u32 kvm_nr_mmu_pages) 2559 u32 kvm_nr_mmu_pages)
2560 { 2560 {
2561 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) 2561 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
2562 return -EINVAL; 2562 return -EINVAL;
2563 2563
2564 mutex_lock(&kvm->slots_lock); 2564 mutex_lock(&kvm->slots_lock);
2565 spin_lock(&kvm->mmu_lock); 2565 spin_lock(&kvm->mmu_lock);
2566 2566
2567 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); 2567 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
2568 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; 2568 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
2569 2569
2570 spin_unlock(&kvm->mmu_lock); 2570 spin_unlock(&kvm->mmu_lock);
2571 mutex_unlock(&kvm->slots_lock); 2571 mutex_unlock(&kvm->slots_lock);
2572 return 0; 2572 return 0;
2573 } 2573 }
2574 2574
2575 static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) 2575 static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
2576 { 2576 {
2577 return kvm->arch.n_alloc_mmu_pages; 2577 return kvm->arch.n_alloc_mmu_pages;
2578 } 2578 }
2579 2579
2580 gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) 2580 gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn)
2581 { 2581 {
2582 int i; 2582 int i;
2583 struct kvm_mem_alias *alias; 2583 struct kvm_mem_alias *alias;
2584 struct kvm_mem_aliases *aliases; 2584 struct kvm_mem_aliases *aliases;
2585 2585
2586 aliases = kvm_aliases(kvm); 2586 aliases = kvm_aliases(kvm);
2587 2587
2588 for (i = 0; i < aliases->naliases; ++i) { 2588 for (i = 0; i < aliases->naliases; ++i) {
2589 alias = &aliases->aliases[i]; 2589 alias = &aliases->aliases[i];
2590 if (alias->flags & KVM_ALIAS_INVALID) 2590 if (alias->flags & KVM_ALIAS_INVALID)
2591 continue; 2591 continue;
2592 if (gfn >= alias->base_gfn 2592 if (gfn >= alias->base_gfn
2593 && gfn < alias->base_gfn + alias->npages) 2593 && gfn < alias->base_gfn + alias->npages)
2594 return alias->target_gfn + gfn - alias->base_gfn; 2594 return alias->target_gfn + gfn - alias->base_gfn;
2595 } 2595 }
2596 return gfn; 2596 return gfn;
2597 } 2597 }
2598 2598
2599 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 2599 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
2600 { 2600 {
2601 int i; 2601 int i;
2602 struct kvm_mem_alias *alias; 2602 struct kvm_mem_alias *alias;
2603 struct kvm_mem_aliases *aliases; 2603 struct kvm_mem_aliases *aliases;
2604 2604
2605 aliases = kvm_aliases(kvm); 2605 aliases = kvm_aliases(kvm);
2606 2606
2607 for (i = 0; i < aliases->naliases; ++i) { 2607 for (i = 0; i < aliases->naliases; ++i) {
2608 alias = &aliases->aliases[i]; 2608 alias = &aliases->aliases[i];
2609 if (gfn >= alias->base_gfn 2609 if (gfn >= alias->base_gfn
2610 && gfn < alias->base_gfn + alias->npages) 2610 && gfn < alias->base_gfn + alias->npages)
2611 return alias->target_gfn + gfn - alias->base_gfn; 2611 return alias->target_gfn + gfn - alias->base_gfn;
2612 } 2612 }
2613 return gfn; 2613 return gfn;
2614 } 2614 }
2615 2615
2616 /* 2616 /*
2617 * Set a new alias region. Aliases map a portion of physical memory into 2617 * Set a new alias region. Aliases map a portion of physical memory into
2618 * another portion. This is useful for memory windows, for example the PC 2618 * another portion. This is useful for memory windows, for example the PC
2619 * VGA region. 2619 * VGA region.
2620 */ 2620 */
2621 static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, 2621 static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
2622 struct kvm_memory_alias *alias) 2622 struct kvm_memory_alias *alias)
2623 { 2623 {
2624 int r, n; 2624 int r, n;
2625 struct kvm_mem_alias *p; 2625 struct kvm_mem_alias *p;
2626 struct kvm_mem_aliases *aliases, *old_aliases; 2626 struct kvm_mem_aliases *aliases, *old_aliases;
2627 2627
2628 r = -EINVAL; 2628 r = -EINVAL;
2629 /* General sanity checks */ 2629 /* General sanity checks */
2630 if (alias->memory_size & (PAGE_SIZE - 1)) 2630 if (alias->memory_size & (PAGE_SIZE - 1))
2631 goto out; 2631 goto out;
2632 if (alias->guest_phys_addr & (PAGE_SIZE - 1)) 2632 if (alias->guest_phys_addr & (PAGE_SIZE - 1))
2633 goto out; 2633 goto out;
2634 if (alias->slot >= KVM_ALIAS_SLOTS) 2634 if (alias->slot >= KVM_ALIAS_SLOTS)
2635 goto out; 2635 goto out;
2636 if (alias->guest_phys_addr + alias->memory_size 2636 if (alias->guest_phys_addr + alias->memory_size
2637 < alias->guest_phys_addr) 2637 < alias->guest_phys_addr)
2638 goto out; 2638 goto out;
2639 if (alias->target_phys_addr + alias->memory_size 2639 if (alias->target_phys_addr + alias->memory_size
2640 < alias->target_phys_addr) 2640 < alias->target_phys_addr)
2641 goto out; 2641 goto out;
2642 2642
2643 r = -ENOMEM; 2643 r = -ENOMEM;
2644 aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); 2644 aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
2645 if (!aliases) 2645 if (!aliases)
2646 goto out; 2646 goto out;
2647 2647
2648 mutex_lock(&kvm->slots_lock); 2648 mutex_lock(&kvm->slots_lock);
2649 2649
2650 /* invalidate any gfn reference in case of deletion/shrinking */ 2650 /* invalidate any gfn reference in case of deletion/shrinking */
2651 memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); 2651 memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
2652 aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID; 2652 aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID;
2653 old_aliases = kvm->arch.aliases; 2653 old_aliases = kvm->arch.aliases;
2654 rcu_assign_pointer(kvm->arch.aliases, aliases); 2654 rcu_assign_pointer(kvm->arch.aliases, aliases);
2655 synchronize_srcu_expedited(&kvm->srcu); 2655 synchronize_srcu_expedited(&kvm->srcu);
2656 kvm_mmu_zap_all(kvm); 2656 kvm_mmu_zap_all(kvm);
2657 kfree(old_aliases); 2657 kfree(old_aliases);
2658 2658
2659 r = -ENOMEM; 2659 r = -ENOMEM;
2660 aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); 2660 aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
2661 if (!aliases) 2661 if (!aliases)
2662 goto out_unlock; 2662 goto out_unlock;
2663 2663
2664 memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); 2664 memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
2665 2665
2666 p = &aliases->aliases[alias->slot]; 2666 p = &aliases->aliases[alias->slot];
2667 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; 2667 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
2668 p->npages = alias->memory_size >> PAGE_SHIFT; 2668 p->npages = alias->memory_size >> PAGE_SHIFT;
2669 p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; 2669 p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
2670 p->flags &= ~(KVM_ALIAS_INVALID); 2670 p->flags &= ~(KVM_ALIAS_INVALID);
2671 2671
2672 for (n = KVM_ALIAS_SLOTS; n > 0; --n) 2672 for (n = KVM_ALIAS_SLOTS; n > 0; --n)
2673 if (aliases->aliases[n - 1].npages) 2673 if (aliases->aliases[n - 1].npages)
2674 break; 2674 break;
2675 aliases->naliases = n; 2675 aliases->naliases = n;
2676 2676
2677 old_aliases = kvm->arch.aliases; 2677 old_aliases = kvm->arch.aliases;
2678 rcu_assign_pointer(kvm->arch.aliases, aliases); 2678 rcu_assign_pointer(kvm->arch.aliases, aliases);
2679 synchronize_srcu_expedited(&kvm->srcu); 2679 synchronize_srcu_expedited(&kvm->srcu);
2680 kfree(old_aliases); 2680 kfree(old_aliases);
2681 r = 0; 2681 r = 0;
2682 2682
2683 out_unlock: 2683 out_unlock:
2684 mutex_unlock(&kvm->slots_lock); 2684 mutex_unlock(&kvm->slots_lock);
2685 out: 2685 out:
2686 return r; 2686 return r;
2687 } 2687 }
2688 2688
2689 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) 2689 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
2690 { 2690 {
2691 int r; 2691 int r;
2692 2692
2693 r = 0; 2693 r = 0;
2694 switch (chip->chip_id) { 2694 switch (chip->chip_id) {
2695 case KVM_IRQCHIP_PIC_MASTER: 2695 case KVM_IRQCHIP_PIC_MASTER:
2696 memcpy(&chip->chip.pic, 2696 memcpy(&chip->chip.pic,
2697 &pic_irqchip(kvm)->pics[0], 2697 &pic_irqchip(kvm)->pics[0],
2698 sizeof(struct kvm_pic_state)); 2698 sizeof(struct kvm_pic_state));
2699 break; 2699 break;
2700 case KVM_IRQCHIP_PIC_SLAVE: 2700 case KVM_IRQCHIP_PIC_SLAVE:
2701 memcpy(&chip->chip.pic, 2701 memcpy(&chip->chip.pic,
2702 &pic_irqchip(kvm)->pics[1], 2702 &pic_irqchip(kvm)->pics[1],
2703 sizeof(struct kvm_pic_state)); 2703 sizeof(struct kvm_pic_state));
2704 break; 2704 break;
2705 case KVM_IRQCHIP_IOAPIC: 2705 case KVM_IRQCHIP_IOAPIC:
2706 r = kvm_get_ioapic(kvm, &chip->chip.ioapic); 2706 r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
2707 break; 2707 break;
2708 default: 2708 default:
2709 r = -EINVAL; 2709 r = -EINVAL;
2710 break; 2710 break;
2711 } 2711 }
2712 return r; 2712 return r;
2713 } 2713 }
2714 2714
2715 static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) 2715 static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
2716 { 2716 {
2717 int r; 2717 int r;
2718 2718
2719 r = 0; 2719 r = 0;
2720 switch (chip->chip_id) { 2720 switch (chip->chip_id) {
2721 case KVM_IRQCHIP_PIC_MASTER: 2721 case KVM_IRQCHIP_PIC_MASTER:
2722 raw_spin_lock(&pic_irqchip(kvm)->lock); 2722 raw_spin_lock(&pic_irqchip(kvm)->lock);
2723 memcpy(&pic_irqchip(kvm)->pics[0], 2723 memcpy(&pic_irqchip(kvm)->pics[0],
2724 &chip->chip.pic, 2724 &chip->chip.pic,
2725 sizeof(struct kvm_pic_state)); 2725 sizeof(struct kvm_pic_state));
2726 raw_spin_unlock(&pic_irqchip(kvm)->lock); 2726 raw_spin_unlock(&pic_irqchip(kvm)->lock);
2727 break; 2727 break;
2728 case KVM_IRQCHIP_PIC_SLAVE: 2728 case KVM_IRQCHIP_PIC_SLAVE:
2729 raw_spin_lock(&pic_irqchip(kvm)->lock); 2729 raw_spin_lock(&pic_irqchip(kvm)->lock);
2730 memcpy(&pic_irqchip(kvm)->pics[1], 2730 memcpy(&pic_irqchip(kvm)->pics[1],
2731 &chip->chip.pic, 2731 &chip->chip.pic,
2732 sizeof(struct kvm_pic_state)); 2732 sizeof(struct kvm_pic_state));
2733 raw_spin_unlock(&pic_irqchip(kvm)->lock); 2733 raw_spin_unlock(&pic_irqchip(kvm)->lock);
2734 break; 2734 break;
2735 case KVM_IRQCHIP_IOAPIC: 2735 case KVM_IRQCHIP_IOAPIC:
2736 r = kvm_set_ioapic(kvm, &chip->chip.ioapic); 2736 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
2737 break; 2737 break;
2738 default: 2738 default:
2739 r = -EINVAL; 2739 r = -EINVAL;
2740 break; 2740 break;
2741 } 2741 }
2742 kvm_pic_update_irq(pic_irqchip(kvm)); 2742 kvm_pic_update_irq(pic_irqchip(kvm));
2743 return r; 2743 return r;
2744 } 2744 }
2745 2745
2746 static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) 2746 static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
2747 { 2747 {
2748 int r = 0; 2748 int r = 0;
2749 2749
2750 mutex_lock(&kvm->arch.vpit->pit_state.lock); 2750 mutex_lock(&kvm->arch.vpit->pit_state.lock);
2751 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state)); 2751 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
2752 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 2752 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
2753 return r; 2753 return r;
2754 } 2754 }
2755 2755
2756 static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) 2756 static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
2757 { 2757 {
2758 int r = 0; 2758 int r = 0;
2759 2759
2760 mutex_lock(&kvm->arch.vpit->pit_state.lock); 2760 mutex_lock(&kvm->arch.vpit->pit_state.lock);
2761 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state)); 2761 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
2762 kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0); 2762 kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
2763 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 2763 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
2764 return r; 2764 return r;
2765 } 2765 }
2766 2766
2767 static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) 2767 static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
2768 { 2768 {
2769 int r = 0; 2769 int r = 0;
2770 2770
2771 mutex_lock(&kvm->arch.vpit->pit_state.lock); 2771 mutex_lock(&kvm->arch.vpit->pit_state.lock);
2772 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels, 2772 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
2773 sizeof(ps->channels)); 2773 sizeof(ps->channels));
2774 ps->flags = kvm->arch.vpit->pit_state.flags; 2774 ps->flags = kvm->arch.vpit->pit_state.flags;
2775 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 2775 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
2776 return r; 2776 return r;
2777 } 2777 }
2778 2778
2779 static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) 2779 static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
2780 { 2780 {
2781 int r = 0, start = 0; 2781 int r = 0, start = 0;
2782 u32 prev_legacy, cur_legacy; 2782 u32 prev_legacy, cur_legacy;
2783 mutex_lock(&kvm->arch.vpit->pit_state.lock); 2783 mutex_lock(&kvm->arch.vpit->pit_state.lock);
2784 prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY; 2784 prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
2785 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY; 2785 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
2786 if (!prev_legacy && cur_legacy) 2786 if (!prev_legacy && cur_legacy)
2787 start = 1; 2787 start = 1;
2788 memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels, 2788 memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
2789 sizeof(kvm->arch.vpit->pit_state.channels)); 2789 sizeof(kvm->arch.vpit->pit_state.channels));
2790 kvm->arch.vpit->pit_state.flags = ps->flags; 2790 kvm->arch.vpit->pit_state.flags = ps->flags;
2791 kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start); 2791 kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
2792 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 2792 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
2793 return r; 2793 return r;
2794 } 2794 }
2795 2795
2796 static int kvm_vm_ioctl_reinject(struct kvm *kvm, 2796 static int kvm_vm_ioctl_reinject(struct kvm *kvm,
2797 struct kvm_reinject_control *control) 2797 struct kvm_reinject_control *control)
2798 { 2798 {
2799 if (!kvm->arch.vpit) 2799 if (!kvm->arch.vpit)
2800 return -ENXIO; 2800 return -ENXIO;
2801 mutex_lock(&kvm->arch.vpit->pit_state.lock); 2801 mutex_lock(&kvm->arch.vpit->pit_state.lock);
2802 kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject; 2802 kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
2803 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 2803 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
2804 return 0; 2804 return 0;
2805 } 2805 }
2806 2806
2807 /* 2807 /*
2808 * Get (and clear) the dirty memory log for a memory slot. 2808 * Get (and clear) the dirty memory log for a memory slot.
2809 */ 2809 */
2810 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 2810 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
2811 struct kvm_dirty_log *log) 2811 struct kvm_dirty_log *log)
2812 { 2812 {
2813 int r, i; 2813 int r, i;
2814 struct kvm_memory_slot *memslot; 2814 struct kvm_memory_slot *memslot;
2815 unsigned long n; 2815 unsigned long n;
2816 unsigned long is_dirty = 0; 2816 unsigned long is_dirty = 0;
2817 2817
2818 mutex_lock(&kvm->slots_lock); 2818 mutex_lock(&kvm->slots_lock);
2819 2819
2820 r = -EINVAL; 2820 r = -EINVAL;
2821 if (log->slot >= KVM_MEMORY_SLOTS) 2821 if (log->slot >= KVM_MEMORY_SLOTS)
2822 goto out; 2822 goto out;
2823 2823
2824 memslot = &kvm->memslots->memslots[log->slot]; 2824 memslot = &kvm->memslots->memslots[log->slot];
2825 r = -ENOENT; 2825 r = -ENOENT;
2826 if (!memslot->dirty_bitmap) 2826 if (!memslot->dirty_bitmap)
2827 goto out; 2827 goto out;
2828 2828
2829 n = kvm_dirty_bitmap_bytes(memslot); 2829 n = kvm_dirty_bitmap_bytes(memslot);
2830 2830
2831 for (i = 0; !is_dirty && i < n/sizeof(long); i++) 2831 for (i = 0; !is_dirty && i < n/sizeof(long); i++)
2832 is_dirty = memslot->dirty_bitmap[i]; 2832 is_dirty = memslot->dirty_bitmap[i];
2833 2833
2834 /* If nothing is dirty, don't bother messing with page tables. */ 2834 /* If nothing is dirty, don't bother messing with page tables. */
2835 if (is_dirty) { 2835 if (is_dirty) {
2836 struct kvm_memslots *slots, *old_slots; 2836 struct kvm_memslots *slots, *old_slots;
2837 unsigned long *dirty_bitmap; 2837 unsigned long *dirty_bitmap;
2838 2838
2839 spin_lock(&kvm->mmu_lock); 2839 spin_lock(&kvm->mmu_lock);
2840 kvm_mmu_slot_remove_write_access(kvm, log->slot); 2840 kvm_mmu_slot_remove_write_access(kvm, log->slot);
2841 spin_unlock(&kvm->mmu_lock); 2841 spin_unlock(&kvm->mmu_lock);
2842 2842
2843 r = -ENOMEM; 2843 r = -ENOMEM;
2844 dirty_bitmap = vmalloc(n); 2844 dirty_bitmap = vmalloc(n);
2845 if (!dirty_bitmap) 2845 if (!dirty_bitmap)
2846 goto out; 2846 goto out;
2847 memset(dirty_bitmap, 0, n); 2847 memset(dirty_bitmap, 0, n);
2848 2848
2849 r = -ENOMEM; 2849 r = -ENOMEM;
2850 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 2850 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
2851 if (!slots) { 2851 if (!slots) {
2852 vfree(dirty_bitmap); 2852 vfree(dirty_bitmap);
2853 goto out; 2853 goto out;
2854 } 2854 }
2855 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); 2855 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
2856 slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; 2856 slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
2857 2857
2858 old_slots = kvm->memslots; 2858 old_slots = kvm->memslots;
2859 rcu_assign_pointer(kvm->memslots, slots); 2859 rcu_assign_pointer(kvm->memslots, slots);
2860 synchronize_srcu_expedited(&kvm->srcu); 2860 synchronize_srcu_expedited(&kvm->srcu);
2861 dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; 2861 dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
2862 kfree(old_slots); 2862 kfree(old_slots);
2863 2863
2864 r = -EFAULT; 2864 r = -EFAULT;
2865 if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) { 2865 if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) {
2866 vfree(dirty_bitmap); 2866 vfree(dirty_bitmap);
2867 goto out; 2867 goto out;
2868 } 2868 }
2869 vfree(dirty_bitmap); 2869 vfree(dirty_bitmap);
2870 } else { 2870 } else {
2871 r = -EFAULT; 2871 r = -EFAULT;
2872 if (clear_user(log->dirty_bitmap, n)) 2872 if (clear_user(log->dirty_bitmap, n))
2873 goto out; 2873 goto out;
2874 } 2874 }
2875 2875
2876 r = 0; 2876 r = 0;
2877 out: 2877 out:
2878 mutex_unlock(&kvm->slots_lock); 2878 mutex_unlock(&kvm->slots_lock);
2879 return r; 2879 return r;
2880 } 2880 }
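
The function above is the backend of the KVM_GET_DIRTY_LOG vm ioctl; it snapshots and clears a memslot's dirty bitmap by publishing a fresh memslots copy under SRCU. A minimal userspace sketch, assuming vm_fd is a KVM VM fd and slot/mem_size describe a memslot registered earlier with KVM_SET_USER_MEMORY_REGION:

/* Illustrative only: fetch (and implicitly clear) the dirty bitmap for
 * one memslot. Assumes 4 KiB host pages and a 64-bit host, so the
 * bitmap is rounded to long-sized chunks like the kernel's own copy. */
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int fetch_dirty_log(int vm_fd, int slot, size_t mem_size)
{
	size_t pages = mem_size / 4096;
	size_t bytes = ((pages + 63) / 64) * 8;	/* one bit per page */
	struct kvm_dirty_log log;
	void *bitmap = calloc(1, bytes);

	if (!bitmap)
		return -1;
	memset(&log, 0, sizeof(log));
	log.slot = slot;
	log.dirty_bitmap = bitmap;
	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0) {
		free(bitmap);
		return -1;
	}
	/* ... scan bitmap for pages the guest dirtied since last call ... */
	free(bitmap);
	return 0;
}
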
2881 2881
2882 long kvm_arch_vm_ioctl(struct file *filp, 2882 long kvm_arch_vm_ioctl(struct file *filp,
2883 unsigned int ioctl, unsigned long arg) 2883 unsigned int ioctl, unsigned long arg)
2884 { 2884 {
2885 struct kvm *kvm = filp->private_data; 2885 struct kvm *kvm = filp->private_data;
2886 void __user *argp = (void __user *)arg; 2886 void __user *argp = (void __user *)arg;
2887 int r = -ENOTTY; 2887 int r = -ENOTTY;
2888 /* 2888 /*
2889 * This union makes it completely explicit to gcc-3.x 2889 * This union makes it completely explicit to gcc-3.x
2890 * that these two variables' stack usage should be 2890 * that these two variables' stack usage should be
2891 * combined, not added together. 2891 * combined, not added together.
2892 */ 2892 */
2893 union { 2893 union {
2894 struct kvm_pit_state ps; 2894 struct kvm_pit_state ps;
2895 struct kvm_pit_state2 ps2; 2895 struct kvm_pit_state2 ps2;
2896 struct kvm_memory_alias alias; 2896 struct kvm_memory_alias alias;
2897 struct kvm_pit_config pit_config; 2897 struct kvm_pit_config pit_config;
2898 } u; 2898 } u;
2899 2899
2900 switch (ioctl) { 2900 switch (ioctl) {
2901 case KVM_SET_TSS_ADDR: 2901 case KVM_SET_TSS_ADDR:
2902 r = kvm_vm_ioctl_set_tss_addr(kvm, arg); 2902 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
2903 if (r < 0) 2903 if (r < 0)
2904 goto out; 2904 goto out;
2905 break; 2905 break;
2906 case KVM_SET_IDENTITY_MAP_ADDR: { 2906 case KVM_SET_IDENTITY_MAP_ADDR: {
2907 u64 ident_addr; 2907 u64 ident_addr;
2908 2908
2909 r = -EFAULT; 2909 r = -EFAULT;
2910 if (copy_from_user(&ident_addr, argp, sizeof ident_addr)) 2910 if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
2911 goto out; 2911 goto out;
2912 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr); 2912 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
2913 if (r < 0) 2913 if (r < 0)
2914 goto out; 2914 goto out;
2915 break; 2915 break;
2916 } 2916 }
2917 case KVM_SET_MEMORY_REGION: { 2917 case KVM_SET_MEMORY_REGION: {
2918 struct kvm_memory_region kvm_mem; 2918 struct kvm_memory_region kvm_mem;
2919 struct kvm_userspace_memory_region kvm_userspace_mem; 2919 struct kvm_userspace_memory_region kvm_userspace_mem;
2920 2920
2921 r = -EFAULT; 2921 r = -EFAULT;
2922 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) 2922 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
2923 goto out; 2923 goto out;
2924 kvm_userspace_mem.slot = kvm_mem.slot; 2924 kvm_userspace_mem.slot = kvm_mem.slot;
2925 kvm_userspace_mem.flags = kvm_mem.flags; 2925 kvm_userspace_mem.flags = kvm_mem.flags;
2926 kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr; 2926 kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
2927 kvm_userspace_mem.memory_size = kvm_mem.memory_size; 2927 kvm_userspace_mem.memory_size = kvm_mem.memory_size;
2928 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0); 2928 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
2929 if (r) 2929 if (r)
2930 goto out; 2930 goto out;
2931 break; 2931 break;
2932 } 2932 }
2933 case KVM_SET_NR_MMU_PAGES: 2933 case KVM_SET_NR_MMU_PAGES:
2934 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg); 2934 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
2935 if (r) 2935 if (r)
2936 goto out; 2936 goto out;
2937 break; 2937 break;
2938 case KVM_GET_NR_MMU_PAGES: 2938 case KVM_GET_NR_MMU_PAGES:
2939 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); 2939 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
2940 break; 2940 break;
2941 case KVM_SET_MEMORY_ALIAS: 2941 case KVM_SET_MEMORY_ALIAS:
2942 r = -EFAULT; 2942 r = -EFAULT;
2943 if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias))) 2943 if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias)))
2944 goto out; 2944 goto out;
2945 r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias); 2945 r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias);
2946 if (r) 2946 if (r)
2947 goto out; 2947 goto out;
2948 break; 2948 break;
2949 case KVM_CREATE_IRQCHIP: { 2949 case KVM_CREATE_IRQCHIP: {
2950 struct kvm_pic *vpic; 2950 struct kvm_pic *vpic;
2951 2951
2952 mutex_lock(&kvm->lock); 2952 mutex_lock(&kvm->lock);
2953 r = -EEXIST; 2953 r = -EEXIST;
2954 if (kvm->arch.vpic) 2954 if (kvm->arch.vpic)
2955 goto create_irqchip_unlock; 2955 goto create_irqchip_unlock;
2956 r = -ENOMEM; 2956 r = -ENOMEM;
2957 vpic = kvm_create_pic(kvm); 2957 vpic = kvm_create_pic(kvm);
2958 if (vpic) { 2958 if (vpic) {
2959 r = kvm_ioapic_init(kvm); 2959 r = kvm_ioapic_init(kvm);
2960 if (r) { 2960 if (r) {
2961 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, 2961 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
2962 &vpic->dev); 2962 &vpic->dev);
2963 kfree(vpic); 2963 kfree(vpic);
2964 goto create_irqchip_unlock; 2964 goto create_irqchip_unlock;
2965 } 2965 }
2966 } else 2966 } else
2967 goto create_irqchip_unlock; 2967 goto create_irqchip_unlock;
2968 smp_wmb(); 2968 smp_wmb();
2969 kvm->arch.vpic = vpic; 2969 kvm->arch.vpic = vpic;
2970 smp_wmb(); 2970 smp_wmb();
2971 r = kvm_setup_default_irq_routing(kvm); 2971 r = kvm_setup_default_irq_routing(kvm);
2972 if (r) { 2972 if (r) {
2973 mutex_lock(&kvm->irq_lock); 2973 mutex_lock(&kvm->irq_lock);
2974 kvm_ioapic_destroy(kvm); 2974 kvm_ioapic_destroy(kvm);
2975 kvm_destroy_pic(kvm); 2975 kvm_destroy_pic(kvm);
2976 mutex_unlock(&kvm->irq_lock); 2976 mutex_unlock(&kvm->irq_lock);
2977 } 2977 }
2978 create_irqchip_unlock: 2978 create_irqchip_unlock:
2979 mutex_unlock(&kvm->lock); 2979 mutex_unlock(&kvm->lock);
2980 break; 2980 break;
2981 } 2981 }
2982 case KVM_CREATE_PIT: 2982 case KVM_CREATE_PIT:
2983 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY; 2983 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
2984 goto create_pit; 2984 goto create_pit;
2985 case KVM_CREATE_PIT2: 2985 case KVM_CREATE_PIT2:
2986 r = -EFAULT; 2986 r = -EFAULT;
2987 if (copy_from_user(&u.pit_config, argp, 2987 if (copy_from_user(&u.pit_config, argp,
2988 sizeof(struct kvm_pit_config))) 2988 sizeof(struct kvm_pit_config)))
2989 goto out; 2989 goto out;
2990 create_pit: 2990 create_pit:
2991 mutex_lock(&kvm->slots_lock); 2991 mutex_lock(&kvm->slots_lock);
2992 r = -EEXIST; 2992 r = -EEXIST;
2993 if (kvm->arch.vpit) 2993 if (kvm->arch.vpit)
2994 goto create_pit_unlock; 2994 goto create_pit_unlock;
2995 r = -ENOMEM; 2995 r = -ENOMEM;
2996 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags); 2996 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
2997 if (kvm->arch.vpit) 2997 if (kvm->arch.vpit)
2998 r = 0; 2998 r = 0;
2999 create_pit_unlock: 2999 create_pit_unlock:
3000 mutex_unlock(&kvm->slots_lock); 3000 mutex_unlock(&kvm->slots_lock);
3001 break; 3001 break;
3002 case KVM_IRQ_LINE_STATUS: 3002 case KVM_IRQ_LINE_STATUS:
3003 case KVM_IRQ_LINE: { 3003 case KVM_IRQ_LINE: {
3004 struct kvm_irq_level irq_event; 3004 struct kvm_irq_level irq_event;
3005 3005
3006 r = -EFAULT; 3006 r = -EFAULT;
3007 if (copy_from_user(&irq_event, argp, sizeof irq_event)) 3007 if (copy_from_user(&irq_event, argp, sizeof irq_event))
3008 goto out; 3008 goto out;
3009 r = -ENXIO; 3009 r = -ENXIO;
3010 if (irqchip_in_kernel(kvm)) { 3010 if (irqchip_in_kernel(kvm)) {
3011 __s32 status; 3011 __s32 status;
3012 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 3012 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
3013 irq_event.irq, irq_event.level); 3013 irq_event.irq, irq_event.level);
3014 if (ioctl == KVM_IRQ_LINE_STATUS) { 3014 if (ioctl == KVM_IRQ_LINE_STATUS) {
3015 r = -EFAULT; 3015 r = -EFAULT;
3016 irq_event.status = status; 3016 irq_event.status = status;
3017 if (copy_to_user(argp, &irq_event, 3017 if (copy_to_user(argp, &irq_event,
3018 sizeof irq_event)) 3018 sizeof irq_event))
3019 goto out; 3019 goto out;
3020 } 3020 }
3021 r = 0; 3021 r = 0;
3022 } 3022 }
3023 break; 3023 break;
3024 } 3024 }
3025 case KVM_GET_IRQCHIP: { 3025 case KVM_GET_IRQCHIP: {
3026 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ 3026 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3027 struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL); 3027 struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
3028 3028
3029 r = -ENOMEM; 3029 r = -ENOMEM;
3030 if (!chip) 3030 if (!chip)
3031 goto out; 3031 goto out;
3032 r = -EFAULT; 3032 r = -EFAULT;
3033 if (copy_from_user(chip, argp, sizeof *chip)) 3033 if (copy_from_user(chip, argp, sizeof *chip))
3034 goto get_irqchip_out; 3034 goto get_irqchip_out;
3035 r = -ENXIO; 3035 r = -ENXIO;
3036 if (!irqchip_in_kernel(kvm)) 3036 if (!irqchip_in_kernel(kvm))
3037 goto get_irqchip_out; 3037 goto get_irqchip_out;
3038 r = kvm_vm_ioctl_get_irqchip(kvm, chip); 3038 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
3039 if (r) 3039 if (r)
3040 goto get_irqchip_out; 3040 goto get_irqchip_out;
3041 r = -EFAULT; 3041 r = -EFAULT;
3042 if (copy_to_user(argp, chip, sizeof *chip)) 3042 if (copy_to_user(argp, chip, sizeof *chip))
3043 goto get_irqchip_out; 3043 goto get_irqchip_out;
3044 r = 0; 3044 r = 0;
3045 get_irqchip_out: 3045 get_irqchip_out:
3046 kfree(chip); 3046 kfree(chip);
3047 if (r) 3047 if (r)
3048 goto out; 3048 goto out;
3049 break; 3049 break;
3050 } 3050 }
3051 case KVM_SET_IRQCHIP: { 3051 case KVM_SET_IRQCHIP: {
3052 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ 3052 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3053 struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL); 3053 struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
3054 3054
3055 r = -ENOMEM; 3055 r = -ENOMEM;
3056 if (!chip) 3056 if (!chip)
3057 goto out; 3057 goto out;
3058 r = -EFAULT; 3058 r = -EFAULT;
3059 if (copy_from_user(chip, argp, sizeof *chip)) 3059 if (copy_from_user(chip, argp, sizeof *chip))
3060 goto set_irqchip_out; 3060 goto set_irqchip_out;
3061 r = -ENXIO; 3061 r = -ENXIO;
3062 if (!irqchip_in_kernel(kvm)) 3062 if (!irqchip_in_kernel(kvm))
3063 goto set_irqchip_out; 3063 goto set_irqchip_out;
3064 r = kvm_vm_ioctl_set_irqchip(kvm, chip); 3064 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
3065 if (r) 3065 if (r)
3066 goto set_irqchip_out; 3066 goto set_irqchip_out;
3067 r = 0; 3067 r = 0;
3068 set_irqchip_out: 3068 set_irqchip_out:
3069 kfree(chip); 3069 kfree(chip);
3070 if (r) 3070 if (r)
3071 goto out; 3071 goto out;
3072 break; 3072 break;
3073 } 3073 }
3074 case KVM_GET_PIT: { 3074 case KVM_GET_PIT: {
3075 r = -EFAULT; 3075 r = -EFAULT;
3076 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state))) 3076 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
3077 goto out; 3077 goto out;
3078 r = -ENXIO; 3078 r = -ENXIO;
3079 if (!kvm->arch.vpit) 3079 if (!kvm->arch.vpit)
3080 goto out; 3080 goto out;
3081 r = kvm_vm_ioctl_get_pit(kvm, &u.ps); 3081 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
3082 if (r) 3082 if (r)
3083 goto out; 3083 goto out;
3084 r = -EFAULT; 3084 r = -EFAULT;
3085 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state))) 3085 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
3086 goto out; 3086 goto out;
3087 r = 0; 3087 r = 0;
3088 break; 3088 break;
3089 } 3089 }
3090 case KVM_SET_PIT: { 3090 case KVM_SET_PIT: {
3091 r = -EFAULT; 3091 r = -EFAULT;
3092 if (copy_from_user(&u.ps, argp, sizeof u.ps)) 3092 if (copy_from_user(&u.ps, argp, sizeof u.ps))
3093 goto out; 3093 goto out;
3094 r = -ENXIO; 3094 r = -ENXIO;
3095 if (!kvm->arch.vpit) 3095 if (!kvm->arch.vpit)
3096 goto out; 3096 goto out;
3097 r = kvm_vm_ioctl_set_pit(kvm, &u.ps); 3097 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
3098 if (r) 3098 if (r)
3099 goto out; 3099 goto out;
3100 r = 0; 3100 r = 0;
3101 break; 3101 break;
3102 } 3102 }
3103 case KVM_GET_PIT2: { 3103 case KVM_GET_PIT2: {
3104 r = -ENXIO; 3104 r = -ENXIO;
3105 if (!kvm->arch.vpit) 3105 if (!kvm->arch.vpit)
3106 goto out; 3106 goto out;
3107 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2); 3107 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
3108 if (r) 3108 if (r)
3109 goto out; 3109 goto out;
3110 r = -EFAULT; 3110 r = -EFAULT;
3111 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2))) 3111 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
3112 goto out; 3112 goto out;
3113 r = 0; 3113 r = 0;
3114 break; 3114 break;
3115 } 3115 }
3116 case KVM_SET_PIT2: { 3116 case KVM_SET_PIT2: {
3117 r = -EFAULT; 3117 r = -EFAULT;
3118 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2))) 3118 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
3119 goto out; 3119 goto out;
3120 r = -ENXIO; 3120 r = -ENXIO;
3121 if (!kvm->arch.vpit) 3121 if (!kvm->arch.vpit)
3122 goto out; 3122 goto out;
3123 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2); 3123 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
3124 if (r) 3124 if (r)
3125 goto out; 3125 goto out;
3126 r = 0; 3126 r = 0;
3127 break; 3127 break;
3128 } 3128 }
3129 case KVM_REINJECT_CONTROL: { 3129 case KVM_REINJECT_CONTROL: {
3130 struct kvm_reinject_control control; 3130 struct kvm_reinject_control control;
3131 r = -EFAULT; 3131 r = -EFAULT;
3132 if (copy_from_user(&control, argp, sizeof(control))) 3132 if (copy_from_user(&control, argp, sizeof(control)))
3133 goto out; 3133 goto out;
3134 r = kvm_vm_ioctl_reinject(kvm, &control); 3134 r = kvm_vm_ioctl_reinject(kvm, &control);
3135 if (r) 3135 if (r)
3136 goto out; 3136 goto out;
3137 r = 0; 3137 r = 0;
3138 break; 3138 break;
3139 } 3139 }
3140 case KVM_XEN_HVM_CONFIG: { 3140 case KVM_XEN_HVM_CONFIG: {
3141 r = -EFAULT; 3141 r = -EFAULT;
3142 if (copy_from_user(&kvm->arch.xen_hvm_config, argp, 3142 if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
3143 sizeof(struct kvm_xen_hvm_config))) 3143 sizeof(struct kvm_xen_hvm_config)))
3144 goto out; 3144 goto out;
3145 r = -EINVAL; 3145 r = -EINVAL;
3146 if (kvm->arch.xen_hvm_config.flags) 3146 if (kvm->arch.xen_hvm_config.flags)
3147 goto out; 3147 goto out;
3148 r = 0; 3148 r = 0;
3149 break; 3149 break;
3150 } 3150 }
3151 case KVM_SET_CLOCK: { 3151 case KVM_SET_CLOCK: {
3152 struct timespec now; 3152 struct timespec now;
3153 struct kvm_clock_data user_ns; 3153 struct kvm_clock_data user_ns;
3154 u64 now_ns; 3154 u64 now_ns;
3155 s64 delta; 3155 s64 delta;
3156 3156
3157 r = -EFAULT; 3157 r = -EFAULT;
3158 if (copy_from_user(&user_ns, argp, sizeof(user_ns))) 3158 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
3159 goto out; 3159 goto out;
3160 3160
3161 r = -EINVAL; 3161 r = -EINVAL;
3162 if (user_ns.flags) 3162 if (user_ns.flags)
3163 goto out; 3163 goto out;
3164 3164
3165 r = 0; 3165 r = 0;
3166 ktime_get_ts(&now); 3166 ktime_get_ts(&now);
3167 now_ns = timespec_to_ns(&now); 3167 now_ns = timespec_to_ns(&now);
3168 delta = user_ns.clock - now_ns; 3168 delta = user_ns.clock - now_ns;
3169 kvm->arch.kvmclock_offset = delta; 3169 kvm->arch.kvmclock_offset = delta;
3170 break; 3170 break;
3171 } 3171 }
3172 case KVM_GET_CLOCK: { 3172 case KVM_GET_CLOCK: {
3173 struct timespec now; 3173 struct timespec now;
3174 struct kvm_clock_data user_ns; 3174 struct kvm_clock_data user_ns;
3175 u64 now_ns; 3175 u64 now_ns;
3176 3176
3177 ktime_get_ts(&now); 3177 ktime_get_ts(&now);
3178 now_ns = timespec_to_ns(&now); 3178 now_ns = timespec_to_ns(&now);
3179 user_ns.clock = kvm->arch.kvmclock_offset + now_ns; 3179 user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
3180 user_ns.flags = 0; 3180 user_ns.flags = 0;
3181 3181
3182 r = -EFAULT; 3182 r = -EFAULT;
3183 if (copy_to_user(argp, &user_ns, sizeof(user_ns))) 3183 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
3184 goto out; 3184 goto out;
3185 r = 0; 3185 r = 0;
3186 break; 3186 break;
3187 } 3187 }
3188 3188
3189 default: 3189 default:
3190 ; 3190 ;
3191 } 3191 }
3192 out: 3192 out:
3193 return r; 3193 return r;
3194 } 3194 }
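For orientation, the KVM_GET_CLOCK and KVM_SET_CLOCK cases handled above are issued by userspace on the VM file descriptor with a struct kvm_clock_data argument. A hedged userspace sketch (error handling and KVM_CAP_ADJUST_CLOCK probing omitted):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);
		int vm  = ioctl(kvm, KVM_CREATE_VM, 0);          /* VM fd */
		struct kvm_clock_data clock;

		if (ioctl(vm, KVM_GET_CLOCK, &clock) == 0)       /* handled above */
			printf("kvmclock: %llu ns, flags %u\n",
			       (unsigned long long)clock.clock, clock.flags);
		return 0;
	}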
3195 3195
3196 static void kvm_init_msr_list(void) 3196 static void kvm_init_msr_list(void)
3197 { 3197 {
3198 u32 dummy[2]; 3198 u32 dummy[2];
3199 unsigned i, j; 3199 unsigned i, j;
3200 3200
3201 /* skip the first msrs in the list. KVM-specific */ 3201 /* skip the first msrs in the list. KVM-specific */
3202 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) { 3202 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
3203 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) 3203 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
3204 continue; 3204 continue;
3205 if (j < i) 3205 if (j < i)
3206 msrs_to_save[j] = msrs_to_save[i]; 3206 msrs_to_save[j] = msrs_to_save[i];
3207 j++; 3207 j++;
3208 } 3208 }
3209 num_msrs_to_save = j; 3209 num_msrs_to_save = j;
3210 } 3210 }
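kvm_init_msr_list() uses the usual two-index compaction idiom: i scans the array, j tracks the next free slot, and entries whose MSR cannot be read on this host are dropped in place. A generic sketch of the idiom (keep() is a placeholder predicate, not a kernel helper):

	static int compact(u32 *a, int n)
	{
		int i, j;

		for (i = j = 0; i < n; i++)
			if (keep(a[i]))
				a[j++] = a[i];      /* survivors slide left */
		return j;                           /* new length */
	}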
3211 3211
3212 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, 3212 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
3213 const void *v) 3213 const void *v)
3214 { 3214 {
3215 if (vcpu->arch.apic && 3215 if (vcpu->arch.apic &&
3216 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) 3216 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
3217 return 0; 3217 return 0;
3218 3218
3219 return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); 3219 return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
3220 } 3220 }
3221 3221
3222 static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) 3222 static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
3223 { 3223 {
3224 if (vcpu->arch.apic && 3224 if (vcpu->arch.apic &&
3225 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) 3225 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
3226 return 0; 3226 return 0;
3227 3227
3228 return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); 3228 return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
3229 } 3229 }
3230 3230
3231 static void kvm_set_segment(struct kvm_vcpu *vcpu, 3231 static void kvm_set_segment(struct kvm_vcpu *vcpu,
3232 struct kvm_segment *var, int seg) 3232 struct kvm_segment *var, int seg)
3233 { 3233 {
3234 kvm_x86_ops->set_segment(vcpu, var, seg); 3234 kvm_x86_ops->set_segment(vcpu, var, seg);
3235 } 3235 }
3236 3236
3237 void kvm_get_segment(struct kvm_vcpu *vcpu, 3237 void kvm_get_segment(struct kvm_vcpu *vcpu,
3238 struct kvm_segment *var, int seg) 3238 struct kvm_segment *var, int seg)
3239 { 3239 {
3240 kvm_x86_ops->get_segment(vcpu, var, seg); 3240 kvm_x86_ops->get_segment(vcpu, var, seg);
3241 } 3241 }
3242 3242
3243 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) 3243 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3244 { 3244 {
3245 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 3245 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3246 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); 3246 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
3247 } 3247 }
3248 3248
3249 gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) 3249 gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3250 { 3250 {
3251 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 3251 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3252 access |= PFERR_FETCH_MASK; 3252 access |= PFERR_FETCH_MASK;
3253 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); 3253 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
3254 } 3254 }
3255 3255
3256 gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) 3256 gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3257 { 3257 {
3258 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 3258 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3259 access |= PFERR_WRITE_MASK; 3259 access |= PFERR_WRITE_MASK;
3260 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); 3260 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
3261 } 3261 }
3262 3262
3263 /* used to access any guest's mapped memory without checking CPL */ 3263 /* used to access any guest's mapped memory without checking CPL */
3264 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) 3264 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3265 { 3265 {
3266 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error); 3266 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error);
3267 } 3267 }
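The wrappers above differ only in the page-fault error-code bits they OR into access before the guest page-table walk; the _system variant passes 0, so the walk ignores CPL. As a reference for the bit values (standard x86 #PF error-code layout, mirrored by the PFERR_* masks):

	/* PFERR_WRITE_MASK (bit 1) - the access is a write
	 * PFERR_USER_MASK  (bit 2) - the access happens at CPL 3
	 * PFERR_FETCH_MASK (bit 4) - the access is an instruction fetch
	 * e.g. a user-mode write passes PFERR_USER_MASK | PFERR_WRITE_MASK == 0x6
	 */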
3268 3268
3269 static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, 3269 static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
3270 struct kvm_vcpu *vcpu, u32 access, 3270 struct kvm_vcpu *vcpu, u32 access,
3271 u32 *error) 3271 u32 *error)
3272 { 3272 {
3273 void *data = val; 3273 void *data = val;
3274 int r = X86EMUL_CONTINUE; 3274 int r = X86EMUL_CONTINUE;
3275 3275
3276 while (bytes) { 3276 while (bytes) {
3277 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error); 3277 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error);
3278 unsigned offset = addr & (PAGE_SIZE-1); 3278 unsigned offset = addr & (PAGE_SIZE-1);
3279 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); 3279 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
3280 int ret; 3280 int ret;
3281 3281
3282 if (gpa == UNMAPPED_GVA) { 3282 if (gpa == UNMAPPED_GVA) {
3283 r = X86EMUL_PROPAGATE_FAULT; 3283 r = X86EMUL_PROPAGATE_FAULT;
3284 goto out; 3284 goto out;
3285 } 3285 }
3286 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); 3286 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
3287 if (ret < 0) { 3287 if (ret < 0) {
3288 r = X86EMUL_IO_NEEDED; 3288 r = X86EMUL_IO_NEEDED;
3289 goto out; 3289 goto out;
3290 } 3290 }
3291 3291
3292 bytes -= toread; 3292 bytes -= toread;
3293 data += toread; 3293 data += toread;
3294 addr += toread; 3294 addr += toread;
3295 } 3295 }
3296 out: 3296 out:
3297 return r; 3297 return r;
3298 } 3298 }
3299 3299
3300 /* used for instruction fetching */ 3300 /* used for instruction fetching */
3301 static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, 3301 static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes,
3302 struct kvm_vcpu *vcpu, u32 *error) 3302 struct kvm_vcpu *vcpu, u32 *error)
3303 { 3303 {
3304 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 3304 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3305 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 3305 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
3306 access | PFERR_FETCH_MASK, error); 3306 access | PFERR_FETCH_MASK, error);
3307 } 3307 }
3308 3308
3309 static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, 3309 static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
3310 struct kvm_vcpu *vcpu, u32 *error) 3310 struct kvm_vcpu *vcpu, u32 *error)
3311 { 3311 {
3312 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 3312 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3313 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, 3313 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
3314 error); 3314 error);
3315 } 3315 }
3316 3316
3317 static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, 3317 static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
3318 struct kvm_vcpu *vcpu, u32 *error) 3318 struct kvm_vcpu *vcpu, u32 *error)
3319 { 3319 {
3320 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); 3320 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error);
3321 } 3321 }
3322 3322
3323 static int kvm_write_guest_virt_system(gva_t addr, void *val, 3323 static int kvm_write_guest_virt_system(gva_t addr, void *val,
3324 unsigned int bytes, 3324 unsigned int bytes,
3325 struct kvm_vcpu *vcpu, 3325 struct kvm_vcpu *vcpu,
3326 u32 *error) 3326 u32 *error)
3327 { 3327 {
3328 void *data = val; 3328 void *data = val;
3329 int r = X86EMUL_CONTINUE; 3329 int r = X86EMUL_CONTINUE;
3330 3330
3331 while (bytes) { 3331 while (bytes) {
3332 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, 3332 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr,
3333 PFERR_WRITE_MASK, error); 3333 PFERR_WRITE_MASK, error);
3334 unsigned offset = addr & (PAGE_SIZE-1); 3334 unsigned offset = addr & (PAGE_SIZE-1);
3335 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); 3335 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
3336 int ret; 3336 int ret;
3337 3337
3338 if (gpa == UNMAPPED_GVA) { 3338 if (gpa == UNMAPPED_GVA) {
3339 r = X86EMUL_PROPAGATE_FAULT; 3339 r = X86EMUL_PROPAGATE_FAULT;
3340 goto out; 3340 goto out;
3341 } 3341 }
3342 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); 3342 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
3343 if (ret < 0) { 3343 if (ret < 0) {
3344 r = X86EMUL_IO_NEEDED; 3344 r = X86EMUL_IO_NEEDED;
3345 goto out; 3345 goto out;
3346 } 3346 }
3347 3347
3348 bytes -= towrite; 3348 bytes -= towrite;
3349 data += towrite; 3349 data += towrite;
3350 addr += towrite; 3350 addr += towrite;
3351 } 3351 }
3352 out: 3352 out:
3353 return r; 3353 return r;
3354 } 3354 }
3355 3355
3356 static int emulator_read_emulated(unsigned long addr, 3356 static int emulator_read_emulated(unsigned long addr,
3357 void *val, 3357 void *val,
3358 unsigned int bytes, 3358 unsigned int bytes,
3359 unsigned int *error_code, 3359 unsigned int *error_code,
3360 struct kvm_vcpu *vcpu) 3360 struct kvm_vcpu *vcpu)
3361 { 3361 {
3362 gpa_t gpa; 3362 gpa_t gpa;
3363 3363
3364 if (vcpu->mmio_read_completed) { 3364 if (vcpu->mmio_read_completed) {
3365 memcpy(val, vcpu->mmio_data, bytes); 3365 memcpy(val, vcpu->mmio_data, bytes);
3366 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, 3366 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
3367 vcpu->mmio_phys_addr, *(u64 *)val); 3367 vcpu->mmio_phys_addr, *(u64 *)val);
3368 vcpu->mmio_read_completed = 0; 3368 vcpu->mmio_read_completed = 0;
3369 return X86EMUL_CONTINUE; 3369 return X86EMUL_CONTINUE;
3370 } 3370 }
3371 3371
3372 gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, error_code); 3372 gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, error_code);
3373 3373
3374 if (gpa == UNMAPPED_GVA) 3374 if (gpa == UNMAPPED_GVA)
3375 return X86EMUL_PROPAGATE_FAULT; 3375 return X86EMUL_PROPAGATE_FAULT;
3376 3376
3377 /* For APIC access vmexit */ 3377 /* For APIC access vmexit */
3378 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3378 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3379 goto mmio; 3379 goto mmio;
3380 3380
3381 if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL) 3381 if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL)
3382 == X86EMUL_CONTINUE) 3382 == X86EMUL_CONTINUE)
3383 return X86EMUL_CONTINUE; 3383 return X86EMUL_CONTINUE;
3384 3384
3385 mmio: 3385 mmio:
3386 /* 3386 /*
3387 * Is this MMIO handled locally? 3387 * Is this MMIO handled locally?
3388 */ 3388 */
3389 if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) { 3389 if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) {
3390 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val); 3390 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val);
3391 return X86EMUL_CONTINUE; 3391 return X86EMUL_CONTINUE;
3392 } 3392 }
3393 3393
3394 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); 3394 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
3395 3395
3396 vcpu->mmio_needed = 1; 3396 vcpu->mmio_needed = 1;
3397 vcpu->run->exit_reason = KVM_EXIT_MMIO; 3397 vcpu->run->exit_reason = KVM_EXIT_MMIO;
3398 vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; 3398 vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
3399 vcpu->run->mmio.len = vcpu->mmio_size = bytes; 3399 vcpu->run->mmio.len = vcpu->mmio_size = bytes;
3400 vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; 3400 vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0;
3401 3401
3402 return X86EMUL_IO_NEEDED; 3402 return X86EMUL_IO_NEEDED;
3403 } 3403 }
3404 3404
3405 int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, 3405 int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
3406 const void *val, int bytes) 3406 const void *val, int bytes)
3407 { 3407 {
3408 int ret; 3408 int ret;
3409 3409
3410 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); 3410 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
3411 if (ret < 0) 3411 if (ret < 0)
3412 return 0; 3412 return 0;
3413 kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1); 3413 kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
3414 return 1; 3414 return 1;
3415 } 3415 }
3416 3416
3417 static int emulator_write_emulated_onepage(unsigned long addr, 3417 static int emulator_write_emulated_onepage(unsigned long addr,
3418 const void *val, 3418 const void *val,
3419 unsigned int bytes, 3419 unsigned int bytes,
3420 unsigned int *error_code, 3420 unsigned int *error_code,
3421 struct kvm_vcpu *vcpu) 3421 struct kvm_vcpu *vcpu)
3422 { 3422 {
3423 gpa_t gpa; 3423 gpa_t gpa;
3424 3424
3425 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error_code); 3425 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error_code);
3426 3426
3427 if (gpa == UNMAPPED_GVA) 3427 if (gpa == UNMAPPED_GVA)
3428 return X86EMUL_PROPAGATE_FAULT; 3428 return X86EMUL_PROPAGATE_FAULT;
3429 3429
3430 /* For APIC access vmexit */ 3430 /* For APIC access vmexit */
3431 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3431 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3432 goto mmio; 3432 goto mmio;
3433 3433
3434 if (emulator_write_phys(vcpu, gpa, val, bytes)) 3434 if (emulator_write_phys(vcpu, gpa, val, bytes))
3435 return X86EMUL_CONTINUE; 3435 return X86EMUL_CONTINUE;
3436 3436
3437 mmio: 3437 mmio:
3438 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); 3438 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
3439 /* 3439 /*
3440 * Is this MMIO handled locally? 3440 * Is this MMIO handled locally?
3441 */ 3441 */
3442 if (!vcpu_mmio_write(vcpu, gpa, bytes, val)) 3442 if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
3443 return X86EMUL_CONTINUE; 3443 return X86EMUL_CONTINUE;
3444 3444
3445 vcpu->mmio_needed = 1; 3445 vcpu->mmio_needed = 1;
3446 vcpu->run->exit_reason = KVM_EXIT_MMIO; 3446 vcpu->run->exit_reason = KVM_EXIT_MMIO;
3447 vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; 3447 vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
3448 vcpu->run->mmio.len = vcpu->mmio_size = bytes; 3448 vcpu->run->mmio.len = vcpu->mmio_size = bytes;
3449 vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; 3449 vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
3450 memcpy(vcpu->run->mmio.data, val, bytes); 3450 memcpy(vcpu->run->mmio.data, val, bytes);
3451 3451
3452 return X86EMUL_CONTINUE; 3452 return X86EMUL_CONTINUE;
3453 } 3453 }
3454 3454
3455 int emulator_write_emulated(unsigned long addr, 3455 int emulator_write_emulated(unsigned long addr,
3456 const void *val, 3456 const void *val,
3457 unsigned int bytes, 3457 unsigned int bytes,
3458 unsigned int *error_code, 3458 unsigned int *error_code,
3459 struct kvm_vcpu *vcpu) 3459 struct kvm_vcpu *vcpu)
3460 { 3460 {
3461 /* Crossing a page boundary? */ 3461 /* Crossing a page boundary? */
3462 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { 3462 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
3463 int rc, now; 3463 int rc, now;
3464 3464
3465 now = -addr & ~PAGE_MASK; 3465 now = -addr & ~PAGE_MASK;
3466 rc = emulator_write_emulated_onepage(addr, val, now, error_code, 3466 rc = emulator_write_emulated_onepage(addr, val, now, error_code,
3467 vcpu); 3467 vcpu);
3468 if (rc != X86EMUL_CONTINUE) 3468 if (rc != X86EMUL_CONTINUE)
3469 return rc; 3469 return rc;
3470 addr += now; 3470 addr += now;
3471 val += now; 3471 val += now;
3472 bytes -= now; 3472 bytes -= now;
3473 } 3473 }
3474 return emulator_write_emulated_onepage(addr, val, bytes, error_code, 3474 return emulator_write_emulated_onepage(addr, val, bytes, error_code,
3475 vcpu); 3475 vcpu);
3476 } 3476 }
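emulator_write_emulated() splits a write that crosses a page boundary; now = -addr & ~PAGE_MASK is the distance from addr to the next page boundary. A worked example assuming 4 KiB pages:

	/* addr = 0x1ff8, bytes = 16, ~PAGE_MASK = 0xfff
	 * crossing test: ((0x1ff8 + 15) ^ 0x1ff8) & PAGE_MASK != 0  -> split
	 * now = (-0x1ff8) & 0xfff = 0x8   -> write 8 bytes ending at 0x1fff
	 * the remaining 8 bytes are then written starting at 0x2000
	 * (for a page-aligned addr the crossing test fails and no split occurs)
	 */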
3477 3477
3478 #define CMPXCHG_TYPE(t, ptr, old, new) \ 3478 #define CMPXCHG_TYPE(t, ptr, old, new) \
3479 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) 3479 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
3480 3480
3481 #ifdef CONFIG_X86_64 3481 #ifdef CONFIG_X86_64
3482 # define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new) 3482 # define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
3483 #else 3483 #else
3484 # define CMPXCHG64(ptr, old, new) \ 3484 # define CMPXCHG64(ptr, old, new) \
3485 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) 3485 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
3486 #endif 3486 #endif
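CMPXCHG_TYPE() evaluates to true only when the value at ptr still equals *old and has therefore been replaced by *new; on 32-bit hosts the 8-byte case falls back to cmpxchg64(). A small usage sketch (host-side values, not guest memory):

	u32 word = 1, old = 1, new = 2;

	bool swapped = CMPXCHG_TYPE(u32, &word, &old, &new);
	/* swapped == true and word == 2; repeating the call with old == 1
	 * now returns false and leaves word untouched */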
3487 3487
3488 static int emulator_cmpxchg_emulated(unsigned long addr, 3488 static int emulator_cmpxchg_emulated(unsigned long addr,
3489 const void *old, 3489 const void *old,
3490 const void *new, 3490 const void *new,
3491 unsigned int bytes, 3491 unsigned int bytes,
3492 unsigned int *error_code, 3492 unsigned int *error_code,
3493 struct kvm_vcpu *vcpu) 3493 struct kvm_vcpu *vcpu)
3494 { 3494 {
3495 gpa_t gpa; 3495 gpa_t gpa;
3496 struct page *page; 3496 struct page *page;
3497 char *kaddr; 3497 char *kaddr;
3498 bool exchanged; 3498 bool exchanged;
3499 3499
3500 /* a guest's cmpxchg8b has to be emulated atomically */ 3500 /* a guest's cmpxchg8b has to be emulated atomically */
3501 if (bytes > 8 || (bytes & (bytes - 1))) 3501 if (bytes > 8 || (bytes & (bytes - 1)))
3502 goto emul_write; 3502 goto emul_write;
3503 3503
3504 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); 3504 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
3505 3505
3506 if (gpa == UNMAPPED_GVA || 3506 if (gpa == UNMAPPED_GVA ||
3507 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3507 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3508 goto emul_write; 3508 goto emul_write;
3509 3509
3510 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) 3510 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
3511 goto emul_write; 3511 goto emul_write;
3512 3512
3513 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); 3513 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
3514 3514
3515 kaddr = kmap_atomic(page, KM_USER0); 3515 kaddr = kmap_atomic(page, KM_USER0);
3516 kaddr += offset_in_page(gpa); 3516 kaddr += offset_in_page(gpa);
3517 switch (bytes) { 3517 switch (bytes) {
3518 case 1: 3518 case 1:
3519 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new); 3519 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
3520 break; 3520 break;
3521 case 2: 3521 case 2:
3522 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new); 3522 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
3523 break; 3523 break;
3524 case 4: 3524 case 4:
3525 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new); 3525 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
3526 break; 3526 break;
3527 case 8: 3527 case 8:
3528 exchanged = CMPXCHG64(kaddr, old, new); 3528 exchanged = CMPXCHG64(kaddr, old, new);
3529 break; 3529 break;
3530 default: 3530 default:
3531 BUG(); 3531 BUG();
3532 } 3532 }
3533 kunmap_atomic(kaddr, KM_USER0); 3533 kunmap_atomic(kaddr, KM_USER0);
3534 kvm_release_page_dirty(page); 3534 kvm_release_page_dirty(page);
3535 3535
3536 if (!exchanged) 3536 if (!exchanged)
3537 return X86EMUL_CMPXCHG_FAILED; 3537 return X86EMUL_CMPXCHG_FAILED;
3538 3538
3539 kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1); 3539 kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1);
3540 3540
3541 return X86EMUL_CONTINUE; 3541 return X86EMUL_CONTINUE;
3542 3542
3543 emul_write: 3543 emul_write:
3544 printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); 3544 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
3545 3545
3546 return emulator_write_emulated(addr, new, bytes, error_code, vcpu); 3546 return emulator_write_emulated(addr, new, bytes, error_code, vcpu);
3547 } 3547 }
3548 3548
3549 static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) 3549 static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
3550 { 3550 {
3551 /* TODO: String I/O for in kernel device */ 3551 /* TODO: String I/O for in kernel device */
3552 int r; 3552 int r;
3553 3553
3554 if (vcpu->arch.pio.in) 3554 if (vcpu->arch.pio.in)
3555 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, 3555 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
3556 vcpu->arch.pio.size, pd); 3556 vcpu->arch.pio.size, pd);
3557 else 3557 else
3558 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, 3558 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3559 vcpu->arch.pio.port, vcpu->arch.pio.size, 3559 vcpu->arch.pio.port, vcpu->arch.pio.size,
3560 pd); 3560 pd);
3561 return r; 3561 return r;
3562 } 3562 }
3563 3563
3564 3564
3565 static int emulator_pio_in_emulated(int size, unsigned short port, void *val, 3565 static int emulator_pio_in_emulated(int size, unsigned short port, void *val,
3566 unsigned int count, struct kvm_vcpu *vcpu) 3566 unsigned int count, struct kvm_vcpu *vcpu)
3567 { 3567 {
3568 if (vcpu->arch.pio.count) 3568 if (vcpu->arch.pio.count)
3569 goto data_avail; 3569 goto data_avail;
3570 3570
3571 trace_kvm_pio(1, port, size, 1); 3571 trace_kvm_pio(1, port, size, 1);
3572 3572
3573 vcpu->arch.pio.port = port; 3573 vcpu->arch.pio.port = port;
3574 vcpu->arch.pio.in = 1; 3574 vcpu->arch.pio.in = 1;
3575 vcpu->arch.pio.count = count; 3575 vcpu->arch.pio.count = count;
3576 vcpu->arch.pio.size = size; 3576 vcpu->arch.pio.size = size;
3577 3577
3578 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { 3578 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
3579 data_avail: 3579 data_avail:
3580 memcpy(val, vcpu->arch.pio_data, size * count); 3580 memcpy(val, vcpu->arch.pio_data, size * count);
3581 vcpu->arch.pio.count = 0; 3581 vcpu->arch.pio.count = 0;
3582 return 1; 3582 return 1;
3583 } 3583 }
3584 3584
3585 vcpu->run->exit_reason = KVM_EXIT_IO; 3585 vcpu->run->exit_reason = KVM_EXIT_IO;
3586 vcpu->run->io.direction = KVM_EXIT_IO_IN; 3586 vcpu->run->io.direction = KVM_EXIT_IO_IN;
3587 vcpu->run->io.size = size; 3587 vcpu->run->io.size = size;
3588 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; 3588 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
3589 vcpu->run->io.count = count; 3589 vcpu->run->io.count = count;
3590 vcpu->run->io.port = port; 3590 vcpu->run->io.port = port;
3591 3591
3592 return 0; 3592 return 0;
3593 } 3593 }
3594 3594
3595 static int emulator_pio_out_emulated(int size, unsigned short port, 3595 static int emulator_pio_out_emulated(int size, unsigned short port,
3596 const void *val, unsigned int count, 3596 const void *val, unsigned int count,
3597 struct kvm_vcpu *vcpu) 3597 struct kvm_vcpu *vcpu)
3598 { 3598 {
3599 trace_kvm_pio(0, port, size, 1); 3599 trace_kvm_pio(0, port, size, 1);
3600 3600
3601 vcpu->arch.pio.port = port; 3601 vcpu->arch.pio.port = port;
3602 vcpu->arch.pio.in = 0; 3602 vcpu->arch.pio.in = 0;
3603 vcpu->arch.pio.count = count; 3603 vcpu->arch.pio.count = count;
3604 vcpu->arch.pio.size = size; 3604 vcpu->arch.pio.size = size;
3605 3605
3606 memcpy(vcpu->arch.pio_data, val, size * count); 3606 memcpy(vcpu->arch.pio_data, val, size * count);
3607 3607
3608 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { 3608 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
3609 vcpu->arch.pio.count = 0; 3609 vcpu->arch.pio.count = 0;
3610 return 1; 3610 return 1;
3611 } 3611 }
3612 3612
3613 vcpu->run->exit_reason = KVM_EXIT_IO; 3613 vcpu->run->exit_reason = KVM_EXIT_IO;
3614 vcpu->run->io.direction = KVM_EXIT_IO_OUT; 3614 vcpu->run->io.direction = KVM_EXIT_IO_OUT;
3615 vcpu->run->io.size = size; 3615 vcpu->run->io.size = size;
3616 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; 3616 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
3617 vcpu->run->io.count = count; 3617 vcpu->run->io.count = count;
3618 vcpu->run->io.port = port; 3618 vcpu->run->io.port = port;
3619 3619
3620 return 0; 3620 return 0;
3621 } 3621 }
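When kernel_pio() cannot satisfy the access, the vcpu returns to userspace with KVM_EXIT_IO and the data placed inside the mmap'ed kvm_run area at run->io.data_offset. A hedged sketch of the userspace side for a byte-wide OUT (port 0x3f8 chosen purely for illustration):

	/* "run" is the mmap'ed struct kvm_run of this vcpu */
	if (run->exit_reason == KVM_EXIT_IO &&
	    run->io.direction == KVM_EXIT_IO_OUT &&
	    run->io.size == 1 && run->io.port == 0x3f8) {
		const char *data = (const char *)run + run->io.data_offset;
		__u32 i;

		for (i = 0; i < run->io.count; i++)
			putchar(data[i * run->io.size]);
	}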
3622 3622
3623 static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) 3623 static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
3624 { 3624 {
3625 return kvm_x86_ops->get_segment_base(vcpu, seg); 3625 return kvm_x86_ops->get_segment_base(vcpu, seg);
3626 } 3626 }
3627 3627
3628 int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) 3628 int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
3629 { 3629 {
3630 kvm_mmu_invlpg(vcpu, address); 3630 kvm_mmu_invlpg(vcpu, address);
3631 return X86EMUL_CONTINUE; 3631 return X86EMUL_CONTINUE;
3632 } 3632 }
3633 3633
3634 int emulate_clts(struct kvm_vcpu *vcpu) 3634 int emulate_clts(struct kvm_vcpu *vcpu)
3635 { 3635 {
3636 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); 3636 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
3637 kvm_x86_ops->fpu_activate(vcpu); 3637 kvm_x86_ops->fpu_activate(vcpu);
3638 return X86EMUL_CONTINUE; 3638 return X86EMUL_CONTINUE;
3639 } 3639 }
3640 3640
3641 int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu) 3641 int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu)
3642 { 3642 {
3643 return _kvm_get_dr(vcpu, dr, dest); 3643 return _kvm_get_dr(vcpu, dr, dest);
3644 } 3644 }
3645 3645
3646 int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu) 3646 int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu)
3647 { 3647 {
3648 3648
3649 return __kvm_set_dr(vcpu, dr, value); 3649 return __kvm_set_dr(vcpu, dr, value);
3650 } 3650 }
3651 3651
3652 static u64 mk_cr_64(u64 curr_cr, u32 new_val) 3652 static u64 mk_cr_64(u64 curr_cr, u32 new_val)
3653 { 3653 {
3654 return (curr_cr & ~((1ULL << 32) - 1)) | new_val; 3654 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
3655 } 3655 }
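mk_cr_64() merges the 32-bit value the guest wrote into the low half of a 64-bit control register while preserving the upper half, for example:

	/* curr_cr = 0xffff000012345678, new_val = 0x80050033
	 * mask     = (1ULL << 32) - 1 = 0x00000000ffffffff
	 * curr_cr & ~mask             = 0xffff000000000000
	 * result                      = 0xffff000080050033
	 */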
3656 3656
3657 static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) 3657 static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)
3658 { 3658 {
3659 unsigned long value; 3659 unsigned long value;
3660 3660
3661 switch (cr) { 3661 switch (cr) {
3662 case 0: 3662 case 0:
3663 value = kvm_read_cr0(vcpu); 3663 value = kvm_read_cr0(vcpu);
3664 break; 3664 break;
3665 case 2: 3665 case 2:
3666 value = vcpu->arch.cr2; 3666 value = vcpu->arch.cr2;
3667 break; 3667 break;
3668 case 3: 3668 case 3:
3669 value = vcpu->arch.cr3; 3669 value = vcpu->arch.cr3;
3670 break; 3670 break;
3671 case 4: 3671 case 4:
3672 value = kvm_read_cr4(vcpu); 3672 value = kvm_read_cr4(vcpu);
3673 break; 3673 break;
3674 case 8: 3674 case 8:
3675 value = kvm_get_cr8(vcpu); 3675 value = kvm_get_cr8(vcpu);
3676 break; 3676 break;
3677 default: 3677 default:
3678 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); 3678 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
3679 return 0; 3679 return 0;
3680 } 3680 }
3681 3681
3682 return value; 3682 return value;
3683 } 3683 }
3684 3684
3685 static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) 3685 static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
3686 { 3686 {
3687 int res = 0; 3687 int res = 0;
3688 3688
3689 switch (cr) { 3689 switch (cr) {
3690 case 0: 3690 case 0:
3691 res = __kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); 3691 res = __kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
3692 break; 3692 break;
3693 case 2: 3693 case 2:
3694 vcpu->arch.cr2 = val; 3694 vcpu->arch.cr2 = val;
3695 break; 3695 break;
3696 case 3: 3696 case 3:
3697 res = __kvm_set_cr3(vcpu, val); 3697 res = __kvm_set_cr3(vcpu, val);
3698 break; 3698 break;
3699 case 4: 3699 case 4:
3700 res = __kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); 3700 res = __kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
3701 break; 3701 break;
3702 case 8: 3702 case 8:
3703 res = __kvm_set_cr8(vcpu, val & 0xfUL); 3703 res = __kvm_set_cr8(vcpu, val & 0xfUL);
3704 break; 3704 break;
3705 default: 3705 default:
3706 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); 3706 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
3707 res = -1; 3707 res = -1;
3708 } 3708 }
3709 3709
3710 return res; 3710 return res;
3711 } 3711 }
3712 3712
3713 static int emulator_get_cpl(struct kvm_vcpu *vcpu) 3713 static int emulator_get_cpl(struct kvm_vcpu *vcpu)
3714 { 3714 {
3715 return kvm_x86_ops->get_cpl(vcpu); 3715 return kvm_x86_ops->get_cpl(vcpu);
3716 } 3716 }
3717 3717
3718 static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) 3718 static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
3719 { 3719 {
3720 kvm_x86_ops->get_gdt(vcpu, dt); 3720 kvm_x86_ops->get_gdt(vcpu, dt);
3721 } 3721 }
3722 3722
3723 static unsigned long emulator_get_cached_segment_base(int seg, 3723 static unsigned long emulator_get_cached_segment_base(int seg,
3724 struct kvm_vcpu *vcpu) 3724 struct kvm_vcpu *vcpu)
3725 { 3725 {
3726 return get_segment_base(vcpu, seg); 3726 return get_segment_base(vcpu, seg);
3727 } 3727 }
3728 3728
3729 static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, 3729 static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg,
3730 struct kvm_vcpu *vcpu) 3730 struct kvm_vcpu *vcpu)
3731 { 3731 {
3732 struct kvm_segment var; 3732 struct kvm_segment var;
3733 3733
3734 kvm_get_segment(vcpu, &var, seg); 3734 kvm_get_segment(vcpu, &var, seg);
3735 3735
3736 if (var.unusable) 3736 if (var.unusable)
3737 return false; 3737 return false;
3738 3738
3739 if (var.g) 3739 if (var.g)
3740 var.limit >>= 12; 3740 var.limit >>= 12;
3741 set_desc_limit(desc, var.limit); 3741 set_desc_limit(desc, var.limit);
3742 set_desc_base(desc, (unsigned long)var.base); 3742 set_desc_base(desc, (unsigned long)var.base);
3743 desc->type = var.type; 3743 desc->type = var.type;
3744 desc->s = var.s; 3744 desc->s = var.s;
3745 desc->dpl = var.dpl; 3745 desc->dpl = var.dpl;
3746 desc->p = var.present; 3746 desc->p = var.present;
3747 desc->avl = var.avl; 3747 desc->avl = var.avl;
3748 desc->l = var.l; 3748 desc->l = var.l;
3749 desc->d = var.db; 3749 desc->d = var.db;
3750 desc->g = var.g; 3750 desc->g = var.g;
3751 3751
3752 return true; 3752 return true;
3753 } 3753 }
3754 3754
3755 static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg, 3755 static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg,
3756 struct kvm_vcpu *vcpu) 3756 struct kvm_vcpu *vcpu)
3757 { 3757 {
3758 struct kvm_segment var; 3758 struct kvm_segment var;
3759 3759
3760 /* needed to preserve selector */ 3760 /* needed to preserve selector */
3761 kvm_get_segment(vcpu, &var, seg); 3761 kvm_get_segment(vcpu, &var, seg);
3762 3762
3763 var.base = get_desc_base(desc); 3763 var.base = get_desc_base(desc);
3764 var.limit = get_desc_limit(desc); 3764 var.limit = get_desc_limit(desc);
3765 if (desc->g) 3765 if (desc->g)
3766 var.limit = (var.limit << 12) | 0xfff; 3766 var.limit = (var.limit << 12) | 0xfff;
3767 var.type = desc->type; 3767 var.type = desc->type;
3768 var.present = desc->p; 3768 var.present = desc->p;
3769 var.dpl = desc->dpl; 3769 var.dpl = desc->dpl;
3770 var.db = desc->d; 3770 var.db = desc->d;
3771 var.s = desc->s; 3771 var.s = desc->s;
3772 var.l = desc->l; 3772 var.l = desc->l;
3773 var.g = desc->g; 3773 var.g = desc->g;
3774 var.avl = desc->avl; 3774 var.avl = desc->avl;
3775 var.present = desc->p; 3775 var.present = desc->p;
3776 var.unusable = !var.present; 3776 var.unusable = !var.present;
3777 var.padding = 0; 3777 var.padding = 0;
3778 3778
3779 kvm_set_segment(vcpu, &var, seg); 3779 kvm_set_segment(vcpu, &var, seg);
3780 return; 3780 return;
3781 } 3781 }
3782 3782
3783 static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) 3783 static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu)
3784 { 3784 {
3785 struct kvm_segment kvm_seg; 3785 struct kvm_segment kvm_seg;
3786 3786
3787 kvm_get_segment(vcpu, &kvm_seg, seg); 3787 kvm_get_segment(vcpu, &kvm_seg, seg);
3788 return kvm_seg.selector; 3788 return kvm_seg.selector;
3789 } 3789 }
3790 3790
3791 static void emulator_set_segment_selector(u16 sel, int seg, 3791 static void emulator_set_segment_selector(u16 sel, int seg,
3792 struct kvm_vcpu *vcpu) 3792 struct kvm_vcpu *vcpu)
3793 { 3793 {
3794 struct kvm_segment kvm_seg; 3794 struct kvm_segment kvm_seg;
3795 3795
3796 kvm_get_segment(vcpu, &kvm_seg, seg); 3796 kvm_get_segment(vcpu, &kvm_seg, seg);
3797 kvm_seg.selector = sel; 3797 kvm_seg.selector = sel;
3798 kvm_set_segment(vcpu, &kvm_seg, seg); 3798 kvm_set_segment(vcpu, &kvm_seg, seg);
3799 } 3799 }
3800 3800
3801 static struct x86_emulate_ops emulate_ops = { 3801 static struct x86_emulate_ops emulate_ops = {
3802 .read_std = kvm_read_guest_virt_system, 3802 .read_std = kvm_read_guest_virt_system,
3803 .write_std = kvm_write_guest_virt_system, 3803 .write_std = kvm_write_guest_virt_system,
3804 .fetch = kvm_fetch_guest_virt, 3804 .fetch = kvm_fetch_guest_virt,
3805 .read_emulated = emulator_read_emulated, 3805 .read_emulated = emulator_read_emulated,
3806 .write_emulated = emulator_write_emulated, 3806 .write_emulated = emulator_write_emulated,
3807 .cmpxchg_emulated = emulator_cmpxchg_emulated, 3807 .cmpxchg_emulated = emulator_cmpxchg_emulated,
3808 .pio_in_emulated = emulator_pio_in_emulated, 3808 .pio_in_emulated = emulator_pio_in_emulated,
3809 .pio_out_emulated = emulator_pio_out_emulated, 3809 .pio_out_emulated = emulator_pio_out_emulated,
3810 .get_cached_descriptor = emulator_get_cached_descriptor, 3810 .get_cached_descriptor = emulator_get_cached_descriptor,
3811 .set_cached_descriptor = emulator_set_cached_descriptor, 3811 .set_cached_descriptor = emulator_set_cached_descriptor,
3812 .get_segment_selector = emulator_get_segment_selector, 3812 .get_segment_selector = emulator_get_segment_selector,
3813 .set_segment_selector = emulator_set_segment_selector, 3813 .set_segment_selector = emulator_set_segment_selector,
3814 .get_cached_segment_base = emulator_get_cached_segment_base, 3814 .get_cached_segment_base = emulator_get_cached_segment_base,
3815 .get_gdt = emulator_get_gdt, 3815 .get_gdt = emulator_get_gdt,
3816 .get_cr = emulator_get_cr, 3816 .get_cr = emulator_get_cr,
3817 .set_cr = emulator_set_cr, 3817 .set_cr = emulator_set_cr,
3818 .cpl = emulator_get_cpl, 3818 .cpl = emulator_get_cpl,
3819 .get_dr = emulator_get_dr, 3819 .get_dr = emulator_get_dr,
3820 .set_dr = emulator_set_dr, 3820 .set_dr = emulator_set_dr,
3821 .set_msr = kvm_set_msr, 3821 .set_msr = kvm_set_msr,
3822 .get_msr = kvm_get_msr, 3822 .get_msr = kvm_get_msr,
3823 }; 3823 };
3824 3824
3825 static void cache_all_regs(struct kvm_vcpu *vcpu) 3825 static void cache_all_regs(struct kvm_vcpu *vcpu)
3826 { 3826 {
3827 kvm_register_read(vcpu, VCPU_REGS_RAX); 3827 kvm_register_read(vcpu, VCPU_REGS_RAX);
3828 kvm_register_read(vcpu, VCPU_REGS_RSP); 3828 kvm_register_read(vcpu, VCPU_REGS_RSP);
3829 kvm_register_read(vcpu, VCPU_REGS_RIP); 3829 kvm_register_read(vcpu, VCPU_REGS_RIP);
3830 vcpu->arch.regs_dirty = ~0; 3830 vcpu->arch.regs_dirty = ~0;
3831 } 3831 }
3832 3832
3833 static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) 3833 static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
3834 { 3834 {
3835 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask); 3835 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
3836 /* 3836 /*
3837 * an sti; sti; sequence only disables interrupts for the first 3837 * an sti; sti; sequence only disables interrupts for the first
3838 * instruction. So, if the last instruction, be it emulated or 3838 * instruction. So, if the last instruction, be it emulated or
3839 * not, left the system with the INT_STI flag enabled, it 3839 * not, left the system with the INT_STI flag enabled, it
3840 * means that the last instruction is an sti. We should not 3840 * means that the last instruction is an sti. We should not
3841 * leave the flag on in this case. The same goes for mov ss 3841 * leave the flag on in this case. The same goes for mov ss
3842 */ 3842 */
3843 if (!(int_shadow & mask)) 3843 if (!(int_shadow & mask))
3844 kvm_x86_ops->set_interrupt_shadow(vcpu, mask); 3844 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
3845 } 3845 }
3846 3846
3847 static void inject_emulated_exception(struct kvm_vcpu *vcpu) 3847 static void inject_emulated_exception(struct kvm_vcpu *vcpu)
3848 { 3848 {
3849 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; 3849 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
3850 if (ctxt->exception == PF_VECTOR) 3850 if (ctxt->exception == PF_VECTOR)
3851 kvm_inject_page_fault(vcpu, ctxt->cr2, ctxt->error_code); 3851 kvm_inject_page_fault(vcpu, ctxt->cr2, ctxt->error_code);
3852 else if (ctxt->error_code_valid) 3852 else if (ctxt->error_code_valid)
3853 kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code); 3853 kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code);
3854 else 3854 else
3855 kvm_queue_exception(vcpu, ctxt->exception); 3855 kvm_queue_exception(vcpu, ctxt->exception);
3856 } 3856 }
3857 3857
3858 static int handle_emulation_failure(struct kvm_vcpu *vcpu) 3858 static int handle_emulation_failure(struct kvm_vcpu *vcpu)
3859 { 3859 {
3860 ++vcpu->stat.insn_emulation_fail; 3860 ++vcpu->stat.insn_emulation_fail;
3861 trace_kvm_emulate_insn_failed(vcpu); 3861 trace_kvm_emulate_insn_failed(vcpu);
3862 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 3862 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3863 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 3863 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
3864 vcpu->run->internal.ndata = 0; 3864 vcpu->run->internal.ndata = 0;
3865 kvm_queue_exception(vcpu, UD_VECTOR); 3865 kvm_queue_exception(vcpu, UD_VECTOR);
3866 return EMULATE_FAIL; 3866 return EMULATE_FAIL;
3867 } 3867 }
3868 3868
3869 int emulate_instruction(struct kvm_vcpu *vcpu, 3869 int emulate_instruction(struct kvm_vcpu *vcpu,
3870 unsigned long cr2, 3870 unsigned long cr2,
3871 u16 error_code, 3871 u16 error_code,
3872 int emulation_type) 3872 int emulation_type)
3873 { 3873 {
3874 int r; 3874 int r;
3875 struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; 3875 struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
3876 3876
3877 kvm_clear_exception_queue(vcpu); 3877 kvm_clear_exception_queue(vcpu);
3878 vcpu->arch.mmio_fault_cr2 = cr2; 3878 vcpu->arch.mmio_fault_cr2 = cr2;
3879 /* 3879 /*
3880 * TODO: fix emulate.c to use guest_read/write_register 3880 * TODO: fix emulate.c to use guest_read/write_register
3881 * instead of direct ->regs accesses, which can save a hundred 3881 * instead of direct ->regs accesses, which can save a hundred
3882 * cycles on Intel for instructions that don't read/change RSP, 3882 * cycles on Intel for instructions that don't read/change RSP,
3883 * for example. 3883 * for example.
3884 */ 3884 */
3885 cache_all_regs(vcpu); 3885 cache_all_regs(vcpu);
3886 3886
3887 if (!(emulation_type & EMULTYPE_NO_DECODE)) { 3887 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
3888 int cs_db, cs_l; 3888 int cs_db, cs_l;
3889 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 3889 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
3890 3890
3891 vcpu->arch.emulate_ctxt.vcpu = vcpu; 3891 vcpu->arch.emulate_ctxt.vcpu = vcpu;
3892 vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); 3892 vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
3893 vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); 3893 vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
3894 vcpu->arch.emulate_ctxt.mode = 3894 vcpu->arch.emulate_ctxt.mode =
3895 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : 3895 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
3896 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) 3896 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
3897 ? X86EMUL_MODE_VM86 : cs_l 3897 ? X86EMUL_MODE_VM86 : cs_l
3898 ? X86EMUL_MODE_PROT64 : cs_db 3898 ? X86EMUL_MODE_PROT64 : cs_db
3899 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 3899 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
3900 memset(c, 0, sizeof(struct decode_cache)); 3900 memset(c, 0, sizeof(struct decode_cache));
3901 memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); 3901 memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
3902 vcpu->arch.emulate_ctxt.interruptibility = 0; 3902 vcpu->arch.emulate_ctxt.interruptibility = 0;
3903 vcpu->arch.emulate_ctxt.exception = -1; 3903 vcpu->arch.emulate_ctxt.exception = -1;
3904 3904
3905 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 3905 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
3906 trace_kvm_emulate_insn_start(vcpu); 3906 trace_kvm_emulate_insn_start(vcpu);
3907 3907
3908 /* Only allow emulation of specific instructions on #UD 3908 /* Only allow emulation of specific instructions on #UD
3909 * (namely VMMCALL, sysenter, sysexit, syscall) */ 3909 * (namely VMMCALL, sysenter, sysexit, syscall) */
3910 if (emulation_type & EMULTYPE_TRAP_UD) { 3910 if (emulation_type & EMULTYPE_TRAP_UD) {
3911 if (!c->twobyte) 3911 if (!c->twobyte)
3912 return EMULATE_FAIL; 3912 return EMULATE_FAIL;
3913 switch (c->b) { 3913 switch (c->b) {
3914 case 0x01: /* VMMCALL */ 3914 case 0x01: /* VMMCALL */
3915 if (c->modrm_mod != 3 || c->modrm_rm != 1) 3915 if (c->modrm_mod != 3 || c->modrm_rm != 1)
3916 return EMULATE_FAIL; 3916 return EMULATE_FAIL;
3917 break; 3917 break;
3918 case 0x34: /* sysenter */ 3918 case 0x34: /* sysenter */
3919 case 0x35: /* sysexit */ 3919 case 0x35: /* sysexit */
3920 if (c->modrm_mod != 0 || c->modrm_rm != 0) 3920 if (c->modrm_mod != 0 || c->modrm_rm != 0)
3921 return EMULATE_FAIL; 3921 return EMULATE_FAIL;
3922 break; 3922 break;
3923 case 0x05: /* syscall */ 3923 case 0x05: /* syscall */
3924 if (c->modrm_mod != 0 || c->modrm_rm != 0) 3924 if (c->modrm_mod != 0 || c->modrm_rm != 0)
3925 return EMULATE_FAIL; 3925 return EMULATE_FAIL;
3926 break; 3926 break;
3927 default: 3927 default:
3928 return EMULATE_FAIL; 3928 return EMULATE_FAIL;
3929 } 3929 }
3930 3930
3931 if (!(c->modrm_reg == 0 || c->modrm_reg == 3)) 3931 if (!(c->modrm_reg == 0 || c->modrm_reg == 3))
3932 return EMULATE_FAIL; 3932 return EMULATE_FAIL;
3933 } 3933 }
3934 3934
3935 ++vcpu->stat.insn_emulation; 3935 ++vcpu->stat.insn_emulation;
3936 if (r) { 3936 if (r) {
3937 if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) 3937 if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
3938 return EMULATE_DONE; 3938 return EMULATE_DONE;
3939 if (emulation_type & EMULTYPE_SKIP) 3939 if (emulation_type & EMULTYPE_SKIP)
3940 return EMULATE_FAIL; 3940 return EMULATE_FAIL;
3941 return handle_emulation_failure(vcpu); 3941 return handle_emulation_failure(vcpu);
3942 } 3942 }
3943 } 3943 }
3944 3944
3945 if (emulation_type & EMULTYPE_SKIP) { 3945 if (emulation_type & EMULTYPE_SKIP) {
3946 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip); 3946 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip);
3947 return EMULATE_DONE; 3947 return EMULATE_DONE;
3948 } 3948 }
3949 3949
3950 /* this is needed for the vmware backdoor interface to work since 3950 /* this is needed for the vmware backdoor interface to work since
3951 it changes register values during IO operations */ 3951 it changes register values during IO operations */
3952 memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); 3952 memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
3953 3953
3954 restart: 3954 restart:
3955 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 3955 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
3956 3956
3957 if (r) { /* emulation failed */ 3957 if (r) { /* emulation failed */
3958 /* 3958 /*
3959 * if emulation was due to access to shadowed page table 3959 * if emulation was due to access to shadowed page table
3960 * and it failed, try to unshadow the page and re-enter the 3960 * and it failed, try to unshadow the page and re-enter the
3961 * guest to let CPU execute the instruction. 3961 * guest to let CPU execute the instruction.
3962 */ 3962 */
3963 if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) 3963 if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
3964 return EMULATE_DONE; 3964 return EMULATE_DONE;
3965 3965
3966 return handle_emulation_failure(vcpu); 3966 return handle_emulation_failure(vcpu);
3967 } 3967 }
3968 3968
3969 toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility); 3969 toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility);
3970 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); 3970 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
3971 memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); 3971 memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
3972 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); 3972 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
3973 3973
3974 if (vcpu->arch.emulate_ctxt.exception >= 0) { 3974 if (vcpu->arch.emulate_ctxt.exception >= 0) {
3975 inject_emulated_exception(vcpu); 3975 inject_emulated_exception(vcpu);
3976 return EMULATE_DONE; 3976 return EMULATE_DONE;
3977 } 3977 }
3978 3978
3979 if (vcpu->arch.pio.count) { 3979 if (vcpu->arch.pio.count) {
3980 if (!vcpu->arch.pio.in) 3980 if (!vcpu->arch.pio.in)
3981 vcpu->arch.pio.count = 0; 3981 vcpu->arch.pio.count = 0;
3982 return EMULATE_DO_MMIO; 3982 return EMULATE_DO_MMIO;
3983 } 3983 }
3984 3984
3985 if (vcpu->mmio_needed) { 3985 if (vcpu->mmio_needed) {
3986 if (vcpu->mmio_is_write) 3986 if (vcpu->mmio_is_write)
3987 vcpu->mmio_needed = 0; 3987 vcpu->mmio_needed = 0;
3988 return EMULATE_DO_MMIO; 3988 return EMULATE_DO_MMIO;
3989 } 3989 }
3990 3990
3991 if (vcpu->arch.emulate_ctxt.restart) 3991 if (vcpu->arch.emulate_ctxt.restart)
3992 goto restart; 3992 goto restart;
3993 3993
3994 return EMULATE_DONE; 3994 return EMULATE_DONE;
3995 } 3995 }
3996 EXPORT_SYMBOL_GPL(emulate_instruction); 3996 EXPORT_SYMBOL_GPL(emulate_instruction);
3997 3997
3998 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) 3998 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
3999 { 3999 {
4000 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); 4000 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
4001 int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); 4001 int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu);
4002 /* do not return to emulator after return from userspace */ 4002 /* do not return to emulator after return from userspace */
4003 vcpu->arch.pio.count = 0; 4003 vcpu->arch.pio.count = 0;
4004 return ret; 4004 return ret;
4005 } 4005 }
4006 EXPORT_SYMBOL_GPL(kvm_fast_pio_out); 4006 EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
4007 4007
4008 static void bounce_off(void *info) 4008 static void bounce_off(void *info)
4009 { 4009 {
4010 /* nothing */ 4010 /* nothing */
4011 } 4011 }
4012 4012
4013 static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, 4013 static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
4014 void *data) 4014 void *data)
4015 { 4015 {
4016 struct cpufreq_freqs *freq = data; 4016 struct cpufreq_freqs *freq = data;
4017 struct kvm *kvm; 4017 struct kvm *kvm;
4018 struct kvm_vcpu *vcpu; 4018 struct kvm_vcpu *vcpu;
4019 int i, send_ipi = 0; 4019 int i, send_ipi = 0;
4020 4020
4021 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) 4021 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
4022 return 0; 4022 return 0;
4023 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) 4023 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
4024 return 0; 4024 return 0;
4025 per_cpu(cpu_tsc_khz, freq->cpu) = freq->new; 4025 per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
4026 4026
4027 spin_lock(&kvm_lock); 4027 spin_lock(&kvm_lock);
4028 list_for_each_entry(kvm, &vm_list, vm_list) { 4028 list_for_each_entry(kvm, &vm_list, vm_list) {
4029 kvm_for_each_vcpu(i, vcpu, kvm) { 4029 kvm_for_each_vcpu(i, vcpu, kvm) {
4030 if (vcpu->cpu != freq->cpu) 4030 if (vcpu->cpu != freq->cpu)
4031 continue; 4031 continue;
4032 if (!kvm_request_guest_time_update(vcpu)) 4032 if (!kvm_request_guest_time_update(vcpu))
4033 continue; 4033 continue;
4034 if (vcpu->cpu != smp_processor_id()) 4034 if (vcpu->cpu != smp_processor_id())
4035 send_ipi++; 4035 send_ipi++;
4036 } 4036 }
4037 } 4037 }
4038 spin_unlock(&kvm_lock); 4038 spin_unlock(&kvm_lock);
4039 4039
4040 if (freq->old < freq->new && send_ipi) { 4040 if (freq->old < freq->new && send_ipi) {
4041 /* 4041 /*
4042 * We upscale the frequency. Must make sure the guest 4042 * We upscale the frequency. Must make sure the guest
4043 * doesn't see old kvmclock values while running with 4043 * doesn't see old kvmclock values while running with
4044 * the new frequency, otherwise we risk the guest seeing 4044 * the new frequency, otherwise we risk the guest seeing
4045 * time go backwards. 4045 * time go backwards.
4046 * 4046 *
4047 * In case we update the frequency for another cpu 4047 * In case we update the frequency for another cpu
4048 * (which might be in guest context) send an interrupt 4048 * (which might be in guest context) send an interrupt
4049 * to kick the cpu out of guest context. Next time 4049 * to kick the cpu out of guest context. Next time
4050 * guest context is entered kvmclock will be updated, 4050 * guest context is entered kvmclock will be updated,
4051 * so the guest will not see stale values. 4051 * so the guest will not see stale values.
4052 */ 4052 */
4053 smp_call_function_single(freq->cpu, bounce_off, NULL, 1); 4053 smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
4054 } 4054 }
4055 return 0; 4055 return 0;
4056 } 4056 }
4057 4057
4058 static struct notifier_block kvmclock_cpufreq_notifier_block = { 4058 static struct notifier_block kvmclock_cpufreq_notifier_block = {
4059 .notifier_call = kvmclock_cpufreq_notifier 4059 .notifier_call = kvmclock_cpufreq_notifier
4060 }; 4060 };
4061 4061
4062 static void kvm_timer_init(void) 4062 static void kvm_timer_init(void)
4063 { 4063 {
4064 int cpu; 4064 int cpu;
4065 4065
4066 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { 4066 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
4067 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, 4067 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
4068 CPUFREQ_TRANSITION_NOTIFIER); 4068 CPUFREQ_TRANSITION_NOTIFIER);
4069 for_each_online_cpu(cpu) { 4069 for_each_online_cpu(cpu) {
4070 unsigned long khz = cpufreq_get(cpu); 4070 unsigned long khz = cpufreq_get(cpu);
4071 if (!khz) 4071 if (!khz)
4072 khz = tsc_khz; 4072 khz = tsc_khz;
4073 per_cpu(cpu_tsc_khz, cpu) = khz; 4073 per_cpu(cpu_tsc_khz, cpu) = khz;
4074 } 4074 }
4075 } else { 4075 } else {
4076 for_each_possible_cpu(cpu) 4076 for_each_possible_cpu(cpu)
4077 per_cpu(cpu_tsc_khz, cpu) = tsc_khz; 4077 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
4078 } 4078 }
4079 } 4079 }
4080 4080
4081 static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); 4081 static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
4082 4082
4083 static int kvm_is_in_guest(void) 4083 static int kvm_is_in_guest(void)
4084 { 4084 {
4085 return percpu_read(current_vcpu) != NULL; 4085 return percpu_read(current_vcpu) != NULL;
4086 } 4086 }
4087 4087
4088 static int kvm_is_user_mode(void) 4088 static int kvm_is_user_mode(void)
4089 { 4089 {
4090 int user_mode = 3; 4090 int user_mode = 3;
4091 4091
4092 if (percpu_read(current_vcpu)) 4092 if (percpu_read(current_vcpu))
4093 user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu)); 4093 user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu));
4094 4094
4095 return user_mode != 0; 4095 return user_mode != 0;
4096 } 4096 }
4097 4097
4098 static unsigned long kvm_get_guest_ip(void) 4098 static unsigned long kvm_get_guest_ip(void)
4099 { 4099 {
4100 unsigned long ip = 0; 4100 unsigned long ip = 0;
4101 4101
4102 if (percpu_read(current_vcpu)) 4102 if (percpu_read(current_vcpu))
4103 ip = kvm_rip_read(percpu_read(current_vcpu)); 4103 ip = kvm_rip_read(percpu_read(current_vcpu));
4104 4104
4105 return ip; 4105 return ip;
4106 } 4106 }
4107 4107
4108 static struct perf_guest_info_callbacks kvm_guest_cbs = { 4108 static struct perf_guest_info_callbacks kvm_guest_cbs = {
4109 .is_in_guest = kvm_is_in_guest, 4109 .is_in_guest = kvm_is_in_guest,
4110 .is_user_mode = kvm_is_user_mode, 4110 .is_user_mode = kvm_is_user_mode,
4111 .get_guest_ip = kvm_get_guest_ip, 4111 .get_guest_ip = kvm_get_guest_ip,
4112 }; 4112 };
4113 4113
4114 void kvm_before_handle_nmi(struct kvm_vcpu *vcpu) 4114 void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
4115 { 4115 {
4116 percpu_write(current_vcpu, vcpu); 4116 percpu_write(current_vcpu, vcpu);
4117 } 4117 }
4118 EXPORT_SYMBOL_GPL(kvm_before_handle_nmi); 4118 EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
4119 4119
4120 void kvm_after_handle_nmi(struct kvm_vcpu *vcpu) 4120 void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
4121 { 4121 {
4122 percpu_write(current_vcpu, NULL); 4122 percpu_write(current_vcpu, NULL);
4123 } 4123 }
4124 EXPORT_SYMBOL_GPL(kvm_after_handle_nmi); 4124 EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
4125 4125
4126 int kvm_arch_init(void *opaque) 4126 int kvm_arch_init(void *opaque)
4127 { 4127 {
4128 int r; 4128 int r;
4129 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; 4129 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
4130 4130
4131 if (kvm_x86_ops) { 4131 if (kvm_x86_ops) {
4132 printk(KERN_ERR "kvm: already loaded the other module\n"); 4132 printk(KERN_ERR "kvm: already loaded the other module\n");
4133 r = -EEXIST; 4133 r = -EEXIST;
4134 goto out; 4134 goto out;
4135 } 4135 }
4136 4136
4137 if (!ops->cpu_has_kvm_support()) { 4137 if (!ops->cpu_has_kvm_support()) {
4138 printk(KERN_ERR "kvm: no hardware support\n"); 4138 printk(KERN_ERR "kvm: no hardware support\n");
4139 r = -EOPNOTSUPP; 4139 r = -EOPNOTSUPP;
4140 goto out; 4140 goto out;
4141 } 4141 }
4142 if (ops->disabled_by_bios()) { 4142 if (ops->disabled_by_bios()) {
4143 printk(KERN_ERR "kvm: disabled by bios\n"); 4143 printk(KERN_ERR "kvm: disabled by bios\n");
4144 r = -EOPNOTSUPP; 4144 r = -EOPNOTSUPP;
4145 goto out; 4145 goto out;
4146 } 4146 }
4147 4147
4148 r = kvm_mmu_module_init(); 4148 r = kvm_mmu_module_init();
4149 if (r) 4149 if (r)
4150 goto out; 4150 goto out;
4151 4151
4152 kvm_init_msr_list(); 4152 kvm_init_msr_list();
4153 4153
4154 kvm_x86_ops = ops; 4154 kvm_x86_ops = ops;
4155 kvm_mmu_set_nonpresent_ptes(0ull, 0ull); 4155 kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
4156 kvm_mmu_set_base_ptes(PT_PRESENT_MASK); 4156 kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
4157 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, 4157 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
4158 PT_DIRTY_MASK, PT64_NX_MASK, 0); 4158 PT_DIRTY_MASK, PT64_NX_MASK, 0);
4159 4159
4160 kvm_timer_init(); 4160 kvm_timer_init();
4161 4161
4162 perf_register_guest_info_callbacks(&kvm_guest_cbs); 4162 perf_register_guest_info_callbacks(&kvm_guest_cbs);
4163 4163
4164 return 0; 4164 return 0;
4165 4165
4166 out: 4166 out:
4167 return r; 4167 return r;
4168 } 4168 }
4169 4169
4170 void kvm_arch_exit(void) 4170 void kvm_arch_exit(void)
4171 { 4171 {
4172 perf_unregister_guest_info_callbacks(&kvm_guest_cbs); 4172 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
4173 4173
4174 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 4174 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
4175 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, 4175 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
4176 CPUFREQ_TRANSITION_NOTIFIER); 4176 CPUFREQ_TRANSITION_NOTIFIER);
4177 kvm_x86_ops = NULL; 4177 kvm_x86_ops = NULL;
4178 kvm_mmu_module_exit(); 4178 kvm_mmu_module_exit();
4179 } 4179 }
4180 4180
4181 int kvm_emulate_halt(struct kvm_vcpu *vcpu) 4181 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
4182 { 4182 {
4183 ++vcpu->stat.halt_exits; 4183 ++vcpu->stat.halt_exits;
4184 if (irqchip_in_kernel(vcpu->kvm)) { 4184 if (irqchip_in_kernel(vcpu->kvm)) {
4185 vcpu->arch.mp_state = KVM_MP_STATE_HALTED; 4185 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
4186 return 1; 4186 return 1;
4187 } else { 4187 } else {
4188 vcpu->run->exit_reason = KVM_EXIT_HLT; 4188 vcpu->run->exit_reason = KVM_EXIT_HLT;
4189 return 0; 4189 return 0;
4190 } 4190 }
4191 } 4191 }
4192 EXPORT_SYMBOL_GPL(kvm_emulate_halt); 4192 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
4193 4193
4194 static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0, 4194 static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
4195 unsigned long a1) 4195 unsigned long a1)
4196 { 4196 {
4197 if (is_long_mode(vcpu)) 4197 if (is_long_mode(vcpu))
4198 return a0; 4198 return a0;
4199 else 4199 else
4200 return a0 | ((gpa_t)a1 << 32); 4200 return a0 | ((gpa_t)a1 << 32);
4201 } 4201 }
4202 4202
4203 int kvm_hv_hypercall(struct kvm_vcpu *vcpu) 4203 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
4204 { 4204 {
4205 u64 param, ingpa, outgpa, ret; 4205 u64 param, ingpa, outgpa, ret;
4206 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; 4206 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
4207 bool fast, longmode; 4207 bool fast, longmode;
4208 int cs_db, cs_l; 4208 int cs_db, cs_l;
4209 4209
4210 /* 4210 /*
4211 * hypercall generates UD from non-zero cpl and real mode 4211 * hypercall generates UD from non-zero cpl and real mode
4212 * per HYPER-V spec 4212 * per HYPER-V spec
4213 */ 4213 */
4214 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { 4214 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
4215 kvm_queue_exception(vcpu, UD_VECTOR); 4215 kvm_queue_exception(vcpu, UD_VECTOR);
4216 return 0; 4216 return 0;
4217 } 4217 }
4218 4218
4219 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 4219 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4220 longmode = is_long_mode(vcpu) && cs_l == 1; 4220 longmode = is_long_mode(vcpu) && cs_l == 1;
4221 4221
4222 if (!longmode) { 4222 if (!longmode) {
4223 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | 4223 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
4224 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff); 4224 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
4225 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) | 4225 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
4226 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff); 4226 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
4227 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) | 4227 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
4228 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff); 4228 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
4229 } 4229 }
4230 #ifdef CONFIG_X86_64 4230 #ifdef CONFIG_X86_64
4231 else { 4231 else {
4232 param = kvm_register_read(vcpu, VCPU_REGS_RCX); 4232 param = kvm_register_read(vcpu, VCPU_REGS_RCX);
4233 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX); 4233 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
4234 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8); 4234 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
4235 } 4235 }
4236 #endif 4236 #endif
4237 4237
4238 code = param & 0xffff; 4238 code = param & 0xffff;
4239 fast = (param >> 16) & 0x1; 4239 fast = (param >> 16) & 0x1;
4240 rep_cnt = (param >> 32) & 0xfff; 4240 rep_cnt = (param >> 32) & 0xfff;
4241 rep_idx = (param >> 48) & 0xfff; 4241 rep_idx = (param >> 48) & 0xfff;
4242 4242
4243 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); 4243 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
4244 4244
4245 switch (code) { 4245 switch (code) {
4246 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: 4246 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
4247 kvm_vcpu_on_spin(vcpu); 4247 kvm_vcpu_on_spin(vcpu);
4248 break; 4248 break;
4249 default: 4249 default:
4250 res = HV_STATUS_INVALID_HYPERCALL_CODE; 4250 res = HV_STATUS_INVALID_HYPERCALL_CODE;
4251 break; 4251 break;
4252 } 4252 }
4253 4253
4254 ret = res | (((u64)rep_done & 0xfff) << 32); 4254 ret = res | (((u64)rep_done & 0xfff) << 32);
4255 if (longmode) { 4255 if (longmode) {
4256 kvm_register_write(vcpu, VCPU_REGS_RAX, ret); 4256 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
4257 } else { 4257 } else {
4258 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32); 4258 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
4259 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); 4259 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
4260 } 4260 }
4261 4261
4262 return 1; 4262 return 1;
4263 } 4263 }
4264 4264
4265 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) 4265 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
4266 { 4266 {
4267 unsigned long nr, a0, a1, a2, a3, ret; 4267 unsigned long nr, a0, a1, a2, a3, ret;
4268 int r = 1; 4268 int r = 1;
4269 4269
4270 if (kvm_hv_hypercall_enabled(vcpu->kvm)) 4270 if (kvm_hv_hypercall_enabled(vcpu->kvm))
4271 return kvm_hv_hypercall(vcpu); 4271 return kvm_hv_hypercall(vcpu);
4272 4272
4273 nr = kvm_register_read(vcpu, VCPU_REGS_RAX); 4273 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
4274 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); 4274 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
4275 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); 4275 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
4276 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX); 4276 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
4277 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI); 4277 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
4278 4278
4279 trace_kvm_hypercall(nr, a0, a1, a2, a3); 4279 trace_kvm_hypercall(nr, a0, a1, a2, a3);
4280 4280
4281 if (!is_long_mode(vcpu)) { 4281 if (!is_long_mode(vcpu)) {
4282 nr &= 0xFFFFFFFF; 4282 nr &= 0xFFFFFFFF;
4283 a0 &= 0xFFFFFFFF; 4283 a0 &= 0xFFFFFFFF;
4284 a1 &= 0xFFFFFFFF; 4284 a1 &= 0xFFFFFFFF;
4285 a2 &= 0xFFFFFFFF; 4285 a2 &= 0xFFFFFFFF;
4286 a3 &= 0xFFFFFFFF; 4286 a3 &= 0xFFFFFFFF;
4287 } 4287 }
4288 4288
4289 if (kvm_x86_ops->get_cpl(vcpu) != 0) { 4289 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
4290 ret = -KVM_EPERM; 4290 ret = -KVM_EPERM;
4291 goto out; 4291 goto out;
4292 } 4292 }
4293 4293
4294 switch (nr) { 4294 switch (nr) {
4295 case KVM_HC_VAPIC_POLL_IRQ: 4295 case KVM_HC_VAPIC_POLL_IRQ:
4296 ret = 0; 4296 ret = 0;
4297 break; 4297 break;
4298 case KVM_HC_MMU_OP: 4298 case KVM_HC_MMU_OP:
4299 r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret); 4299 r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
4300 break; 4300 break;
4301 default: 4301 default:
4302 ret = -KVM_ENOSYS; 4302 ret = -KVM_ENOSYS;
4303 break; 4303 break;
4304 } 4304 }
4305 out: 4305 out:
4306 kvm_register_write(vcpu, VCPU_REGS_RAX, ret); 4306 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
4307 ++vcpu->stat.hypercalls; 4307 ++vcpu->stat.hypercalls;
4308 return r; 4308 return r;
4309 } 4309 }
4310 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); 4310 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
4311 4311
4312 int kvm_fix_hypercall(struct kvm_vcpu *vcpu) 4312 int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
4313 { 4313 {
4314 char instruction[3]; 4314 char instruction[3];
4315 unsigned long rip = kvm_rip_read(vcpu); 4315 unsigned long rip = kvm_rip_read(vcpu);
4316 4316
4317 /* 4317 /*
4318 * Blow out the MMU to ensure that no other VCPU has an active mapping 4318 * Blow out the MMU to ensure that no other VCPU has an active mapping
4319 * so that the updated hypercall appears atomically across all 4319 * so that the updated hypercall appears atomically across all
4320 * VCPUs. 4320 * VCPUs.
4321 */ 4321 */
4322 kvm_mmu_zap_all(vcpu->kvm); 4322 kvm_mmu_zap_all(vcpu->kvm);
4323 4323
4324 kvm_x86_ops->patch_hypercall(vcpu, instruction); 4324 kvm_x86_ops->patch_hypercall(vcpu, instruction);
4325 4325
4326 return emulator_write_emulated(rip, instruction, 3, NULL, vcpu); 4326 return emulator_write_emulated(rip, instruction, 3, NULL, vcpu);
4327 } 4327 }
4328 4328
4329 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) 4329 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
4330 { 4330 {
4331 struct desc_ptr dt = { limit, base }; 4331 struct desc_ptr dt = { limit, base };
4332 4332
4333 kvm_x86_ops->set_gdt(vcpu, &dt); 4333 kvm_x86_ops->set_gdt(vcpu, &dt);
4334 } 4334 }
4335 4335
4336 void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) 4336 void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
4337 { 4337 {
4338 struct desc_ptr dt = { limit, base }; 4338 struct desc_ptr dt = { limit, base };
4339 4339
4340 kvm_x86_ops->set_idt(vcpu, &dt); 4340 kvm_x86_ops->set_idt(vcpu, &dt);
4341 } 4341 }
4342 4342
4343 static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) 4343 static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
4344 { 4344 {
4345 struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; 4345 struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
4346 int j, nent = vcpu->arch.cpuid_nent; 4346 int j, nent = vcpu->arch.cpuid_nent;
4347 4347
4348 e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; 4348 e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
4349 /* when no next entry is found, the current entry[i] is reselected */ 4349 /* when no next entry is found, the current entry[i] is reselected */
4350 for (j = i + 1; ; j = (j + 1) % nent) { 4350 for (j = i + 1; ; j = (j + 1) % nent) {
4351 struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; 4351 struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
4352 if (ej->function == e->function) { 4352 if (ej->function == e->function) {
4353 ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; 4353 ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
4354 return j; 4354 return j;
4355 } 4355 }
4356 } 4356 }
4357 return 0; /* silence gcc, even though control never reaches here */ 4357 return 0; /* silence gcc, even though control never reaches here */
4358 } 4358 }
4359 4359
4360 /* find an entry with matching function, matching index (if needed), and that 4360 /* find an entry with matching function, matching index (if needed), and that
4361 * should be read next (if it's stateful) */ 4361 * should be read next (if it's stateful) */
4362 static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e, 4362 static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
4363 u32 function, u32 index) 4363 u32 function, u32 index)
4364 { 4364 {
4365 if (e->function != function) 4365 if (e->function != function)
4366 return 0; 4366 return 0;
4367 if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index) 4367 if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
4368 return 0; 4368 return 0;
4369 if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) && 4369 if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
4370 !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT)) 4370 !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
4371 return 0; 4371 return 0;
4372 return 1; 4372 return 1;
4373 } 4373 }
4374 4374
4375 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, 4375 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
4376 u32 function, u32 index) 4376 u32 function, u32 index)
4377 { 4377 {
4378 int i; 4378 int i;
4379 struct kvm_cpuid_entry2 *best = NULL; 4379 struct kvm_cpuid_entry2 *best = NULL;
4380 4380
4381 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { 4381 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
4382 struct kvm_cpuid_entry2 *e; 4382 struct kvm_cpuid_entry2 *e;
4383 4383
4384 e = &vcpu->arch.cpuid_entries[i]; 4384 e = &vcpu->arch.cpuid_entries[i];
4385 if (is_matching_cpuid_entry(e, function, index)) { 4385 if (is_matching_cpuid_entry(e, function, index)) {
4386 if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) 4386 if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
4387 move_to_next_stateful_cpuid_entry(vcpu, i); 4387 move_to_next_stateful_cpuid_entry(vcpu, i);
4388 best = e; 4388 best = e;
4389 break; 4389 break;
4390 } 4390 }
4391 /* 4391 /*
4392 * Both basic or both extended? 4392 * Both basic or both extended?
4393 */ 4393 */
4394 if (((e->function ^ function) & 0x80000000) == 0) 4394 if (((e->function ^ function) & 0x80000000) == 0)
4395 if (!best || e->function > best->function) 4395 if (!best || e->function > best->function)
4396 best = e; 4396 best = e;
4397 } 4397 }
4398 return best; 4398 return best;
4399 } 4399 }
4400 EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); 4400 EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
4401 4401
4402 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) 4402 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
4403 { 4403 {
4404 struct kvm_cpuid_entry2 *best; 4404 struct kvm_cpuid_entry2 *best;
4405 4405
4406 best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); 4406 best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
4407 if (!best || best->eax < 0x80000008) 4407 if (!best || best->eax < 0x80000008)
4408 goto not_found; 4408 goto not_found;
4409 best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); 4409 best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
4410 if (best) 4410 if (best)
4411 return best->eax & 0xff; 4411 return best->eax & 0xff;
4412 not_found: 4412 not_found:
4413 return 36; 4413 return 36;
4414 } 4414 }
4415 4415
4416 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) 4416 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
4417 { 4417 {
4418 u32 function, index; 4418 u32 function, index;
4419 struct kvm_cpuid_entry2 *best; 4419 struct kvm_cpuid_entry2 *best;
4420 4420
4421 function = kvm_register_read(vcpu, VCPU_REGS_RAX); 4421 function = kvm_register_read(vcpu, VCPU_REGS_RAX);
4422 index = kvm_register_read(vcpu, VCPU_REGS_RCX); 4422 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
4423 kvm_register_write(vcpu, VCPU_REGS_RAX, 0); 4423 kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
4424 kvm_register_write(vcpu, VCPU_REGS_RBX, 0); 4424 kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
4425 kvm_register_write(vcpu, VCPU_REGS_RCX, 0); 4425 kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
4426 kvm_register_write(vcpu, VCPU_REGS_RDX, 0); 4426 kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
4427 best = kvm_find_cpuid_entry(vcpu, function, index); 4427 best = kvm_find_cpuid_entry(vcpu, function, index);
4428 if (best) { 4428 if (best) {
4429 kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); 4429 kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
4430 kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); 4430 kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
4431 kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx); 4431 kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
4432 kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx); 4432 kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
4433 } 4433 }
4434 kvm_x86_ops->skip_emulated_instruction(vcpu); 4434 kvm_x86_ops->skip_emulated_instruction(vcpu);
4435 trace_kvm_cpuid(function, 4435 trace_kvm_cpuid(function,
4436 kvm_register_read(vcpu, VCPU_REGS_RAX), 4436 kvm_register_read(vcpu, VCPU_REGS_RAX),
4437 kvm_register_read(vcpu, VCPU_REGS_RBX), 4437 kvm_register_read(vcpu, VCPU_REGS_RBX),
4438 kvm_register_read(vcpu, VCPU_REGS_RCX), 4438 kvm_register_read(vcpu, VCPU_REGS_RCX),
4439 kvm_register_read(vcpu, VCPU_REGS_RDX)); 4439 kvm_register_read(vcpu, VCPU_REGS_RDX));
4440 } 4440 }
4441 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); 4441 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
4442 4442
4443 /* 4443 /*
4444 * Check if userspace requested an interrupt window, and that the 4444 * Check if userspace requested an interrupt window, and that the
4445 * interrupt window is open. 4445 * interrupt window is open.
4446 * 4446 *
4447 * No need to exit to userspace if we already have an interrupt queued. 4447 * No need to exit to userspace if we already have an interrupt queued.
4448 */ 4448 */
4449 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) 4449 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
4450 { 4450 {
4451 return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && 4451 return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
4452 vcpu->run->request_interrupt_window && 4452 vcpu->run->request_interrupt_window &&
4453 kvm_arch_interrupt_allowed(vcpu)); 4453 kvm_arch_interrupt_allowed(vcpu));
4454 } 4454 }
4455 4455
4456 static void post_kvm_run_save(struct kvm_vcpu *vcpu) 4456 static void post_kvm_run_save(struct kvm_vcpu *vcpu)
4457 { 4457 {
4458 struct kvm_run *kvm_run = vcpu->run; 4458 struct kvm_run *kvm_run = vcpu->run;
4459 4459
4460 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; 4460 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
4461 kvm_run->cr8 = kvm_get_cr8(vcpu); 4461 kvm_run->cr8 = kvm_get_cr8(vcpu);
4462 kvm_run->apic_base = kvm_get_apic_base(vcpu); 4462 kvm_run->apic_base = kvm_get_apic_base(vcpu);
4463 if (irqchip_in_kernel(vcpu->kvm)) 4463 if (irqchip_in_kernel(vcpu->kvm))
4464 kvm_run->ready_for_interrupt_injection = 1; 4464 kvm_run->ready_for_interrupt_injection = 1;
4465 else 4465 else
4466 kvm_run->ready_for_interrupt_injection = 4466 kvm_run->ready_for_interrupt_injection =
4467 kvm_arch_interrupt_allowed(vcpu) && 4467 kvm_arch_interrupt_allowed(vcpu) &&
4468 !kvm_cpu_has_interrupt(vcpu) && 4468 !kvm_cpu_has_interrupt(vcpu) &&
4469 !kvm_event_needs_reinjection(vcpu); 4469 !kvm_event_needs_reinjection(vcpu);
4470 } 4470 }
4471 4471
4472 static void vapic_enter(struct kvm_vcpu *vcpu) 4472 static void vapic_enter(struct kvm_vcpu *vcpu)
4473 { 4473 {
4474 struct kvm_lapic *apic = vcpu->arch.apic; 4474 struct kvm_lapic *apic = vcpu->arch.apic;
4475 struct page *page; 4475 struct page *page;
4476 4476
4477 if (!apic || !apic->vapic_addr) 4477 if (!apic || !apic->vapic_addr)
4478 return; 4478 return;
4479 4479
4480 page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); 4480 page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
4481 4481
4482 vcpu->arch.apic->vapic_page = page; 4482 vcpu->arch.apic->vapic_page = page;
4483 } 4483 }
4484 4484
4485 static void vapic_exit(struct kvm_vcpu *vcpu) 4485 static void vapic_exit(struct kvm_vcpu *vcpu)
4486 { 4486 {
4487 struct kvm_lapic *apic = vcpu->arch.apic; 4487 struct kvm_lapic *apic = vcpu->arch.apic;
4488 int idx; 4488 int idx;
4489 4489
4490 if (!apic || !apic->vapic_addr) 4490 if (!apic || !apic->vapic_addr)
4491 return; 4491 return;
4492 4492
4493 idx = srcu_read_lock(&vcpu->kvm->srcu); 4493 idx = srcu_read_lock(&vcpu->kvm->srcu);
4494 kvm_release_page_dirty(apic->vapic_page); 4494 kvm_release_page_dirty(apic->vapic_page);
4495 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); 4495 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
4496 srcu_read_unlock(&vcpu->kvm->srcu, idx); 4496 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4497 } 4497 }
4498 4498
4499 static void update_cr8_intercept(struct kvm_vcpu *vcpu) 4499 static void update_cr8_intercept(struct kvm_vcpu *vcpu)
4500 { 4500 {
4501 int max_irr, tpr; 4501 int max_irr, tpr;
4502 4502
4503 if (!kvm_x86_ops->update_cr8_intercept) 4503 if (!kvm_x86_ops->update_cr8_intercept)
4504 return; 4504 return;
4505 4505
4506 if (!vcpu->arch.apic) 4506 if (!vcpu->arch.apic)
4507 return; 4507 return;
4508 4508
4509 if (!vcpu->arch.apic->vapic_addr) 4509 if (!vcpu->arch.apic->vapic_addr)
4510 max_irr = kvm_lapic_find_highest_irr(vcpu); 4510 max_irr = kvm_lapic_find_highest_irr(vcpu);
4511 else 4511 else
4512 max_irr = -1; 4512 max_irr = -1;
4513 4513
4514 if (max_irr != -1) 4514 if (max_irr != -1)
4515 max_irr >>= 4; 4515 max_irr >>= 4;
4516 4516
4517 tpr = kvm_lapic_get_cr8(vcpu); 4517 tpr = kvm_lapic_get_cr8(vcpu);
4518 4518
4519 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); 4519 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
4520 } 4520 }
4521 4521
4522 static void inject_pending_event(struct kvm_vcpu *vcpu) 4522 static void inject_pending_event(struct kvm_vcpu *vcpu)
4523 { 4523 {
4524 /* try to reinject previous events if any */ 4524 /* try to reinject previous events if any */
4525 if (vcpu->arch.exception.pending) { 4525 if (vcpu->arch.exception.pending) {
4526 trace_kvm_inj_exception(vcpu->arch.exception.nr, 4526 trace_kvm_inj_exception(vcpu->arch.exception.nr,
4527 vcpu->arch.exception.has_error_code, 4527 vcpu->arch.exception.has_error_code,
4528 vcpu->arch.exception.error_code); 4528 vcpu->arch.exception.error_code);
4529 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, 4529 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
4530 vcpu->arch.exception.has_error_code, 4530 vcpu->arch.exception.has_error_code,
4531 vcpu->arch.exception.error_code, 4531 vcpu->arch.exception.error_code,
4532 vcpu->arch.exception.reinject); 4532 vcpu->arch.exception.reinject);
4533 return; 4533 return;
4534 } 4534 }
4535 4535
4536 if (vcpu->arch.nmi_injected) { 4536 if (vcpu->arch.nmi_injected) {
4537 kvm_x86_ops->set_nmi(vcpu); 4537 kvm_x86_ops->set_nmi(vcpu);
4538 return; 4538 return;
4539 } 4539 }
4540 4540
4541 if (vcpu->arch.interrupt.pending) { 4541 if (vcpu->arch.interrupt.pending) {
4542 kvm_x86_ops->set_irq(vcpu); 4542 kvm_x86_ops->set_irq(vcpu);
4543 return; 4543 return;
4544 } 4544 }
4545 4545
4546 /* try to inject new event if pending */ 4546 /* try to inject new event if pending */
4547 if (vcpu->arch.nmi_pending) { 4547 if (vcpu->arch.nmi_pending) {
4548 if (kvm_x86_ops->nmi_allowed(vcpu)) { 4548 if (kvm_x86_ops->nmi_allowed(vcpu)) {
4549 vcpu->arch.nmi_pending = false; 4549 vcpu->arch.nmi_pending = false;
4550 vcpu->arch.nmi_injected = true; 4550 vcpu->arch.nmi_injected = true;
4551 kvm_x86_ops->set_nmi(vcpu); 4551 kvm_x86_ops->set_nmi(vcpu);
4552 } 4552 }
4553 } else if (kvm_cpu_has_interrupt(vcpu)) { 4553 } else if (kvm_cpu_has_interrupt(vcpu)) {
4554 if (kvm_x86_ops->interrupt_allowed(vcpu)) { 4554 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
4555 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), 4555 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
4556 false); 4556 false);
4557 kvm_x86_ops->set_irq(vcpu); 4557 kvm_x86_ops->set_irq(vcpu);
4558 } 4558 }
4559 } 4559 }
4560 } 4560 }
4561 4561
4562 static int vcpu_enter_guest(struct kvm_vcpu *vcpu) 4562 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
4563 { 4563 {
4564 int r; 4564 int r;
4565 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && 4565 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
4566 vcpu->run->request_interrupt_window; 4566 vcpu->run->request_interrupt_window;
4567 4567
4568 if (vcpu->requests) 4568 if (vcpu->requests)
4569 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) 4569 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
4570 kvm_mmu_unload(vcpu); 4570 kvm_mmu_unload(vcpu);
4571 4571
4572 r = kvm_mmu_reload(vcpu); 4572 r = kvm_mmu_reload(vcpu);
4573 if (unlikely(r)) 4573 if (unlikely(r))
4574 goto out; 4574 goto out;
4575 4575
4576 if (vcpu->requests) { 4576 if (vcpu->requests) {
4577 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) 4577 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
4578 __kvm_migrate_timers(vcpu); 4578 __kvm_migrate_timers(vcpu);
4579 if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests)) 4579 if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
4580 kvm_write_guest_time(vcpu); 4580 kvm_write_guest_time(vcpu);
4581 if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests)) 4581 if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
4582 kvm_mmu_sync_roots(vcpu); 4582 kvm_mmu_sync_roots(vcpu);
4583 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) 4583 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
4584 kvm_x86_ops->tlb_flush(vcpu); 4584 kvm_x86_ops->tlb_flush(vcpu);
4585 if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, 4585 if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
4586 &vcpu->requests)) { 4586 &vcpu->requests)) {
4587 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; 4587 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
4588 r = 0; 4588 r = 0;
4589 goto out; 4589 goto out;
4590 } 4590 }
4591 if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { 4591 if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
4592 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; 4592 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
4593 r = 0; 4593 r = 0;
4594 goto out; 4594 goto out;
4595 } 4595 }
4596 if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) { 4596 if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
4597 vcpu->fpu_active = 0; 4597 vcpu->fpu_active = 0;
4598 kvm_x86_ops->fpu_deactivate(vcpu); 4598 kvm_x86_ops->fpu_deactivate(vcpu);
4599 } 4599 }
4600 } 4600 }
4601 4601
4602 preempt_disable(); 4602 preempt_disable();
4603 4603
4604 kvm_x86_ops->prepare_guest_switch(vcpu); 4604 kvm_x86_ops->prepare_guest_switch(vcpu);
4605 if (vcpu->fpu_active) 4605 if (vcpu->fpu_active)
4606 kvm_load_guest_fpu(vcpu); 4606 kvm_load_guest_fpu(vcpu);
4607 4607
4608 atomic_set(&vcpu->guest_mode, 1); 4608 atomic_set(&vcpu->guest_mode, 1);
4609 smp_wmb(); 4609 smp_wmb();
4610 4610
4611 local_irq_disable(); 4611 local_irq_disable();
4612 4612
4613 if (!atomic_read(&vcpu->guest_mode) || vcpu->requests 4613 if (!atomic_read(&vcpu->guest_mode) || vcpu->requests
4614 || need_resched() || signal_pending(current)) { 4614 || need_resched() || signal_pending(current)) {
4615 atomic_set(&vcpu->guest_mode, 0); 4615 atomic_set(&vcpu->guest_mode, 0);
4616 smp_wmb(); 4616 smp_wmb();
4617 local_irq_enable(); 4617 local_irq_enable();
4618 preempt_enable(); 4618 preempt_enable();
4619 r = 1; 4619 r = 1;
4620 goto out; 4620 goto out;
4621 } 4621 }
4622 4622
4623 inject_pending_event(vcpu); 4623 inject_pending_event(vcpu);
4624 4624
4625 /* enable NMI/IRQ window open exits if needed */ 4625 /* enable NMI/IRQ window open exits if needed */
4626 if (vcpu->arch.nmi_pending) 4626 if (vcpu->arch.nmi_pending)
4627 kvm_x86_ops->enable_nmi_window(vcpu); 4627 kvm_x86_ops->enable_nmi_window(vcpu);
4628 else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) 4628 else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
4629 kvm_x86_ops->enable_irq_window(vcpu); 4629 kvm_x86_ops->enable_irq_window(vcpu);
4630 4630
4631 if (kvm_lapic_enabled(vcpu)) { 4631 if (kvm_lapic_enabled(vcpu)) {
4632 update_cr8_intercept(vcpu); 4632 update_cr8_intercept(vcpu);
4633 kvm_lapic_sync_to_vapic(vcpu); 4633 kvm_lapic_sync_to_vapic(vcpu);
4634 } 4634 }
4635 4635
4636 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4636 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4637 4637
4638 kvm_guest_enter(); 4638 kvm_guest_enter();
4639 4639
4640 if (unlikely(vcpu->arch.switch_db_regs)) { 4640 if (unlikely(vcpu->arch.switch_db_regs)) {
4641 set_debugreg(0, 7); 4641 set_debugreg(0, 7);
4642 set_debugreg(vcpu->arch.eff_db[0], 0); 4642 set_debugreg(vcpu->arch.eff_db[0], 0);
4643 set_debugreg(vcpu->arch.eff_db[1], 1); 4643 set_debugreg(vcpu->arch.eff_db[1], 1);
4644 set_debugreg(vcpu->arch.eff_db[2], 2); 4644 set_debugreg(vcpu->arch.eff_db[2], 2);
4645 set_debugreg(vcpu->arch.eff_db[3], 3); 4645 set_debugreg(vcpu->arch.eff_db[3], 3);
4646 } 4646 }
4647 4647
4648 trace_kvm_entry(vcpu->vcpu_id); 4648 trace_kvm_entry(vcpu->vcpu_id);
4649 kvm_x86_ops->run(vcpu); 4649 kvm_x86_ops->run(vcpu);
4650 4650
4651 /* 4651 /*
4652 * If the guest has used debug registers, at least dr7 4652 * If the guest has used debug registers, at least dr7
4653 * will be disabled while returning to the host. 4653 * will be disabled while returning to the host.
4654 * If we don't have active breakpoints in the host, we don't 4654 * If we don't have active breakpoints in the host, we don't
4655 * care about the messed up debug address registers. But if 4655 * care about the messed up debug address registers. But if
4656 * we have some of them active, restore the old state. 4656 * we have some of them active, restore the old state.
4657 */ 4657 */
4658 if (hw_breakpoint_active()) 4658 if (hw_breakpoint_active())
4659 hw_breakpoint_restore(); 4659 hw_breakpoint_restore();
4660 4660
4661 atomic_set(&vcpu->guest_mode, 0); 4661 atomic_set(&vcpu->guest_mode, 0);
4662 smp_wmb(); 4662 smp_wmb();
4663 local_irq_enable(); 4663 local_irq_enable();
4664 4664
4665 ++vcpu->stat.exits; 4665 ++vcpu->stat.exits;
4666 4666
4667 /* 4667 /*
4668 * We must have an instruction between local_irq_enable() and 4668 * We must have an instruction between local_irq_enable() and
4669 * kvm_guest_exit(), so the timer interrupt isn't delayed by 4669 * kvm_guest_exit(), so the timer interrupt isn't delayed by
4670 * the interrupt shadow. The stat.exits increment will do nicely. 4670 * the interrupt shadow. The stat.exits increment will do nicely.
4671 * But we need to prevent reordering, hence this barrier(): 4671 * But we need to prevent reordering, hence this barrier():
4672 */ 4672 */
4673 barrier(); 4673 barrier();
4674 4674
4675 kvm_guest_exit(); 4675 kvm_guest_exit();
4676 4676
4677 preempt_enable(); 4677 preempt_enable();
4678 4678
4679 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4679 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4680 4680
4681 /* 4681 /*
4682 * Profile KVM exit RIPs: 4682 * Profile KVM exit RIPs:
4683 */ 4683 */
4684 if (unlikely(prof_on == KVM_PROFILING)) { 4684 if (unlikely(prof_on == KVM_PROFILING)) {
4685 unsigned long rip = kvm_rip_read(vcpu); 4685 unsigned long rip = kvm_rip_read(vcpu);
4686 profile_hit(KVM_PROFILING, (void *)rip); 4686 profile_hit(KVM_PROFILING, (void *)rip);
4687 } 4687 }
4688 4688
4689 4689
4690 kvm_lapic_sync_from_vapic(vcpu); 4690 kvm_lapic_sync_from_vapic(vcpu);
4691 4691
4692 r = kvm_x86_ops->handle_exit(vcpu); 4692 r = kvm_x86_ops->handle_exit(vcpu);
4693 out: 4693 out:
4694 return r; 4694 return r;
4695 } 4695 }
4696 4696
4697 4697
4698 static int __vcpu_run(struct kvm_vcpu *vcpu) 4698 static int __vcpu_run(struct kvm_vcpu *vcpu)
4699 { 4699 {
4700 int r; 4700 int r;
4701 struct kvm *kvm = vcpu->kvm; 4701 struct kvm *kvm = vcpu->kvm;
4702 4702
4703 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { 4703 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
4704 pr_debug("vcpu %d received sipi with vector # %x\n", 4704 pr_debug("vcpu %d received sipi with vector # %x\n",
4705 vcpu->vcpu_id, vcpu->arch.sipi_vector); 4705 vcpu->vcpu_id, vcpu->arch.sipi_vector);
4706 kvm_lapic_reset(vcpu); 4706 kvm_lapic_reset(vcpu);
4707 r = kvm_arch_vcpu_reset(vcpu); 4707 r = kvm_arch_vcpu_reset(vcpu);
4708 if (r) 4708 if (r)
4709 return r; 4709 return r;
4710 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 4710 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
4711 } 4711 }
4712 4712
4713 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 4713 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
4714 vapic_enter(vcpu); 4714 vapic_enter(vcpu);
4715 4715
4716 r = 1; 4716 r = 1;
4717 while (r > 0) { 4717 while (r > 0) {
4718 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) 4718 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
4719 r = vcpu_enter_guest(vcpu); 4719 r = vcpu_enter_guest(vcpu);
4720 else { 4720 else {
4721 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 4721 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
4722 kvm_vcpu_block(vcpu); 4722 kvm_vcpu_block(vcpu);
4723 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 4723 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
4724 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) 4724 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
4725 { 4725 {
4726 switch(vcpu->arch.mp_state) { 4726 switch(vcpu->arch.mp_state) {
4727 case KVM_MP_STATE_HALTED: 4727 case KVM_MP_STATE_HALTED:
4728 vcpu->arch.mp_state = 4728 vcpu->arch.mp_state =
4729 KVM_MP_STATE_RUNNABLE; 4729 KVM_MP_STATE_RUNNABLE;
4730 case KVM_MP_STATE_RUNNABLE: 4730 case KVM_MP_STATE_RUNNABLE:
4731 break; 4731 break;
4732 case KVM_MP_STATE_SIPI_RECEIVED: 4732 case KVM_MP_STATE_SIPI_RECEIVED:
4733 default: 4733 default:
4734 r = -EINTR; 4734 r = -EINTR;
4735 break; 4735 break;
4736 } 4736 }
4737 } 4737 }
4738 } 4738 }
4739 4739
4740 if (r <= 0) 4740 if (r <= 0)
4741 break; 4741 break;
4742 4742
4743 clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); 4743 clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
4744 if (kvm_cpu_has_pending_timer(vcpu)) 4744 if (kvm_cpu_has_pending_timer(vcpu))
4745 kvm_inject_pending_timer_irqs(vcpu); 4745 kvm_inject_pending_timer_irqs(vcpu);
4746 4746
4747 if (dm_request_for_irq_injection(vcpu)) { 4747 if (dm_request_for_irq_injection(vcpu)) {
4748 r = -EINTR; 4748 r = -EINTR;
4749 vcpu->run->exit_reason = KVM_EXIT_INTR; 4749 vcpu->run->exit_reason = KVM_EXIT_INTR;
4750 ++vcpu->stat.request_irq_exits; 4750 ++vcpu->stat.request_irq_exits;
4751 } 4751 }
4752 if (signal_pending(current)) { 4752 if (signal_pending(current)) {
4753 r = -EINTR; 4753 r = -EINTR;
4754 vcpu->run->exit_reason = KVM_EXIT_INTR; 4754 vcpu->run->exit_reason = KVM_EXIT_INTR;
4755 ++vcpu->stat.signal_exits; 4755 ++vcpu->stat.signal_exits;
4756 } 4756 }
4757 if (need_resched()) { 4757 if (need_resched()) {
4758 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 4758 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
4759 kvm_resched(vcpu); 4759 kvm_resched(vcpu);
4760 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 4760 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
4761 } 4761 }
4762 } 4762 }
4763 4763
4764 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 4764 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
4765 4765
4766 vapic_exit(vcpu); 4766 vapic_exit(vcpu);
4767 4767
4768 return r; 4768 return r;
4769 } 4769 }
4770 4770
4771 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 4771 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4772 { 4772 {
4773 int r; 4773 int r;
4774 sigset_t sigsaved; 4774 sigset_t sigsaved;
4775 4775
4776 vcpu_load(vcpu);
4777
4778 if (vcpu->sigset_active) 4776 if (vcpu->sigset_active)
4779 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 4777 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
4780 4778
4781 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { 4779 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
4782 kvm_vcpu_block(vcpu); 4780 kvm_vcpu_block(vcpu);
4783 clear_bit(KVM_REQ_UNHALT, &vcpu->requests); 4781 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
4784 r = -EAGAIN; 4782 r = -EAGAIN;
4785 goto out; 4783 goto out;
4786 } 4784 }
4787 4785
4788 /* re-sync apic's tpr */ 4786 /* re-sync apic's tpr */
4789 if (!irqchip_in_kernel(vcpu->kvm)) 4787 if (!irqchip_in_kernel(vcpu->kvm))
4790 kvm_set_cr8(vcpu, kvm_run->cr8); 4788 kvm_set_cr8(vcpu, kvm_run->cr8);
4791 4789
4792 if (vcpu->arch.pio.count || vcpu->mmio_needed || 4790 if (vcpu->arch.pio.count || vcpu->mmio_needed ||
4793 vcpu->arch.emulate_ctxt.restart) { 4791 vcpu->arch.emulate_ctxt.restart) {
4794 if (vcpu->mmio_needed) { 4792 if (vcpu->mmio_needed) {
4795 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); 4793 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
4796 vcpu->mmio_read_completed = 1; 4794 vcpu->mmio_read_completed = 1;
4797 vcpu->mmio_needed = 0; 4795 vcpu->mmio_needed = 0;
4798 } 4796 }
4799 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4797 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4800 r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); 4798 r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE);
4801 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4799 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4802 if (r != EMULATE_DONE) { 4800 if (r != EMULATE_DONE) {
4803 r = 0; 4801 r = 0;
4804 goto out; 4802 goto out;
4805 } 4803 }
4806 } 4804 }
4807 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) 4805 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
4808 kvm_register_write(vcpu, VCPU_REGS_RAX, 4806 kvm_register_write(vcpu, VCPU_REGS_RAX,
4809 kvm_run->hypercall.ret); 4807 kvm_run->hypercall.ret);
4810 4808
4811 r = __vcpu_run(vcpu); 4809 r = __vcpu_run(vcpu);
4812 4810
4813 out: 4811 out:
4814 post_kvm_run_save(vcpu); 4812 post_kvm_run_save(vcpu);
4815 if (vcpu->sigset_active) 4813 if (vcpu->sigset_active)
4816 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 4814 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
4817 4815
4818 vcpu_put(vcpu);
4819 return r; 4816 return r;
4820 } 4817 }
4821 4818
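The vcpu_load()/vcpu_put() pair dropped from kvm_arch_vcpu_ioctl_run() above, like the pairs dropped from the register, sregs and mp_state handlers below, means these handlers now expect to run with the vcpu already loaded by their caller. A minimal sketch of what that caller-side locking could look like, assuming a generic kvm_vcpu_ioctl() dispatcher; the function shape and the case bodies are illustrative assumptions, not a copy of the patch:

/*
 * Illustrative sketch only: the vcpu lock is taken once in the generic
 * vcpu ioctl dispatcher, so the arch handlers in this file no longer
 * call vcpu_load()/vcpu_put() themselves.
 */
static long kvm_vcpu_ioctl(struct file *filp,
			   unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_mp_state mp_state;
	long r;

	vcpu_load(vcpu);		/* lock the vcpu once, up front */
	switch (ioctl) {
	case KVM_RUN:
		r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
		break;
	case KVM_GET_MP_STATE:
		r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state);
		if (!r && copy_to_user(argp, &mp_state, sizeof(mp_state)))
			r = -EFAULT;
		break;
	default:
		r = -EINVAL;
	}
	vcpu_put(vcpu);			/* drop the lock on every exit path */
	return r;
}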
4822 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 4819 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4823 { 4820 {
4824 vcpu_load(vcpu);
4825
4826 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); 4821 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
4827 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); 4822 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
4828 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); 4823 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
4829 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX); 4824 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
4830 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI); 4825 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
4831 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI); 4826 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
4832 regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); 4827 regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
4833 regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP); 4828 regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
4834 #ifdef CONFIG_X86_64 4829 #ifdef CONFIG_X86_64
4835 regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8); 4830 regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
4836 regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9); 4831 regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
4837 regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10); 4832 regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
4838 regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11); 4833 regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
4839 regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12); 4834 regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
4840 regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13); 4835 regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
4841 regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14); 4836 regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
4842 regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15); 4837 regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
4843 #endif 4838 #endif
4844 4839
4845 regs->rip = kvm_rip_read(vcpu); 4840 regs->rip = kvm_rip_read(vcpu);
4846 regs->rflags = kvm_get_rflags(vcpu); 4841 regs->rflags = kvm_get_rflags(vcpu);
4847 4842
4848 vcpu_put(vcpu);
4849
4850 return 0; 4843 return 0;
4851 } 4844 }
4852 4845
4853 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 4846 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4854 { 4847 {
4855 vcpu_load(vcpu);
4856
4857 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); 4848 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
4858 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); 4849 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
4859 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); 4850 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
4860 kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx); 4851 kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
4861 kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi); 4852 kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
4862 kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi); 4853 kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
4863 kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp); 4854 kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
4864 kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp); 4855 kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
4865 #ifdef CONFIG_X86_64 4856 #ifdef CONFIG_X86_64
4866 kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8); 4857 kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
4867 kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9); 4858 kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
4868 kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10); 4859 kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
4869 kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11); 4860 kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
4870 kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12); 4861 kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
4871 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13); 4862 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
4872 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14); 4863 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
4873 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15); 4864 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
4874 #endif 4865 #endif
4875 4866
4876 kvm_rip_write(vcpu, regs->rip); 4867 kvm_rip_write(vcpu, regs->rip);
4877 kvm_set_rflags(vcpu, regs->rflags); 4868 kvm_set_rflags(vcpu, regs->rflags);
4878 4869
4879 vcpu->arch.exception.pending = false; 4870 vcpu->arch.exception.pending = false;
4880 4871
4881 vcpu_put(vcpu);
4882
4883 return 0; 4872 return 0;
4884 } 4873 }
4885 4874
4886 void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 4875 void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
4887 { 4876 {
4888 struct kvm_segment cs; 4877 struct kvm_segment cs;
4889 4878
4890 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS); 4879 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
4891 *db = cs.db; 4880 *db = cs.db;
4892 *l = cs.l; 4881 *l = cs.l;
4893 } 4882 }
4894 EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); 4883 EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
4895 4884
4896 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 4885 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4897 struct kvm_sregs *sregs) 4886 struct kvm_sregs *sregs)
4898 { 4887 {
4899 struct desc_ptr dt; 4888 struct desc_ptr dt;
4900 4889
4901 vcpu_load(vcpu);
4902
4903 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 4890 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
4904 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); 4891 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
4905 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); 4892 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
4906 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS); 4893 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
4907 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS); 4894 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
4908 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS); 4895 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
4909 4896
4910 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); 4897 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
4911 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); 4898 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
4912 4899
4913 kvm_x86_ops->get_idt(vcpu, &dt); 4900 kvm_x86_ops->get_idt(vcpu, &dt);
4914 sregs->idt.limit = dt.size; 4901 sregs->idt.limit = dt.size;
4915 sregs->idt.base = dt.address; 4902 sregs->idt.base = dt.address;
4916 kvm_x86_ops->get_gdt(vcpu, &dt); 4903 kvm_x86_ops->get_gdt(vcpu, &dt);
4917 sregs->gdt.limit = dt.size; 4904 sregs->gdt.limit = dt.size;
4918 sregs->gdt.base = dt.address; 4905 sregs->gdt.base = dt.address;
4919 4906
4920 sregs->cr0 = kvm_read_cr0(vcpu); 4907 sregs->cr0 = kvm_read_cr0(vcpu);
4921 sregs->cr2 = vcpu->arch.cr2; 4908 sregs->cr2 = vcpu->arch.cr2;
4922 sregs->cr3 = vcpu->arch.cr3; 4909 sregs->cr3 = vcpu->arch.cr3;
4923 sregs->cr4 = kvm_read_cr4(vcpu); 4910 sregs->cr4 = kvm_read_cr4(vcpu);
4924 sregs->cr8 = kvm_get_cr8(vcpu); 4911 sregs->cr8 = kvm_get_cr8(vcpu);
4925 sregs->efer = vcpu->arch.efer; 4912 sregs->efer = vcpu->arch.efer;
4926 sregs->apic_base = kvm_get_apic_base(vcpu); 4913 sregs->apic_base = kvm_get_apic_base(vcpu);
4927 4914
4928 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); 4915 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
4929 4916
4930 if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft) 4917 if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
4931 set_bit(vcpu->arch.interrupt.nr, 4918 set_bit(vcpu->arch.interrupt.nr,
4932 (unsigned long *)sregs->interrupt_bitmap); 4919 (unsigned long *)sregs->interrupt_bitmap);
4933 4920
4934 vcpu_put(vcpu);
4935
4936 return 0; 4921 return 0;
4937 } 4922 }
4938 4923
4939 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 4924 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
4940 struct kvm_mp_state *mp_state) 4925 struct kvm_mp_state *mp_state)
4941 { 4926 {
4942 vcpu_load(vcpu);
4943 mp_state->mp_state = vcpu->arch.mp_state; 4927 mp_state->mp_state = vcpu->arch.mp_state;
4944 vcpu_put(vcpu);
4945 return 0; 4928 return 0;
4946 } 4929 }
4947 4930
4948 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 4931 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4949 struct kvm_mp_state *mp_state) 4932 struct kvm_mp_state *mp_state)
4950 { 4933 {
4951 vcpu_load(vcpu);
4952 vcpu->arch.mp_state = mp_state->mp_state; 4934 vcpu->arch.mp_state = mp_state->mp_state;
4953 vcpu_put(vcpu);
4954 return 0; 4935 return 0;
4955 } 4936 }
4956 4937
4957 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, 4938 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
4958 bool has_error_code, u32 error_code) 4939 bool has_error_code, u32 error_code)
4959 { 4940 {
4960 struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; 4941 struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
4961 int cs_db, cs_l, ret; 4942 int cs_db, cs_l, ret;
4962 cache_all_regs(vcpu); 4943 cache_all_regs(vcpu);
4963 4944
4964 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 4945 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4965 4946
4966 vcpu->arch.emulate_ctxt.vcpu = vcpu; 4947 vcpu->arch.emulate_ctxt.vcpu = vcpu;
4967 vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); 4948 vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
4968 vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); 4949 vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
4969 vcpu->arch.emulate_ctxt.mode = 4950 vcpu->arch.emulate_ctxt.mode =
4970 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : 4951 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
4971 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) 4952 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
4972 ? X86EMUL_MODE_VM86 : cs_l 4953 ? X86EMUL_MODE_VM86 : cs_l
4973 ? X86EMUL_MODE_PROT64 : cs_db 4954 ? X86EMUL_MODE_PROT64 : cs_db
4974 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 4955 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
4975 memset(c, 0, sizeof(struct decode_cache)); 4956 memset(c, 0, sizeof(struct decode_cache));
4976 memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); 4957 memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
4977 4958
4978 ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, 4959 ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops,
4979 tss_selector, reason, has_error_code, 4960 tss_selector, reason, has_error_code,
4980 error_code); 4961 error_code);
4981 4962
4982 if (ret) 4963 if (ret)
4983 return EMULATE_FAIL; 4964 return EMULATE_FAIL;
4984 4965
4985 memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); 4966 memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
4986 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); 4967 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
4987 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); 4968 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
4988 return EMULATE_DONE; 4969 return EMULATE_DONE;
4989 } 4970 }
4990 EXPORT_SYMBOL_GPL(kvm_task_switch); 4971 EXPORT_SYMBOL_GPL(kvm_task_switch);
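kvm_task_switch selects the emulator mode from the protected-mode flag, EFLAGS.VM and the CS db/l bits through a nested ?: chain. Rewritten as an if-chain the priority order is easier to see; this is a hedged sketch, and pick_mode plus the enum names are illustrative, not kernel identifiers:

    #include <stdio.h>

    enum emul_mode { MODE_REAL, MODE_VM86, MODE_PROT16, MODE_PROT32, MODE_PROT64 };

    /* Same decision as the nested ?: expression above, in priority order. */
    static enum emul_mode pick_mode(int protmode, int eflags_vm, int cs_l, int cs_db)
    {
            if (!protmode)
                    return MODE_REAL;
            if (eflags_vm)
                    return MODE_VM86;
            if (cs_l)
                    return MODE_PROT64;
            if (cs_db)
                    return MODE_PROT32;
            return MODE_PROT16;
    }

    int main(void)
    {
            /* protected mode, no VM86, 64-bit code segment */
            printf("%d\n", pick_mode(1, 0, 1, 0) == MODE_PROT64);
            return 0;
    }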
4991 4972
4992 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 4973 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4993 struct kvm_sregs *sregs) 4974 struct kvm_sregs *sregs)
4994 { 4975 {
4995 int mmu_reset_needed = 0; 4976 int mmu_reset_needed = 0;
4996 int pending_vec, max_bits; 4977 int pending_vec, max_bits;
4997 struct desc_ptr dt; 4978 struct desc_ptr dt;
4998 4979
4999 vcpu_load(vcpu);
5000
5001 dt.size = sregs->idt.limit; 4980 dt.size = sregs->idt.limit;
5002 dt.address = sregs->idt.base; 4981 dt.address = sregs->idt.base;
5003 kvm_x86_ops->set_idt(vcpu, &dt); 4982 kvm_x86_ops->set_idt(vcpu, &dt);
5004 dt.size = sregs->gdt.limit; 4983 dt.size = sregs->gdt.limit;
5005 dt.address = sregs->gdt.base; 4984 dt.address = sregs->gdt.base;
5006 kvm_x86_ops->set_gdt(vcpu, &dt); 4985 kvm_x86_ops->set_gdt(vcpu, &dt);
5007 4986
5008 vcpu->arch.cr2 = sregs->cr2; 4987 vcpu->arch.cr2 = sregs->cr2;
5009 mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; 4988 mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
5010 vcpu->arch.cr3 = sregs->cr3; 4989 vcpu->arch.cr3 = sregs->cr3;
5011 4990
5012 kvm_set_cr8(vcpu, sregs->cr8); 4991 kvm_set_cr8(vcpu, sregs->cr8);
5013 4992
5014 mmu_reset_needed |= vcpu->arch.efer != sregs->efer; 4993 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
5015 kvm_x86_ops->set_efer(vcpu, sregs->efer); 4994 kvm_x86_ops->set_efer(vcpu, sregs->efer);
5016 kvm_set_apic_base(vcpu, sregs->apic_base); 4995 kvm_set_apic_base(vcpu, sregs->apic_base);
5017 4996
5018 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; 4997 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
5019 kvm_x86_ops->set_cr0(vcpu, sregs->cr0); 4998 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
5020 vcpu->arch.cr0 = sregs->cr0; 4999 vcpu->arch.cr0 = sregs->cr0;
5021 5000
5022 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; 5001 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
5023 kvm_x86_ops->set_cr4(vcpu, sregs->cr4); 5002 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
5024 if (!is_long_mode(vcpu) && is_pae(vcpu)) { 5003 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
5025 load_pdptrs(vcpu, vcpu->arch.cr3); 5004 load_pdptrs(vcpu, vcpu->arch.cr3);
5026 mmu_reset_needed = 1; 5005 mmu_reset_needed = 1;
5027 } 5006 }
5028 5007
5029 if (mmu_reset_needed) 5008 if (mmu_reset_needed)
5030 kvm_mmu_reset_context(vcpu); 5009 kvm_mmu_reset_context(vcpu);
5031 5010
5032 max_bits = (sizeof sregs->interrupt_bitmap) << 3; 5011 max_bits = (sizeof sregs->interrupt_bitmap) << 3;
5033 pending_vec = find_first_bit( 5012 pending_vec = find_first_bit(
5034 (const unsigned long *)sregs->interrupt_bitmap, max_bits); 5013 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
5035 if (pending_vec < max_bits) { 5014 if (pending_vec < max_bits) {
5036 kvm_queue_interrupt(vcpu, pending_vec, false); 5015 kvm_queue_interrupt(vcpu, pending_vec, false);
5037 pr_debug("Set back pending irq %d\n", pending_vec); 5016 pr_debug("Set back pending irq %d\n", pending_vec);
5038 if (irqchip_in_kernel(vcpu->kvm)) 5017 if (irqchip_in_kernel(vcpu->kvm))
5039 kvm_pic_clear_isr_ack(vcpu->kvm); 5018 kvm_pic_clear_isr_ack(vcpu->kvm);
5040 } 5019 }
5041 5020
5042 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 5021 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
5043 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); 5022 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
5044 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES); 5023 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
5045 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); 5024 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
5046 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); 5025 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
5047 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); 5026 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
5048 5027
5049 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); 5028 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
5050 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); 5029 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
5051 5030
5052 update_cr8_intercept(vcpu); 5031 update_cr8_intercept(vcpu);
5053 5032
5054 /* Older userspace won't unhalt the vcpu on reset. */ 5033 /* Older userspace won't unhalt the vcpu on reset. */
5055 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && 5034 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
5056 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && 5035 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
5057 !is_protmode(vcpu)) 5036 !is_protmode(vcpu))
5058 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 5037 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
5059 5038
5060 vcpu_put(vcpu);
5061
5062 return 0; 5039 return 0;
5063 } 5040 }
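kvm_arch_vcpu_ioctl_set_sregs re-queues at most one pending external interrupt by scanning the user-supplied interrupt_bitmap; find_first_bit() returns max_bits when no bit is set, so "pending_vec < max_bits" means something was found. A small self-contained scan with the same "max_bits means empty" convention (find_first_set and the 256-bit array size are illustrative, not the kernel helpers):

    #include <stdio.h>

    static int find_first_set(const unsigned char *bm, int max_bits)
    {
            for (int i = 0; i < max_bits; i++)
                    if (bm[i / 8] & (1u << (i % 8)))
                            return i;
            return max_bits;        /* nothing pending */
    }

    int main(void)
    {
            unsigned char bitmap[256 / 8] = { 0 };

            bitmap[4] |= 1;         /* mark vector 32 pending */
            printf("pending vector: %d\n", find_first_set(bitmap, 256));
            return 0;
    }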
5064 5041
5065 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 5042 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
5066 struct kvm_guest_debug *dbg) 5043 struct kvm_guest_debug *dbg)
5067 { 5044 {
5068 unsigned long rflags; 5045 unsigned long rflags;
5069 int i, r; 5046 int i, r;
5070 5047
5071 vcpu_load(vcpu);
5072
5073 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { 5048 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
5074 r = -EBUSY; 5049 r = -EBUSY;
5075 if (vcpu->arch.exception.pending) 5050 if (vcpu->arch.exception.pending)
5076 goto unlock_out; 5051 goto out;
5077 if (dbg->control & KVM_GUESTDBG_INJECT_DB) 5052 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
5078 kvm_queue_exception(vcpu, DB_VECTOR); 5053 kvm_queue_exception(vcpu, DB_VECTOR);
5079 else 5054 else
5080 kvm_queue_exception(vcpu, BP_VECTOR); 5055 kvm_queue_exception(vcpu, BP_VECTOR);
5081 } 5056 }
5082 5057
5083 /* 5058 /*
5084 * Read rflags as long as potentially injected trace flags are still 5059 * Read rflags as long as potentially injected trace flags are still
5085 * filtered out. 5060 * filtered out.
5086 */ 5061 */
5087 rflags = kvm_get_rflags(vcpu); 5062 rflags = kvm_get_rflags(vcpu);
5088 5063
5089 vcpu->guest_debug = dbg->control; 5064 vcpu->guest_debug = dbg->control;
5090 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) 5065 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
5091 vcpu->guest_debug = 0; 5066 vcpu->guest_debug = 0;
5092 5067
5093 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { 5068 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
5094 for (i = 0; i < KVM_NR_DB_REGS; ++i) 5069 for (i = 0; i < KVM_NR_DB_REGS; ++i)
5095 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; 5070 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
5096 vcpu->arch.switch_db_regs = 5071 vcpu->arch.switch_db_regs =
5097 (dbg->arch.debugreg[7] & DR7_BP_EN_MASK); 5072 (dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
5098 } else { 5073 } else {
5099 for (i = 0; i < KVM_NR_DB_REGS; i++) 5074 for (i = 0; i < KVM_NR_DB_REGS; i++)
5100 vcpu->arch.eff_db[i] = vcpu->arch.db[i]; 5075 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
5101 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); 5076 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
5102 } 5077 }
5103 5078
5104 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) 5079 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
5105 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + 5080 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
5106 get_segment_base(vcpu, VCPU_SREG_CS); 5081 get_segment_base(vcpu, VCPU_SREG_CS);
5107 5082
5108 /* 5083 /*
5109 * Trigger an rflags update that will inject or remove the trace 5084 * Trigger an rflags update that will inject or remove the trace
5110 * flags. 5085 * flags.
5111 */ 5086 */
5112 kvm_set_rflags(vcpu, rflags); 5087 kvm_set_rflags(vcpu, rflags);
5113 5088
5114 kvm_x86_ops->set_guest_debug(vcpu, dbg); 5089 kvm_x86_ops->set_guest_debug(vcpu, dbg);
5115 5090
5116 r = 0; 5091 r = 0;
5117 5092
5118 unlock_out: 5093 out:
5119 vcpu_put(vcpu);
5120 5094
5121 return r; 5095 return r;
5122 } 5096 }
(With the vcpu_put() call gone from the exit path above, the error label is renamed from unlock_out to out; the function otherwise behaves as before.)
5123 5097
5124 /* 5098 /*
5125 * fxsave fpu state. Taken from x86_64/processor.h. To be killed when 5099 * fxsave fpu state. Taken from x86_64/processor.h. To be killed when
5126 * we have asm/x86/processor.h 5100 * we have asm/x86/processor.h
5127 */ 5101 */
5128 struct fxsave { 5102 struct fxsave {
5129 u16 cwd; 5103 u16 cwd;
5130 u16 swd; 5104 u16 swd;
5131 u16 twd; 5105 u16 twd;
5132 u16 fop; 5106 u16 fop;
5133 u64 rip; 5107 u64 rip;
5134 u64 rdp; 5108 u64 rdp;
5135 u32 mxcsr; 5109 u32 mxcsr;
5136 u32 mxcsr_mask; 5110 u32 mxcsr_mask;
5137 u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ 5111 u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
5138 #ifdef CONFIG_X86_64 5112 #ifdef CONFIG_X86_64
5139 u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ 5113 u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
5140 #else 5114 #else
5141 u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ 5115 u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
5142 #endif 5116 #endif
5143 }; 5117 };
5144 5118
5145 /* 5119 /*
5146 * Translate a guest virtual address to a guest physical address. 5120 * Translate a guest virtual address to a guest physical address.
5147 */ 5121 */
5148 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 5122 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
5149 struct kvm_translation *tr) 5123 struct kvm_translation *tr)
5150 { 5124 {
5151 unsigned long vaddr = tr->linear_address; 5125 unsigned long vaddr = tr->linear_address;
5152 gpa_t gpa; 5126 gpa_t gpa;
5153 int idx; 5127 int idx;
5154 5128
5155 vcpu_load(vcpu);
5156 idx = srcu_read_lock(&vcpu->kvm->srcu); 5129 idx = srcu_read_lock(&vcpu->kvm->srcu);
5157 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); 5130 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
5158 srcu_read_unlock(&vcpu->kvm->srcu, idx); 5131 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5159 tr->physical_address = gpa; 5132 tr->physical_address = gpa;
5160 tr->valid = gpa != UNMAPPED_GVA; 5133 tr->valid = gpa != UNMAPPED_GVA;
5161 tr->writeable = 1; 5134 tr->writeable = 1;
5162 tr->usermode = 0; 5135 tr->usermode = 0;
5163 vcpu_put(vcpu);
5164 5136
5165 return 0; 5137 return 0;
5166 } 5138 }
5167 5139
5168 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 5140 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
5169 { 5141 {
5170 struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image; 5142 struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
5171 5143
5172 vcpu_load(vcpu);
5173
5174 memcpy(fpu->fpr, fxsave->st_space, 128); 5144 memcpy(fpu->fpr, fxsave->st_space, 128);
5175 fpu->fcw = fxsave->cwd; 5145 fpu->fcw = fxsave->cwd;
5176 fpu->fsw = fxsave->swd; 5146 fpu->fsw = fxsave->swd;
5177 fpu->ftwx = fxsave->twd; 5147 fpu->ftwx = fxsave->twd;
5178 fpu->last_opcode = fxsave->fop; 5148 fpu->last_opcode = fxsave->fop;
5179 fpu->last_ip = fxsave->rip; 5149 fpu->last_ip = fxsave->rip;
5180 fpu->last_dp = fxsave->rdp; 5150 fpu->last_dp = fxsave->rdp;
5181 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); 5151 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
5182 5152
5183 vcpu_put(vcpu);
5184
5185 return 0; 5153 return 0;
5186 } 5154 }
5187 5155
5188 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 5156 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
5189 { 5157 {
5190 struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image; 5158 struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
5191 5159
5192 vcpu_load(vcpu);
5193
5194 memcpy(fxsave->st_space, fpu->fpr, 128); 5160 memcpy(fxsave->st_space, fpu->fpr, 128);
5195 fxsave->cwd = fpu->fcw; 5161 fxsave->cwd = fpu->fcw;
5196 fxsave->swd = fpu->fsw; 5162 fxsave->swd = fpu->fsw;
5197 fxsave->twd = fpu->ftwx; 5163 fxsave->twd = fpu->ftwx;
5198 fxsave->fop = fpu->last_opcode; 5164 fxsave->fop = fpu->last_opcode;
5199 fxsave->rip = fpu->last_ip; 5165 fxsave->rip = fpu->last_ip;
5200 fxsave->rdp = fpu->last_dp; 5166 fxsave->rdp = fpu->last_dp;
5201 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); 5167 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
5202
5203 vcpu_put(vcpu);
5204 5168
5205 return 0; 5169 return 0;
5206 } 5170 }
5207 5171
5208 void fx_init(struct kvm_vcpu *vcpu) 5172 void fx_init(struct kvm_vcpu *vcpu)
5209 { 5173 {
5210 unsigned after_mxcsr_mask; 5174 unsigned after_mxcsr_mask;
5211 5175
5212 /* 5176 /*
5213 * Touch the fpu the first time in non atomic context as if 5177 * Touch the fpu the first time in non atomic context as if
5214 * this is the first fpu instruction the exception handler 5178 * this is the first fpu instruction the exception handler
5215 * will fire before the instruction returns and it'll have to 5179 * will fire before the instruction returns and it'll have to
5216 * allocate ram with GFP_KERNEL. 5180 * allocate ram with GFP_KERNEL.
5217 */ 5181 */
5218 if (!used_math()) 5182 if (!used_math())
5219 kvm_fx_save(&vcpu->arch.host_fx_image); 5183 kvm_fx_save(&vcpu->arch.host_fx_image);
5220 5184
5221 /* Initialize guest FPU by resetting ours and saving into guest's */ 5185 /* Initialize guest FPU by resetting ours and saving into guest's */
5222 preempt_disable(); 5186 preempt_disable();
5223 kvm_fx_save(&vcpu->arch.host_fx_image); 5187 kvm_fx_save(&vcpu->arch.host_fx_image);
5224 kvm_fx_finit(); 5188 kvm_fx_finit();
5225 kvm_fx_save(&vcpu->arch.guest_fx_image); 5189 kvm_fx_save(&vcpu->arch.guest_fx_image);
5226 kvm_fx_restore(&vcpu->arch.host_fx_image); 5190 kvm_fx_restore(&vcpu->arch.host_fx_image);
5227 preempt_enable(); 5191 preempt_enable();
5228 5192
5229 vcpu->arch.cr0 |= X86_CR0_ET; 5193 vcpu->arch.cr0 |= X86_CR0_ET;
5230 after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space); 5194 after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
5231 vcpu->arch.guest_fx_image.mxcsr = 0x1f80; 5195 vcpu->arch.guest_fx_image.mxcsr = 0x1f80;
5232 memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask, 5196 memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask,
5233 0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask); 5197 0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
5234 } 5198 }
5235 EXPORT_SYMBOL_GPL(fx_init); 5199 EXPORT_SYMBOL_GPL(fx_init);
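fx_init keeps everything up to and including mxcsr_mask and zeroes the fxsave image from st_space onward, using offsetof() to find the cut point. A quick check of where that boundary falls in the layout declared earlier; fxsave_layout is an illustrative stand-in for struct fxsave / i387_fxsave_struct, assuming the 64-bit variant:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct fxsave_layout {
            uint16_t cwd, swd, twd, fop;
            uint64_t rip, rdp;
            uint32_t mxcsr, mxcsr_mask;
            uint32_t st_space[32];
            uint32_t xmm_space[64];
    };

    int main(void)
    {
            /* Everything before st_space (first 32 bytes) is preserved by fx_init. */
            printf("st_space offset:  %zu\n", offsetof(struct fxsave_layout, st_space));
            printf("xmm_space offset: %zu\n", offsetof(struct fxsave_layout, xmm_space));
            return 0;
    }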
5236 5200
5237 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) 5201 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
5238 { 5202 {
5239 if (vcpu->guest_fpu_loaded) 5203 if (vcpu->guest_fpu_loaded)
5240 return; 5204 return;
5241 5205
5242 vcpu->guest_fpu_loaded = 1; 5206 vcpu->guest_fpu_loaded = 1;
5243 kvm_fx_save(&vcpu->arch.host_fx_image); 5207 kvm_fx_save(&vcpu->arch.host_fx_image);
5244 kvm_fx_restore(&vcpu->arch.guest_fx_image); 5208 kvm_fx_restore(&vcpu->arch.guest_fx_image);
5245 trace_kvm_fpu(1); 5209 trace_kvm_fpu(1);
5246 } 5210 }
5247 5211
5248 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) 5212 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
5249 { 5213 {
5250 if (!vcpu->guest_fpu_loaded) 5214 if (!vcpu->guest_fpu_loaded)
5251 return; 5215 return;
5252 5216
5253 vcpu->guest_fpu_loaded = 0; 5217 vcpu->guest_fpu_loaded = 0;
5254 kvm_fx_save(&vcpu->arch.guest_fx_image); 5218 kvm_fx_save(&vcpu->arch.guest_fx_image);
5255 kvm_fx_restore(&vcpu->arch.host_fx_image); 5219 kvm_fx_restore(&vcpu->arch.host_fx_image);
5256 ++vcpu->stat.fpu_reload; 5220 ++vcpu->stat.fpu_reload;
5257 set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests); 5221 set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
5258 trace_kvm_fpu(0); 5222 trace_kvm_fpu(0);
5259 } 5223 }
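The guest FPU is switched lazily: guest_fpu_loaded records which image is live, so repeated loads or puts are cheap no-ops and the real save/restore only happens on a transition. A toy version of that flag-guarded swap; struct fpu_ctx and the integer "images" are placeholders for the real fxsave buffers handled by kvm_fx_save/kvm_fx_restore:

    #include <stdio.h>

    struct fpu_ctx {
            int guest_loaded;
            int host_image, guest_image;   /* stand-ins for the saved register images */
            int live;                      /* what the "hardware" currently holds */
    };

    static void load_guest(struct fpu_ctx *c)
    {
            if (c->guest_loaded)
                    return;                 /* already live, nothing to do */
            c->guest_loaded = 1;
            c->host_image = c->live;        /* save host state */
            c->live = c->guest_image;       /* restore guest state */
    }

    static void put_guest(struct fpu_ctx *c)
    {
            if (!c->guest_loaded)
                    return;
            c->guest_loaded = 0;
            c->guest_image = c->live;       /* save guest state */
            c->live = c->host_image;        /* restore host state */
    }

    int main(void)
    {
            struct fpu_ctx c = { 0, 0, 42, 7 };

            load_guest(&c);
            load_guest(&c);                 /* second call is a no-op */
            printf("live=%d\n", c.live);    /* 42: guest image */
            put_guest(&c);
            printf("live=%d\n", c.live);    /* 7: host image restored */
            return 0;
    }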
5260 5224
5261 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 5225 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
5262 { 5226 {
5263 if (vcpu->arch.time_page) { 5227 if (vcpu->arch.time_page) {
5264 kvm_release_page_dirty(vcpu->arch.time_page); 5228 kvm_release_page_dirty(vcpu->arch.time_page);
5265 vcpu->arch.time_page = NULL; 5229 vcpu->arch.time_page = NULL;
5266 } 5230 }
5267 5231
5268 kvm_x86_ops->vcpu_free(vcpu); 5232 kvm_x86_ops->vcpu_free(vcpu);
5269 } 5233 }
5270 5234
5271 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, 5235 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
5272 unsigned int id) 5236 unsigned int id)
5273 { 5237 {
5274 return kvm_x86_ops->vcpu_create(kvm, id); 5238 return kvm_x86_ops->vcpu_create(kvm, id);
5275 } 5239 }
5276 5240
5277 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 5241 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
5278 { 5242 {
5279 int r; 5243 int r;
5280 5244
5281 /* We do fxsave: this must be aligned. */ 5245 /* We do fxsave: this must be aligned. */
5282 BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); 5246 BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
5283 5247
5284 vcpu->arch.mtrr_state.have_fixed = 1; 5248 vcpu->arch.mtrr_state.have_fixed = 1;
5285 vcpu_load(vcpu); 5249 vcpu_load(vcpu);
5286 r = kvm_arch_vcpu_reset(vcpu); 5250 r = kvm_arch_vcpu_reset(vcpu);
5287 if (r == 0) 5251 if (r == 0)
5288 r = kvm_mmu_setup(vcpu); 5252 r = kvm_mmu_setup(vcpu);
5289 vcpu_put(vcpu); 5253 vcpu_put(vcpu);
5290 if (r < 0) 5254 if (r < 0)
5291 goto free_vcpu; 5255 goto free_vcpu;
5292 5256
5293 return 0; 5257 return 0;
5294 free_vcpu: 5258 free_vcpu:
5295 kvm_x86_ops->vcpu_free(vcpu); 5259 kvm_x86_ops->vcpu_free(vcpu);
5296 return r; 5260 return r;
5297 } 5261 }
5298 5262
5299 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 5263 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
5300 { 5264 {
5301 vcpu_load(vcpu); 5265 vcpu_load(vcpu);
5302 kvm_mmu_unload(vcpu); 5266 kvm_mmu_unload(vcpu);
5303 vcpu_put(vcpu); 5267 vcpu_put(vcpu);
5304 5268
5305 kvm_x86_ops->vcpu_free(vcpu); 5269 kvm_x86_ops->vcpu_free(vcpu);
5306 } 5270 }
5307 5271
5308 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) 5272 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
5309 { 5273 {
5310 vcpu->arch.nmi_pending = false; 5274 vcpu->arch.nmi_pending = false;
5311 vcpu->arch.nmi_injected = false; 5275 vcpu->arch.nmi_injected = false;
5312 5276
5313 vcpu->arch.switch_db_regs = 0; 5277 vcpu->arch.switch_db_regs = 0;
5314 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); 5278 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
5315 vcpu->arch.dr6 = DR6_FIXED_1; 5279 vcpu->arch.dr6 = DR6_FIXED_1;
5316 vcpu->arch.dr7 = DR7_FIXED_1; 5280 vcpu->arch.dr7 = DR7_FIXED_1;
5317 5281
5318 return kvm_x86_ops->vcpu_reset(vcpu); 5282 return kvm_x86_ops->vcpu_reset(vcpu);
5319 } 5283 }
5320 5284
5321 int kvm_arch_hardware_enable(void *garbage) 5285 int kvm_arch_hardware_enable(void *garbage)
5322 { 5286 {
5323 /* 5287 /*
5324 * Since this may be called from a hotplug notification, 5288 * Since this may be called from a hotplug notification,
5325 * we can't get the CPU frequency directly. 5289 * we can't get the CPU frequency directly.
5326 */ 5290 */
5327 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { 5291 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
5328 int cpu = raw_smp_processor_id(); 5292 int cpu = raw_smp_processor_id();
5329 per_cpu(cpu_tsc_khz, cpu) = 0; 5293 per_cpu(cpu_tsc_khz, cpu) = 0;
5330 } 5294 }
5331 5295
5332 kvm_shared_msr_cpu_online(); 5296 kvm_shared_msr_cpu_online();
5333 5297
5334 return kvm_x86_ops->hardware_enable(garbage); 5298 return kvm_x86_ops->hardware_enable(garbage);
5335 } 5299 }
5336 5300
5337 void kvm_arch_hardware_disable(void *garbage) 5301 void kvm_arch_hardware_disable(void *garbage)
5338 { 5302 {
5339 kvm_x86_ops->hardware_disable(garbage); 5303 kvm_x86_ops->hardware_disable(garbage);
5340 drop_user_return_notifiers(garbage); 5304 drop_user_return_notifiers(garbage);
5341 } 5305 }
5342 5306
5343 int kvm_arch_hardware_setup(void) 5307 int kvm_arch_hardware_setup(void)
5344 { 5308 {
5345 return kvm_x86_ops->hardware_setup(); 5309 return kvm_x86_ops->hardware_setup();
5346 } 5310 }
5347 5311
5348 void kvm_arch_hardware_unsetup(void) 5312 void kvm_arch_hardware_unsetup(void)
5349 { 5313 {
5350 kvm_x86_ops->hardware_unsetup(); 5314 kvm_x86_ops->hardware_unsetup();
5351 } 5315 }
5352 5316
5353 void kvm_arch_check_processor_compat(void *rtn) 5317 void kvm_arch_check_processor_compat(void *rtn)
5354 { 5318 {
5355 kvm_x86_ops->check_processor_compatibility(rtn); 5319 kvm_x86_ops->check_processor_compatibility(rtn);
5356 } 5320 }
5357 5321
5358 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 5322 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
5359 { 5323 {
5360 struct page *page; 5324 struct page *page;
5361 struct kvm *kvm; 5325 struct kvm *kvm;
5362 int r; 5326 int r;
5363 5327
5364 BUG_ON(vcpu->kvm == NULL); 5328 BUG_ON(vcpu->kvm == NULL);
5365 kvm = vcpu->kvm; 5329 kvm = vcpu->kvm;
5366 5330
5367 vcpu->arch.mmu.root_hpa = INVALID_PAGE; 5331 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
5368 if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) 5332 if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
5369 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 5333 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
5370 else 5334 else
5371 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; 5335 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
5372 5336
5373 page = alloc_page(GFP_KERNEL | __GFP_ZERO); 5337 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
5374 if (!page) { 5338 if (!page) {
5375 r = -ENOMEM; 5339 r = -ENOMEM;
5376 goto fail; 5340 goto fail;
5377 } 5341 }
5378 vcpu->arch.pio_data = page_address(page); 5342 vcpu->arch.pio_data = page_address(page);
5379 5343
5380 r = kvm_mmu_create(vcpu); 5344 r = kvm_mmu_create(vcpu);
5381 if (r < 0) 5345 if (r < 0)
5382 goto fail_free_pio_data; 5346 goto fail_free_pio_data;
5383 5347
5384 if (irqchip_in_kernel(kvm)) { 5348 if (irqchip_in_kernel(kvm)) {
5385 r = kvm_create_lapic(vcpu); 5349 r = kvm_create_lapic(vcpu);
5386 if (r < 0) 5350 if (r < 0)
5387 goto fail_mmu_destroy; 5351 goto fail_mmu_destroy;
5388 } 5352 }
5389 5353
5390 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, 5354 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
5391 GFP_KERNEL); 5355 GFP_KERNEL);
5392 if (!vcpu->arch.mce_banks) { 5356 if (!vcpu->arch.mce_banks) {
5393 r = -ENOMEM; 5357 r = -ENOMEM;
5394 goto fail_free_lapic; 5358 goto fail_free_lapic;
5395 } 5359 }
5396 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; 5360 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
5397 5361
5398 return 0; 5362 return 0;
5399 fail_free_lapic: 5363 fail_free_lapic:
5400 kvm_free_lapic(vcpu); 5364 kvm_free_lapic(vcpu);
5401 fail_mmu_destroy: 5365 fail_mmu_destroy:
5402 kvm_mmu_destroy(vcpu); 5366 kvm_mmu_destroy(vcpu);
5403 fail_free_pio_data: 5367 fail_free_pio_data:
5404 free_page((unsigned long)vcpu->arch.pio_data); 5368 free_page((unsigned long)vcpu->arch.pio_data);
5405 fail: 5369 fail:
5406 return r; 5370 return r;
5407 } 5371 }
5408 5372
5409 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 5373 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
5410 { 5374 {
5411 int idx; 5375 int idx;
5412 5376
5413 kfree(vcpu->arch.mce_banks); 5377 kfree(vcpu->arch.mce_banks);
5414 kvm_free_lapic(vcpu); 5378 kvm_free_lapic(vcpu);
5415 idx = srcu_read_lock(&vcpu->kvm->srcu); 5379 idx = srcu_read_lock(&vcpu->kvm->srcu);
5416 kvm_mmu_destroy(vcpu); 5380 kvm_mmu_destroy(vcpu);
5417 srcu_read_unlock(&vcpu->kvm->srcu, idx); 5381 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5418 free_page((unsigned long)vcpu->arch.pio_data); 5382 free_page((unsigned long)vcpu->arch.pio_data);
5419 } 5383 }
5420 5384
5421 struct kvm *kvm_arch_create_vm(void) 5385 struct kvm *kvm_arch_create_vm(void)
5422 { 5386 {
5423 struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); 5387 struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
5424 5388
5425 if (!kvm) 5389 if (!kvm)
5426 return ERR_PTR(-ENOMEM); 5390 return ERR_PTR(-ENOMEM);
5427 5391
5428 kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); 5392 kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
5429 if (!kvm->arch.aliases) { 5393 if (!kvm->arch.aliases) {
5430 kfree(kvm); 5394 kfree(kvm);
5431 return ERR_PTR(-ENOMEM); 5395 return ERR_PTR(-ENOMEM);
5432 } 5396 }
5433 5397
5434 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 5398 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
5435 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 5399 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
5436 5400
5437 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ 5401 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
5438 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); 5402 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
5439 5403
5440 rdtscll(kvm->arch.vm_init_tsc); 5404 rdtscll(kvm->arch.vm_init_tsc);
5441 5405
5442 return kvm; 5406 return kvm;
5443 } 5407 }
5444 5408
5445 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) 5409 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
5446 { 5410 {
5447 vcpu_load(vcpu); 5411 vcpu_load(vcpu);
5448 kvm_mmu_unload(vcpu); 5412 kvm_mmu_unload(vcpu);
5449 vcpu_put(vcpu); 5413 vcpu_put(vcpu);
5450 } 5414 }
5451 5415
5452 static void kvm_free_vcpus(struct kvm *kvm) 5416 static void kvm_free_vcpus(struct kvm *kvm)
5453 { 5417 {
5454 unsigned int i; 5418 unsigned int i;
5455 struct kvm_vcpu *vcpu; 5419 struct kvm_vcpu *vcpu;
5456 5420
5457 /* 5421 /*
5458 * Unpin any mmu pages first. 5422 * Unpin any mmu pages first.
5459 */ 5423 */
5460 kvm_for_each_vcpu(i, vcpu, kvm) 5424 kvm_for_each_vcpu(i, vcpu, kvm)
5461 kvm_unload_vcpu_mmu(vcpu); 5425 kvm_unload_vcpu_mmu(vcpu);
5462 kvm_for_each_vcpu(i, vcpu, kvm) 5426 kvm_for_each_vcpu(i, vcpu, kvm)
5463 kvm_arch_vcpu_free(vcpu); 5427 kvm_arch_vcpu_free(vcpu);
5464 5428
5465 mutex_lock(&kvm->lock); 5429 mutex_lock(&kvm->lock);
5466 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) 5430 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
5467 kvm->vcpus[i] = NULL; 5431 kvm->vcpus[i] = NULL;
5468 5432
5469 atomic_set(&kvm->online_vcpus, 0); 5433 atomic_set(&kvm->online_vcpus, 0);
5470 mutex_unlock(&kvm->lock); 5434 mutex_unlock(&kvm->lock);
5471 } 5435 }
5472 5436
5473 void kvm_arch_sync_events(struct kvm *kvm) 5437 void kvm_arch_sync_events(struct kvm *kvm)
5474 { 5438 {
5475 kvm_free_all_assigned_devices(kvm); 5439 kvm_free_all_assigned_devices(kvm);
5476 } 5440 }
5477 5441
5478 void kvm_arch_destroy_vm(struct kvm *kvm) 5442 void kvm_arch_destroy_vm(struct kvm *kvm)
5479 { 5443 {
5480 kvm_iommu_unmap_guest(kvm); 5444 kvm_iommu_unmap_guest(kvm);
5481 kvm_free_pit(kvm); 5445 kvm_free_pit(kvm);
5482 kfree(kvm->arch.vpic); 5446 kfree(kvm->arch.vpic);
5483 kfree(kvm->arch.vioapic); 5447 kfree(kvm->arch.vioapic);
5484 kvm_free_vcpus(kvm); 5448 kvm_free_vcpus(kvm);
5485 kvm_free_physmem(kvm); 5449 kvm_free_physmem(kvm);
5486 if (kvm->arch.apic_access_page) 5450 if (kvm->arch.apic_access_page)
5487 put_page(kvm->arch.apic_access_page); 5451 put_page(kvm->arch.apic_access_page);
5488 if (kvm->arch.ept_identity_pagetable) 5452 if (kvm->arch.ept_identity_pagetable)
5489 put_page(kvm->arch.ept_identity_pagetable); 5453 put_page(kvm->arch.ept_identity_pagetable);
5490 cleanup_srcu_struct(&kvm->srcu); 5454 cleanup_srcu_struct(&kvm->srcu);
5491 kfree(kvm->arch.aliases); 5455 kfree(kvm->arch.aliases);
5492 kfree(kvm); 5456 kfree(kvm);
5493 } 5457 }
5494 5458
5495 int kvm_arch_prepare_memory_region(struct kvm *kvm, 5459 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5496 struct kvm_memory_slot *memslot, 5460 struct kvm_memory_slot *memslot,
5497 struct kvm_memory_slot old, 5461 struct kvm_memory_slot old,
5498 struct kvm_userspace_memory_region *mem, 5462 struct kvm_userspace_memory_region *mem,
5499 int user_alloc) 5463 int user_alloc)
5500 { 5464 {
5501 int npages = memslot->npages; 5465 int npages = memslot->npages;
5502 5466
5503 /*To keep backward compatibility with older userspace, 5467 /*To keep backward compatibility with older userspace,
5504 *x86 needs to handle !user_alloc case. 5468 *x86 needs to handle !user_alloc case.
5505 */ 5469 */
5506 if (!user_alloc) { 5470 if (!user_alloc) {
5507 if (npages && !old.rmap) { 5471 if (npages && !old.rmap) {
5508 unsigned long userspace_addr; 5472 unsigned long userspace_addr;
5509 5473
5510 down_write(&current->mm->mmap_sem); 5474 down_write(&current->mm->mmap_sem);
5511 userspace_addr = do_mmap(NULL, 0, 5475 userspace_addr = do_mmap(NULL, 0,
5512 npages * PAGE_SIZE, 5476 npages * PAGE_SIZE,
5513 PROT_READ | PROT_WRITE, 5477 PROT_READ | PROT_WRITE,
5514 MAP_PRIVATE | MAP_ANONYMOUS, 5478 MAP_PRIVATE | MAP_ANONYMOUS,
5515 0); 5479 0);
5516 up_write(&current->mm->mmap_sem); 5480 up_write(&current->mm->mmap_sem);
5517 5481
5518 if (IS_ERR((void *)userspace_addr)) 5482 if (IS_ERR((void *)userspace_addr))
5519 return PTR_ERR((void *)userspace_addr); 5483 return PTR_ERR((void *)userspace_addr);
5520 5484
5521 memslot->userspace_addr = userspace_addr; 5485 memslot->userspace_addr = userspace_addr;
5522 } 5486 }
5523 } 5487 }
5524 5488
5525 5489
5526 return 0; 5490 return 0;
5527 } 5491 }
5528 5492
5529 void kvm_arch_commit_memory_region(struct kvm *kvm, 5493 void kvm_arch_commit_memory_region(struct kvm *kvm,
5530 struct kvm_userspace_memory_region *mem, 5494 struct kvm_userspace_memory_region *mem,
5531 struct kvm_memory_slot old, 5495 struct kvm_memory_slot old,
5532 int user_alloc) 5496 int user_alloc)
5533 { 5497 {
5534 5498
5535 int npages = mem->memory_size >> PAGE_SHIFT; 5499 int npages = mem->memory_size >> PAGE_SHIFT;
5536 5500
5537 if (!user_alloc && !old.user_alloc && old.rmap && !npages) { 5501 if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
5538 int ret; 5502 int ret;
5539 5503
5540 down_write(&current->mm->mmap_sem); 5504 down_write(&current->mm->mmap_sem);
5541 ret = do_munmap(current->mm, old.userspace_addr, 5505 ret = do_munmap(current->mm, old.userspace_addr,
5542 old.npages * PAGE_SIZE); 5506 old.npages * PAGE_SIZE);
5543 up_write(&current->mm->mmap_sem); 5507 up_write(&current->mm->mmap_sem);
5544 if (ret < 0) 5508 if (ret < 0)
5545 printk(KERN_WARNING 5509 printk(KERN_WARNING
5546 "kvm_vm_ioctl_set_memory_region: " 5510 "kvm_vm_ioctl_set_memory_region: "
5547 "failed to munmap memory\n"); 5511 "failed to munmap memory\n");
5548 } 5512 }
5549 5513
5550 spin_lock(&kvm->mmu_lock); 5514 spin_lock(&kvm->mmu_lock);
5551 if (!kvm->arch.n_requested_mmu_pages) { 5515 if (!kvm->arch.n_requested_mmu_pages) {
5552 unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); 5516 unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
5553 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); 5517 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
5554 } 5518 }
5555 5519
5556 kvm_mmu_slot_remove_write_access(kvm, mem->slot); 5520 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
5557 spin_unlock(&kvm->mmu_lock); 5521 spin_unlock(&kvm->mmu_lock);
5558 } 5522 }
5559 5523
5560 void kvm_arch_flush_shadow(struct kvm *kvm) 5524 void kvm_arch_flush_shadow(struct kvm *kvm)
5561 { 5525 {
5562 kvm_mmu_zap_all(kvm); 5526 kvm_mmu_zap_all(kvm);
5563 kvm_reload_remote_mmus(kvm); 5527 kvm_reload_remote_mmus(kvm);
5564 } 5528 }
5565 5529
5566 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 5530 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
5567 { 5531 {
5568 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE 5532 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
5569 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED 5533 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
5570 || vcpu->arch.nmi_pending || 5534 || vcpu->arch.nmi_pending ||
5571 (kvm_arch_interrupt_allowed(vcpu) && 5535 (kvm_arch_interrupt_allowed(vcpu) &&
5572 kvm_cpu_has_interrupt(vcpu)); 5536 kvm_cpu_has_interrupt(vcpu));
5573 } 5537 }
5574 5538
5575 void kvm_vcpu_kick(struct kvm_vcpu *vcpu) 5539 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
5576 { 5540 {
5577 int me; 5541 int me;
5578 int cpu = vcpu->cpu; 5542 int cpu = vcpu->cpu;
5579 5543
5580 if (waitqueue_active(&vcpu->wq)) { 5544 if (waitqueue_active(&vcpu->wq)) {
5581 wake_up_interruptible(&vcpu->wq); 5545 wake_up_interruptible(&vcpu->wq);
5582 ++vcpu->stat.halt_wakeup; 5546 ++vcpu->stat.halt_wakeup;
5583 } 5547 }
5584 5548
5585 me = get_cpu(); 5549 me = get_cpu();
5586 if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) 5550 if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
5587 if (atomic_xchg(&vcpu->guest_mode, 0)) 5551 if (atomic_xchg(&vcpu->guest_mode, 0))
5588 smp_send_reschedule(cpu); 5552 smp_send_reschedule(cpu);
5589 put_cpu(); 5553 put_cpu();
5590 } 5554 }
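kvm_vcpu_kick only sends a reschedule IPI if atomic_xchg() shows the target was actually in guest mode, so concurrent kickers do not pile up redundant IPIs: whoever flips guest_mode from 1 to 0 wins. The same "only the first caller acts" idea with C11 atomics (guest_mode here is a plain file-scope variable and kick is an illustrative helper):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int guest_mode = 1;

    /* Returns 1 if this caller is the one that gets to send the IPI. */
    static int kick(void)
    {
            return atomic_exchange(&guest_mode, 0) != 0;
    }

    int main(void)
    {
            printf("first kick sends IPI:  %d\n", kick());   /* 1 */
            printf("second kick sends IPI: %d\n", kick());   /* 0 */
            return 0;
    }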
5591 5555
5592 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) 5556 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
5593 { 5557 {
5594 return kvm_x86_ops->interrupt_allowed(vcpu); 5558 return kvm_x86_ops->interrupt_allowed(vcpu);
5595 } 5559 }
5596 5560
5597 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) 5561 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
5598 { 5562 {
5599 unsigned long current_rip = kvm_rip_read(vcpu) + 5563 unsigned long current_rip = kvm_rip_read(vcpu) +
5600 get_segment_base(vcpu, VCPU_SREG_CS); 5564 get_segment_base(vcpu, VCPU_SREG_CS);
5601 5565
5602 return current_rip == linear_rip; 5566 return current_rip == linear_rip;
5603 } 5567 }
5604 EXPORT_SYMBOL_GPL(kvm_is_linear_rip); 5568 EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
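kvm_is_linear_rip compares a previously recorded linear RIP against the current one, where the linear RIP is simply RIP plus the CS segment base. A one-line check of that arithmetic, using the reset-vector values tested in set_sregs above (linear_rip is an illustrative helper, not a kernel function):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t linear_rip(uint64_t rip, uint64_t cs_base)
    {
            return rip + cs_base;
    }

    int main(void)
    {
            /* Reset vector: RIP 0xfff0 with CS base 0xffff0000 -> 0xfffffff0. */
            printf("%#llx\n", (unsigned long long)linear_rip(0xfff0, 0xffff0000));
            return 0;
    }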
5605 5569
5606 unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) 5570 unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
5607 { 5571 {
5608 unsigned long rflags; 5572 unsigned long rflags;
5609 5573
5610 rflags = kvm_x86_ops->get_rflags(vcpu); 5574 rflags = kvm_x86_ops->get_rflags(vcpu);
5611 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) 5575 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
5612 rflags &= ~X86_EFLAGS_TF; 5576 rflags &= ~X86_EFLAGS_TF;
5613 return rflags; 5577 return rflags;
5614 } 5578 }
5615 EXPORT_SYMBOL_GPL(kvm_get_rflags); 5579 EXPORT_SYMBOL_GPL(kvm_get_rflags);
5616 5580
5617 void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 5581 void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
5618 { 5582 {
5619 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && 5583 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
5620 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) 5584 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
5621 rflags |= X86_EFLAGS_TF; 5585 rflags |= X86_EFLAGS_TF;
5622 kvm_x86_ops->set_rflags(vcpu, rflags); 5586 kvm_x86_ops->set_rflags(vcpu, rflags);
5623 } 5587 }
5624 EXPORT_SYMBOL_GPL(kvm_set_rflags); 5588 EXPORT_SYMBOL_GPL(kvm_set_rflags);
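kvm_get_rflags and kvm_set_rflags hide the trap flag that guest single-stepping injects: reads mask TF out while single-stepping, and writes re-add it only while the vcpu is still at the recorded single-step RIP (checked via kvm_is_linear_rip). A minimal sketch of that filter pair; TF_BIT, filter_get and filter_set are illustrative names:

    #include <stdio.h>

    #define TF_BIT 0x100u   /* X86_EFLAGS_TF */

    static unsigned filter_get(unsigned hw_rflags, int singlestep)
    {
            return singlestep ? hw_rflags & ~TF_BIT : hw_rflags;
    }

    static unsigned filter_set(unsigned rflags, int singlestep_at_rip)
    {
            return singlestep_at_rip ? rflags | TF_BIT : rflags;
    }

    int main(void)
    {
            unsigned hw = 0x102;    /* TF injected by the debugger */

            printf("guest sees:   %#x\n", filter_get(hw, 1));   /* 0x2 */
            printf("written back: %#x\n", filter_set(0x2, 1));  /* 0x102 */
            return 0;
    }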
5625 5589
5626 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); 5590 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
5627 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); 5591 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
5628 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); 5592 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
5629 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); 5593 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
5630 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); 5594 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
5631 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); 5595 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
5632 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); 5596 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
5633 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); 5597 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
5634 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); 5598 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
5635 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); 5599 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
5636 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); 5600 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
5637 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); 5601 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
5638 5602
virt/kvm/kvm_main.c
1 /* 1 /*
2 * Kernel-based Virtual Machine driver for Linux 2 * Kernel-based Virtual Machine driver for Linux
3 * 3 *
4 * This module enables machines with Intel VT-x extensions to run virtual 4 * This module enables machines with Intel VT-x extensions to run virtual
5 * machines without emulation or binary translation. 5 * machines without emulation or binary translation.
6 * 6 *
7 * Copyright (C) 2006 Qumranet, Inc. 7 * Copyright (C) 2006 Qumranet, Inc.
8 * 8 *
9 * Authors: 9 * Authors:
10 * Avi Kivity <avi@qumranet.com> 10 * Avi Kivity <avi@qumranet.com>
11 * Yaniv Kamay <yaniv@qumranet.com> 11 * Yaniv Kamay <yaniv@qumranet.com>
12 * 12 *
13 * This work is licensed under the terms of the GNU GPL, version 2. See 13 * This work is licensed under the terms of the GNU GPL, version 2. See
14 * the COPYING file in the top-level directory. 14 * the COPYING file in the top-level directory.
15 * 15 *
16 */ 16 */
17 17
18 #include "iodev.h" 18 #include "iodev.h"
19 19
20 #include <linux/kvm_host.h> 20 #include <linux/kvm_host.h>
21 #include <linux/kvm.h> 21 #include <linux/kvm.h>
22 #include <linux/module.h> 22 #include <linux/module.h>
23 #include <linux/errno.h> 23 #include <linux/errno.h>
24 #include <linux/percpu.h> 24 #include <linux/percpu.h>
25 #include <linux/mm.h> 25 #include <linux/mm.h>
26 #include <linux/miscdevice.h> 26 #include <linux/miscdevice.h>
27 #include <linux/vmalloc.h> 27 #include <linux/vmalloc.h>
28 #include <linux/reboot.h> 28 #include <linux/reboot.h>
29 #include <linux/debugfs.h> 29 #include <linux/debugfs.h>
30 #include <linux/highmem.h> 30 #include <linux/highmem.h>
31 #include <linux/file.h> 31 #include <linux/file.h>
32 #include <linux/sysdev.h> 32 #include <linux/sysdev.h>
33 #include <linux/cpu.h> 33 #include <linux/cpu.h>
34 #include <linux/sched.h> 34 #include <linux/sched.h>
35 #include <linux/cpumask.h> 35 #include <linux/cpumask.h>
36 #include <linux/smp.h> 36 #include <linux/smp.h>
37 #include <linux/anon_inodes.h> 37 #include <linux/anon_inodes.h>
38 #include <linux/profile.h> 38 #include <linux/profile.h>
39 #include <linux/kvm_para.h> 39 #include <linux/kvm_para.h>
40 #include <linux/pagemap.h> 40 #include <linux/pagemap.h>
41 #include <linux/mman.h> 41 #include <linux/mman.h>
42 #include <linux/swap.h> 42 #include <linux/swap.h>
43 #include <linux/bitops.h> 43 #include <linux/bitops.h>
44 #include <linux/spinlock.h> 44 #include <linux/spinlock.h>
45 #include <linux/compat.h> 45 #include <linux/compat.h>
46 #include <linux/srcu.h> 46 #include <linux/srcu.h>
47 #include <linux/hugetlb.h> 47 #include <linux/hugetlb.h>
48 #include <linux/slab.h> 48 #include <linux/slab.h>
49 49
50 #include <asm/processor.h> 50 #include <asm/processor.h>
51 #include <asm/io.h> 51 #include <asm/io.h>
52 #include <asm/uaccess.h> 52 #include <asm/uaccess.h>
53 #include <asm/pgtable.h> 53 #include <asm/pgtable.h>
54 #include <asm-generic/bitops/le.h> 54 #include <asm-generic/bitops/le.h>
55 55
56 #include "coalesced_mmio.h" 56 #include "coalesced_mmio.h"
57 57
58 #define CREATE_TRACE_POINTS 58 #define CREATE_TRACE_POINTS
59 #include <trace/events/kvm.h> 59 #include <trace/events/kvm.h>
60 60
61 MODULE_AUTHOR("Qumranet"); 61 MODULE_AUTHOR("Qumranet");
62 MODULE_LICENSE("GPL"); 62 MODULE_LICENSE("GPL");
63 63
64 /* 64 /*
65 * Ordering of locks: 65 * Ordering of locks:
66 * 66 *
67 * kvm->lock --> kvm->slots_lock --> kvm->irq_lock 67 * kvm->lock --> kvm->slots_lock --> kvm->irq_lock
68 */ 68 */
69 69
70 DEFINE_SPINLOCK(kvm_lock); 70 DEFINE_SPINLOCK(kvm_lock);
71 LIST_HEAD(vm_list); 71 LIST_HEAD(vm_list);
72 72
73 static cpumask_var_t cpus_hardware_enabled; 73 static cpumask_var_t cpus_hardware_enabled;
74 static int kvm_usage_count = 0; 74 static int kvm_usage_count = 0;
75 static atomic_t hardware_enable_failed; 75 static atomic_t hardware_enable_failed;
76 76
77 struct kmem_cache *kvm_vcpu_cache; 77 struct kmem_cache *kvm_vcpu_cache;
78 EXPORT_SYMBOL_GPL(kvm_vcpu_cache); 78 EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
79 79
80 static __read_mostly struct preempt_ops kvm_preempt_ops; 80 static __read_mostly struct preempt_ops kvm_preempt_ops;
81 81
82 struct dentry *kvm_debugfs_dir; 82 struct dentry *kvm_debugfs_dir;
83 83
84 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, 84 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
85 unsigned long arg); 85 unsigned long arg);
86 static int hardware_enable_all(void); 86 static int hardware_enable_all(void);
87 static void hardware_disable_all(void); 87 static void hardware_disable_all(void);
88 88
89 static void kvm_io_bus_destroy(struct kvm_io_bus *bus); 89 static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
90 90
91 static bool kvm_rebooting; 91 static bool kvm_rebooting;
92 92
93 static bool largepages_enabled = true; 93 static bool largepages_enabled = true;
94 94
95 struct page *hwpoison_page; 95 struct page *hwpoison_page;
96 pfn_t hwpoison_pfn; 96 pfn_t hwpoison_pfn;
97 97
98 inline int kvm_is_mmio_pfn(pfn_t pfn) 98 inline int kvm_is_mmio_pfn(pfn_t pfn)
99 { 99 {
100 if (pfn_valid(pfn)) { 100 if (pfn_valid(pfn)) {
101 struct page *page = compound_head(pfn_to_page(pfn)); 101 struct page *page = compound_head(pfn_to_page(pfn));
102 return PageReserved(page); 102 return PageReserved(page);
103 } 103 }
104 104
105 return true; 105 return true;
106 } 106 }
107 107
108 /* 108 /*
109 * Switches to specified vcpu, until a matching vcpu_put() 109 * Switches to specified vcpu, until a matching vcpu_put()
110 */ 110 */
111 void vcpu_load(struct kvm_vcpu *vcpu) 111 void vcpu_load(struct kvm_vcpu *vcpu)
112 { 112 {
113 int cpu; 113 int cpu;
114 114
115 mutex_lock(&vcpu->mutex); 115 mutex_lock(&vcpu->mutex);
116 cpu = get_cpu(); 116 cpu = get_cpu();
117 preempt_notifier_register(&vcpu->preempt_notifier); 117 preempt_notifier_register(&vcpu->preempt_notifier);
118 kvm_arch_vcpu_load(vcpu, cpu); 118 kvm_arch_vcpu_load(vcpu, cpu);
119 put_cpu(); 119 put_cpu();
120 } 120 }
121 121
122 void vcpu_put(struct kvm_vcpu *vcpu) 122 void vcpu_put(struct kvm_vcpu *vcpu)
123 { 123 {
124 preempt_disable(); 124 preempt_disable();
125 kvm_arch_vcpu_put(vcpu); 125 kvm_arch_vcpu_put(vcpu);
126 preempt_notifier_unregister(&vcpu->preempt_notifier); 126 preempt_notifier_unregister(&vcpu->preempt_notifier);
127 preempt_enable(); 127 preempt_enable();
128 mutex_unlock(&vcpu->mutex); 128 mutex_unlock(&vcpu->mutex);
129 } 129 }
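vcpu_load()/vcpu_put() are built on a plain, non-recursive mutex, which is why the arch handlers above had to stop locking on their own once the dispatcher started doing it: taking vcpu->mutex twice on the same call path would deadlock. An error-checking pthread mutex makes that failure mode visible in user space (the setup is illustrative, not the kernel's locking primitive):

    #include <pthread.h>
    #include <errno.h>
    #include <stdio.h>

    int main(void)
    {
            pthread_mutex_t m;
            pthread_mutexattr_t attr;

            pthread_mutexattr_init(&attr);
            pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
            pthread_mutex_init(&m, &attr);

            pthread_mutex_lock(&m);                 /* dispatcher's vcpu_load() */
            int r = pthread_mutex_lock(&m);         /* a handler locking again */
            printf("second lock: %s\n", r == EDEADLK ? "EDEADLK" : "ok");

            pthread_mutex_unlock(&m);
            pthread_mutex_destroy(&m);
            pthread_mutexattr_destroy(&attr);
            return 0;
    }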
130 130
131 static void ack_flush(void *_completed) 131 static void ack_flush(void *_completed)
132 { 132 {
133 } 133 }
134 134
135 static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) 135 static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
136 { 136 {
137 int i, cpu, me; 137 int i, cpu, me;
138 cpumask_var_t cpus; 138 cpumask_var_t cpus;
139 bool called = true; 139 bool called = true;
140 struct kvm_vcpu *vcpu; 140 struct kvm_vcpu *vcpu;
141 141
142 zalloc_cpumask_var(&cpus, GFP_ATOMIC); 142 zalloc_cpumask_var(&cpus, GFP_ATOMIC);
143 143
144 raw_spin_lock(&kvm->requests_lock); 144 raw_spin_lock(&kvm->requests_lock);
145 me = smp_processor_id(); 145 me = smp_processor_id();
146 kvm_for_each_vcpu(i, vcpu, kvm) { 146 kvm_for_each_vcpu(i, vcpu, kvm) {
147 if (test_and_set_bit(req, &vcpu->requests)) 147 if (test_and_set_bit(req, &vcpu->requests))
148 continue; 148 continue;
149 cpu = vcpu->cpu; 149 cpu = vcpu->cpu;
150 if (cpus != NULL && cpu != -1 && cpu != me) 150 if (cpus != NULL && cpu != -1 && cpu != me)
151 cpumask_set_cpu(cpu, cpus); 151 cpumask_set_cpu(cpu, cpus);
152 } 152 }
153 if (unlikely(cpus == NULL)) 153 if (unlikely(cpus == NULL))
154 smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1); 154 smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
155 else if (!cpumask_empty(cpus)) 155 else if (!cpumask_empty(cpus))
156 smp_call_function_many(cpus, ack_flush, NULL, 1); 156 smp_call_function_many(cpus, ack_flush, NULL, 1);
157 else 157 else
158 called = false; 158 called = false;
159 raw_spin_unlock(&kvm->requests_lock); 159 raw_spin_unlock(&kvm->requests_lock);
160 free_cpumask_var(cpus); 160 free_cpumask_var(cpus);
161 return called; 161 return called;
162 } 162 }
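make_all_cpus_request relies on test_and_set_bit: a vcpu that already has the request pending is skipped, so it is not IPI'd twice for the same request. A tiny C11-atomics version of that per-request bit (requests and post_request are illustrative stand-ins for vcpu->requests and the kernel bitop):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_uint requests;    /* one bit per request type */

    /* Returns 1 if the bit was newly set, i.e. this vcpu still needs an IPI. */
    static int post_request(unsigned req)
    {
            unsigned bit = 1u << req;
            return (atomic_fetch_or(&requests, bit) & bit) == 0;
    }

    int main(void)
    {
            printf("first post needs IPI:  %d\n", post_request(3));   /* 1 */
            printf("second post needs IPI: %d\n", post_request(3));   /* 0 */
            return 0;
    }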
163 163
164 void kvm_flush_remote_tlbs(struct kvm *kvm) 164 void kvm_flush_remote_tlbs(struct kvm *kvm)
165 { 165 {
166 if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) 166 if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
167 ++kvm->stat.remote_tlb_flush; 167 ++kvm->stat.remote_tlb_flush;
168 } 168 }
169 169
170 void kvm_reload_remote_mmus(struct kvm *kvm) 170 void kvm_reload_remote_mmus(struct kvm *kvm)
171 { 171 {
172 make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); 172 make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
173 } 173 }
174 174
175 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) 175 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
176 { 176 {
177 struct page *page; 177 struct page *page;
178 int r; 178 int r;
179 179
180 mutex_init(&vcpu->mutex); 180 mutex_init(&vcpu->mutex);
181 vcpu->cpu = -1; 181 vcpu->cpu = -1;
182 vcpu->kvm = kvm; 182 vcpu->kvm = kvm;
183 vcpu->vcpu_id = id; 183 vcpu->vcpu_id = id;
184 init_waitqueue_head(&vcpu->wq); 184 init_waitqueue_head(&vcpu->wq);
185 185
186 page = alloc_page(GFP_KERNEL | __GFP_ZERO); 186 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
187 if (!page) { 187 if (!page) {
188 r = -ENOMEM; 188 r = -ENOMEM;
189 goto fail; 189 goto fail;
190 } 190 }
191 vcpu->run = page_address(page); 191 vcpu->run = page_address(page);
192 192
193 r = kvm_arch_vcpu_init(vcpu); 193 r = kvm_arch_vcpu_init(vcpu);
194 if (r < 0) 194 if (r < 0)
195 goto fail_free_run; 195 goto fail_free_run;
196 return 0; 196 return 0;
197 197
198 fail_free_run: 198 fail_free_run:
199 free_page((unsigned long)vcpu->run); 199 free_page((unsigned long)vcpu->run);
200 fail: 200 fail:
201 return r; 201 return r;
202 } 202 }
203 EXPORT_SYMBOL_GPL(kvm_vcpu_init); 203 EXPORT_SYMBOL_GPL(kvm_vcpu_init);
204 204
205 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) 205 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
206 { 206 {
207 kvm_arch_vcpu_uninit(vcpu); 207 kvm_arch_vcpu_uninit(vcpu);
208 free_page((unsigned long)vcpu->run); 208 free_page((unsigned long)vcpu->run);
209 } 209 }
210 EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); 210 EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
211 211
212 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) 212 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
213 static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) 213 static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
214 { 214 {
215 return container_of(mn, struct kvm, mmu_notifier); 215 return container_of(mn, struct kvm, mmu_notifier);
216 } 216 }
217 217
218 static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, 218 static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
219 struct mm_struct *mm, 219 struct mm_struct *mm,
220 unsigned long address) 220 unsigned long address)
221 { 221 {
222 struct kvm *kvm = mmu_notifier_to_kvm(mn); 222 struct kvm *kvm = mmu_notifier_to_kvm(mn);
223 int need_tlb_flush, idx; 223 int need_tlb_flush, idx;
224 224
225 /* 225 /*
226 * When ->invalidate_page runs, the linux pte has been zapped 226 * When ->invalidate_page runs, the linux pte has been zapped
227 * already but the page is still allocated until 227 * already but the page is still allocated until
228 * ->invalidate_page returns. So if we increase the sequence 228 * ->invalidate_page returns. So if we increase the sequence
229 * here the kvm page fault will notice if the spte can't be 229 * here the kvm page fault will notice if the spte can't be
230 * established because the page is going to be freed. If 230 * established because the page is going to be freed. If
231 * instead the kvm page fault establishes the spte before 231 * instead the kvm page fault establishes the spte before
232 * ->invalidate_page runs, kvm_unmap_hva will release it 232 * ->invalidate_page runs, kvm_unmap_hva will release it
233 * before returning. 233 * before returning.
234 * 234 *
235 * The sequence increase only need to be seen at spin_unlock 235 * The sequence increase only need to be seen at spin_unlock
236 * time, and not at spin_lock time. 236 * time, and not at spin_lock time.
237 * 237 *
238 * Increasing the sequence after the spin_unlock would be 238 * Increasing the sequence after the spin_unlock would be
239 * unsafe because the kvm page fault could then establish the 239 * unsafe because the kvm page fault could then establish the
240 * pte after kvm_unmap_hva returned, without noticing the page 240 * pte after kvm_unmap_hva returned, without noticing the page
241 * is going to be freed. 241 * is going to be freed.
242 */ 242 */
243 idx = srcu_read_lock(&kvm->srcu); 243 idx = srcu_read_lock(&kvm->srcu);
244 spin_lock(&kvm->mmu_lock); 244 spin_lock(&kvm->mmu_lock);
245 kvm->mmu_notifier_seq++; 245 kvm->mmu_notifier_seq++;
246 need_tlb_flush = kvm_unmap_hva(kvm, address); 246 need_tlb_flush = kvm_unmap_hva(kvm, address);
247 spin_unlock(&kvm->mmu_lock); 247 spin_unlock(&kvm->mmu_lock);
248 srcu_read_unlock(&kvm->srcu, idx); 248 srcu_read_unlock(&kvm->srcu, idx);
249 249
250 /* we've to flush the tlb before the pages can be freed */ 250 /* we've to flush the tlb before the pages can be freed */
251 if (need_tlb_flush) 251 if (need_tlb_flush)
252 kvm_flush_remote_tlbs(kvm); 252 kvm_flush_remote_tlbs(kvm);
253 253
254 } 254 }
255 255
256 static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, 256 static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
257 struct mm_struct *mm, 257 struct mm_struct *mm,
258 unsigned long address, 258 unsigned long address,
259 pte_t pte) 259 pte_t pte)
260 { 260 {
261 struct kvm *kvm = mmu_notifier_to_kvm(mn); 261 struct kvm *kvm = mmu_notifier_to_kvm(mn);
262 int idx; 262 int idx;
263 263
264 idx = srcu_read_lock(&kvm->srcu); 264 idx = srcu_read_lock(&kvm->srcu);
265 spin_lock(&kvm->mmu_lock); 265 spin_lock(&kvm->mmu_lock);
266 kvm->mmu_notifier_seq++; 266 kvm->mmu_notifier_seq++;
267 kvm_set_spte_hva(kvm, address, pte); 267 kvm_set_spte_hva(kvm, address, pte);
268 spin_unlock(&kvm->mmu_lock); 268 spin_unlock(&kvm->mmu_lock);
269 srcu_read_unlock(&kvm->srcu, idx); 269 srcu_read_unlock(&kvm->srcu, idx);
270 } 270 }
271 271
272 static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, 272 static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
273 struct mm_struct *mm, 273 struct mm_struct *mm,
274 unsigned long start, 274 unsigned long start,
275 unsigned long end) 275 unsigned long end)
276 { 276 {
277 struct kvm *kvm = mmu_notifier_to_kvm(mn); 277 struct kvm *kvm = mmu_notifier_to_kvm(mn);
278 int need_tlb_flush = 0, idx; 278 int need_tlb_flush = 0, idx;
279 279
280 idx = srcu_read_lock(&kvm->srcu); 280 idx = srcu_read_lock(&kvm->srcu);
281 spin_lock(&kvm->mmu_lock); 281 spin_lock(&kvm->mmu_lock);
282 /* 282 /*
283 * The count increase must become visible at unlock time as no 283 * The count increase must become visible at unlock time as no
284 * spte can be established without taking the mmu_lock and 284 * spte can be established without taking the mmu_lock and
285 * count is also read inside the mmu_lock critical section. 285 * count is also read inside the mmu_lock critical section.
286 */ 286 */
287 kvm->mmu_notifier_count++; 287 kvm->mmu_notifier_count++;
288 for (; start < end; start += PAGE_SIZE) 288 for (; start < end; start += PAGE_SIZE)
289 need_tlb_flush |= kvm_unmap_hva(kvm, start); 289 need_tlb_flush |= kvm_unmap_hva(kvm, start);
290 spin_unlock(&kvm->mmu_lock); 290 spin_unlock(&kvm->mmu_lock);
291 srcu_read_unlock(&kvm->srcu, idx); 291 srcu_read_unlock(&kvm->srcu, idx);
292 292
293 /* we've to flush the tlb before the pages can be freed */ 293 /* we've to flush the tlb before the pages can be freed */
294 if (need_tlb_flush) 294 if (need_tlb_flush)
295 kvm_flush_remote_tlbs(kvm); 295 kvm_flush_remote_tlbs(kvm);
296 } 296 }
297 297
298 static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, 298 static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
299 struct mm_struct *mm, 299 struct mm_struct *mm,
300 unsigned long start, 300 unsigned long start,
301 unsigned long end) 301 unsigned long end)
302 { 302 {
303 struct kvm *kvm = mmu_notifier_to_kvm(mn); 303 struct kvm *kvm = mmu_notifier_to_kvm(mn);
304 304
305 spin_lock(&kvm->mmu_lock); 305 spin_lock(&kvm->mmu_lock);
306 /* 306 /*
307 * This sequence increase will notify the kvm page fault that 307 * This sequence increase will notify the kvm page fault that
308 * the page that is going to be mapped in the spte could have 308 * the page that is going to be mapped in the spte could have
309 * been freed. 309 * been freed.
310 */ 310 */
311 kvm->mmu_notifier_seq++; 311 kvm->mmu_notifier_seq++;
312 /* 312 /*
313 * The above sequence increase must be visible before the 313 * The above sequence increase must be visible before the
314 * below count decrease but both values are read by the kvm 314 * below count decrease but both values are read by the kvm
315 * page fault under mmu_lock spinlock so we don't need to add 315 * page fault under mmu_lock spinlock so we don't need to add
316 * a smp_wmb() here in between the two. 316 * a smp_wmb() here in between the two.
317 */ 317 */
318 kvm->mmu_notifier_count--; 318 kvm->mmu_notifier_count--;
319 spin_unlock(&kvm->mmu_lock); 319 spin_unlock(&kvm->mmu_lock);
320 320
321 BUG_ON(kvm->mmu_notifier_count < 0); 321 BUG_ON(kvm->mmu_notifier_count < 0);
322 } 322 }
323 323
324 static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, 324 static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
325 struct mm_struct *mm, 325 struct mm_struct *mm,
326 unsigned long address) 326 unsigned long address)
327 { 327 {
328 struct kvm *kvm = mmu_notifier_to_kvm(mn); 328 struct kvm *kvm = mmu_notifier_to_kvm(mn);
329 int young, idx; 329 int young, idx;
330 330
331 idx = srcu_read_lock(&kvm->srcu); 331 idx = srcu_read_lock(&kvm->srcu);
332 spin_lock(&kvm->mmu_lock); 332 spin_lock(&kvm->mmu_lock);
333 young = kvm_age_hva(kvm, address); 333 young = kvm_age_hva(kvm, address);
334 spin_unlock(&kvm->mmu_lock); 334 spin_unlock(&kvm->mmu_lock);
335 srcu_read_unlock(&kvm->srcu, idx); 335 srcu_read_unlock(&kvm->srcu, idx);
336 336
337 if (young) 337 if (young)
338 kvm_flush_remote_tlbs(kvm); 338 kvm_flush_remote_tlbs(kvm);
339 339
340 return young; 340 return young;
341 } 341 }
342 342
343 static void kvm_mmu_notifier_release(struct mmu_notifier *mn, 343 static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
344 struct mm_struct *mm) 344 struct mm_struct *mm)
345 { 345 {
346 struct kvm *kvm = mmu_notifier_to_kvm(mn); 346 struct kvm *kvm = mmu_notifier_to_kvm(mn);
347 int idx; 347 int idx;
348 348
349 idx = srcu_read_lock(&kvm->srcu); 349 idx = srcu_read_lock(&kvm->srcu);
350 kvm_arch_flush_shadow(kvm); 350 kvm_arch_flush_shadow(kvm);
351 srcu_read_unlock(&kvm->srcu, idx); 351 srcu_read_unlock(&kvm->srcu, idx);
352 } 352 }
353 353
354 static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { 354 static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
355 .invalidate_page = kvm_mmu_notifier_invalidate_page, 355 .invalidate_page = kvm_mmu_notifier_invalidate_page,
356 .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, 356 .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
357 .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, 357 .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
358 .clear_flush_young = kvm_mmu_notifier_clear_flush_young, 358 .clear_flush_young = kvm_mmu_notifier_clear_flush_young,
359 .change_pte = kvm_mmu_notifier_change_pte, 359 .change_pte = kvm_mmu_notifier_change_pte,
360 .release = kvm_mmu_notifier_release, 360 .release = kvm_mmu_notifier_release,
361 }; 361 };
362 362
363 static int kvm_init_mmu_notifier(struct kvm *kvm) 363 static int kvm_init_mmu_notifier(struct kvm *kvm)
364 { 364 {
365 kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; 365 kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
366 return mmu_notifier_register(&kvm->mmu_notifier, current->mm); 366 return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
367 } 367 }
368 368
369 #else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */ 369 #else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */
370 370
371 static int kvm_init_mmu_notifier(struct kvm *kvm) 371 static int kvm_init_mmu_notifier(struct kvm *kvm)
372 { 372 {
373 return 0; 373 return 0;
374 } 374 }
375 375
376 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ 376 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
377 377
378 static struct kvm *kvm_create_vm(void) 378 static struct kvm *kvm_create_vm(void)
379 { 379 {
380 int r = 0, i; 380 int r = 0, i;
381 struct kvm *kvm = kvm_arch_create_vm(); 381 struct kvm *kvm = kvm_arch_create_vm();
382 382
383 if (IS_ERR(kvm)) 383 if (IS_ERR(kvm))
384 goto out; 384 goto out;
385 385
386 r = hardware_enable_all(); 386 r = hardware_enable_all();
387 if (r) 387 if (r)
388 goto out_err_nodisable; 388 goto out_err_nodisable;
389 389
390 #ifdef CONFIG_HAVE_KVM_IRQCHIP 390 #ifdef CONFIG_HAVE_KVM_IRQCHIP
391 INIT_HLIST_HEAD(&kvm->mask_notifier_list); 391 INIT_HLIST_HEAD(&kvm->mask_notifier_list);
392 INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); 392 INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
393 #endif 393 #endif
394 394
395 r = -ENOMEM; 395 r = -ENOMEM;
396 kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 396 kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
397 if (!kvm->memslots) 397 if (!kvm->memslots)
398 goto out_err; 398 goto out_err;
399 if (init_srcu_struct(&kvm->srcu)) 399 if (init_srcu_struct(&kvm->srcu))
400 goto out_err; 400 goto out_err;
401 for (i = 0; i < KVM_NR_BUSES; i++) { 401 for (i = 0; i < KVM_NR_BUSES; i++) {
402 kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus), 402 kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
403 GFP_KERNEL); 403 GFP_KERNEL);
404 if (!kvm->buses[i]) { 404 if (!kvm->buses[i]) {
405 cleanup_srcu_struct(&kvm->srcu); 405 cleanup_srcu_struct(&kvm->srcu);
406 goto out_err; 406 goto out_err;
407 } 407 }
408 } 408 }
409 409
410 r = kvm_init_mmu_notifier(kvm); 410 r = kvm_init_mmu_notifier(kvm);
411 if (r) { 411 if (r) {
412 cleanup_srcu_struct(&kvm->srcu); 412 cleanup_srcu_struct(&kvm->srcu);
413 goto out_err; 413 goto out_err;
414 } 414 }
415 415
416 kvm->mm = current->mm; 416 kvm->mm = current->mm;
417 atomic_inc(&kvm->mm->mm_count); 417 atomic_inc(&kvm->mm->mm_count);
418 spin_lock_init(&kvm->mmu_lock); 418 spin_lock_init(&kvm->mmu_lock);
419 raw_spin_lock_init(&kvm->requests_lock); 419 raw_spin_lock_init(&kvm->requests_lock);
420 kvm_eventfd_init(kvm); 420 kvm_eventfd_init(kvm);
421 mutex_init(&kvm->lock); 421 mutex_init(&kvm->lock);
422 mutex_init(&kvm->irq_lock); 422 mutex_init(&kvm->irq_lock);
423 mutex_init(&kvm->slots_lock); 423 mutex_init(&kvm->slots_lock);
424 atomic_set(&kvm->users_count, 1); 424 atomic_set(&kvm->users_count, 1);
425 spin_lock(&kvm_lock); 425 spin_lock(&kvm_lock);
426 list_add(&kvm->vm_list, &vm_list); 426 list_add(&kvm->vm_list, &vm_list);
427 spin_unlock(&kvm_lock); 427 spin_unlock(&kvm_lock);
428 out: 428 out:
429 return kvm; 429 return kvm;
430 430
431 out_err: 431 out_err:
432 hardware_disable_all(); 432 hardware_disable_all();
433 out_err_nodisable: 433 out_err_nodisable:
434 for (i = 0; i < KVM_NR_BUSES; i++) 434 for (i = 0; i < KVM_NR_BUSES; i++)
435 kfree(kvm->buses[i]); 435 kfree(kvm->buses[i]);
436 kfree(kvm->memslots); 436 kfree(kvm->memslots);
437 kfree(kvm); 437 kfree(kvm);
438 return ERR_PTR(r); 438 return ERR_PTR(r);
439 } 439 }
440 440
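kvm_create_vm() above is what ultimately services the KVM_CREATE_VM ioctl on /dev/kvm. A minimal user-space sketch of the calls that reach it (error handling trimmed to the essentials):

/* Minimal sketch: create a VM from user space. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	if (kvm_fd < 0) { perror("open /dev/kvm"); return 1; }

	/* Sanity-check the ABI version before doing anything else. */
	if (ioctl(kvm_fd, KVM_GET_API_VERSION, 0) != KVM_API_VERSION) {
		fprintf(stderr, "unexpected KVM API version\n");
		return 1;
	}

	/* This lands in kvm_create_vm() and returns a new VM fd. */
	int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
	if (vm_fd < 0) { perror("KVM_CREATE_VM"); return 1; }

	printf("vm fd = %d\n", vm_fd);
	return 0;
}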
441 /* 441 /*
442 * Free any memory in @free but not in @dont. 442 * Free any memory in @free but not in @dont.
443 */ 443 */
444 static void kvm_free_physmem_slot(struct kvm_memory_slot *free, 444 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
445 struct kvm_memory_slot *dont) 445 struct kvm_memory_slot *dont)
446 { 446 {
447 int i; 447 int i;
448 448
449 if (!dont || free->rmap != dont->rmap) 449 if (!dont || free->rmap != dont->rmap)
450 vfree(free->rmap); 450 vfree(free->rmap);
451 451
452 if (!dont || free->dirty_bitmap != dont->dirty_bitmap) 452 if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
453 vfree(free->dirty_bitmap); 453 vfree(free->dirty_bitmap);
454 454
455 455
456 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { 456 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
457 if (!dont || free->lpage_info[i] != dont->lpage_info[i]) { 457 if (!dont || free->lpage_info[i] != dont->lpage_info[i]) {
458 vfree(free->lpage_info[i]); 458 vfree(free->lpage_info[i]);
459 free->lpage_info[i] = NULL; 459 free->lpage_info[i] = NULL;
460 } 460 }
461 } 461 }
462 462
463 free->npages = 0; 463 free->npages = 0;
464 free->dirty_bitmap = NULL; 464 free->dirty_bitmap = NULL;
465 free->rmap = NULL; 465 free->rmap = NULL;
466 } 466 }
467 467
468 void kvm_free_physmem(struct kvm *kvm) 468 void kvm_free_physmem(struct kvm *kvm)
469 { 469 {
470 int i; 470 int i;
471 struct kvm_memslots *slots = kvm->memslots; 471 struct kvm_memslots *slots = kvm->memslots;
472 472
473 for (i = 0; i < slots->nmemslots; ++i) 473 for (i = 0; i < slots->nmemslots; ++i)
474 kvm_free_physmem_slot(&slots->memslots[i], NULL); 474 kvm_free_physmem_slot(&slots->memslots[i], NULL);
475 475
476 kfree(kvm->memslots); 476 kfree(kvm->memslots);
477 } 477 }
478 478
479 static void kvm_destroy_vm(struct kvm *kvm) 479 static void kvm_destroy_vm(struct kvm *kvm)
480 { 480 {
481 int i; 481 int i;
482 struct mm_struct *mm = kvm->mm; 482 struct mm_struct *mm = kvm->mm;
483 483
484 kvm_arch_sync_events(kvm); 484 kvm_arch_sync_events(kvm);
485 spin_lock(&kvm_lock); 485 spin_lock(&kvm_lock);
486 list_del(&kvm->vm_list); 486 list_del(&kvm->vm_list);
487 spin_unlock(&kvm_lock); 487 spin_unlock(&kvm_lock);
488 kvm_free_irq_routing(kvm); 488 kvm_free_irq_routing(kvm);
489 for (i = 0; i < KVM_NR_BUSES; i++) 489 for (i = 0; i < KVM_NR_BUSES; i++)
490 kvm_io_bus_destroy(kvm->buses[i]); 490 kvm_io_bus_destroy(kvm->buses[i]);
491 kvm_coalesced_mmio_free(kvm); 491 kvm_coalesced_mmio_free(kvm);
492 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) 492 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
493 mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); 493 mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
494 #else 494 #else
495 kvm_arch_flush_shadow(kvm); 495 kvm_arch_flush_shadow(kvm);
496 #endif 496 #endif
497 kvm_arch_destroy_vm(kvm); 497 kvm_arch_destroy_vm(kvm);
498 hardware_disable_all(); 498 hardware_disable_all();
499 mmdrop(mm); 499 mmdrop(mm);
500 } 500 }
501 501
502 void kvm_get_kvm(struct kvm *kvm) 502 void kvm_get_kvm(struct kvm *kvm)
503 { 503 {
504 atomic_inc(&kvm->users_count); 504 atomic_inc(&kvm->users_count);
505 } 505 }
506 EXPORT_SYMBOL_GPL(kvm_get_kvm); 506 EXPORT_SYMBOL_GPL(kvm_get_kvm);
507 507
508 void kvm_put_kvm(struct kvm *kvm) 508 void kvm_put_kvm(struct kvm *kvm)
509 { 509 {
510 if (atomic_dec_and_test(&kvm->users_count)) 510 if (atomic_dec_and_test(&kvm->users_count))
511 kvm_destroy_vm(kvm); 511 kvm_destroy_vm(kvm);
512 } 512 }
513 EXPORT_SYMBOL_GPL(kvm_put_kvm); 513 EXPORT_SYMBOL_GPL(kvm_put_kvm);
514 514
515 515
516 static int kvm_vm_release(struct inode *inode, struct file *filp) 516 static int kvm_vm_release(struct inode *inode, struct file *filp)
517 { 517 {
518 struct kvm *kvm = filp->private_data; 518 struct kvm *kvm = filp->private_data;
519 519
520 kvm_irqfd_release(kvm); 520 kvm_irqfd_release(kvm);
521 521
522 kvm_put_kvm(kvm); 522 kvm_put_kvm(kvm);
523 return 0; 523 return 0;
524 } 524 }
525 525
526 /* 526 /*
527 * Allocate some memory and give it an address in the guest physical address 527 * Allocate some memory and give it an address in the guest physical address
528 * space. 528 * space.
529 * 529 *
530 * Discontiguous memory is allowed, mostly for framebuffers. 530 * Discontiguous memory is allowed, mostly for framebuffers.
531 * 531 *
532 * Must be called holding mmap_sem for write. 532 * Must be called holding mmap_sem for write.
533 */ 533 */
534 int __kvm_set_memory_region(struct kvm *kvm, 534 int __kvm_set_memory_region(struct kvm *kvm,
535 struct kvm_userspace_memory_region *mem, 535 struct kvm_userspace_memory_region *mem,
536 int user_alloc) 536 int user_alloc)
537 { 537 {
538 int r, flush_shadow = 0; 538 int r, flush_shadow = 0;
539 gfn_t base_gfn; 539 gfn_t base_gfn;
540 unsigned long npages; 540 unsigned long npages;
541 unsigned long i; 541 unsigned long i;
542 struct kvm_memory_slot *memslot; 542 struct kvm_memory_slot *memslot;
543 struct kvm_memory_slot old, new; 543 struct kvm_memory_slot old, new;
544 struct kvm_memslots *slots, *old_memslots; 544 struct kvm_memslots *slots, *old_memslots;
545 545
546 r = -EINVAL; 546 r = -EINVAL;
547 /* General sanity checks */ 547 /* General sanity checks */
548 if (mem->memory_size & (PAGE_SIZE - 1)) 548 if (mem->memory_size & (PAGE_SIZE - 1))
549 goto out; 549 goto out;
550 if (mem->guest_phys_addr & (PAGE_SIZE - 1)) 550 if (mem->guest_phys_addr & (PAGE_SIZE - 1))
551 goto out; 551 goto out;
552 if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1))) 552 if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
553 goto out; 553 goto out;
554 if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) 554 if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
555 goto out; 555 goto out;
556 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) 556 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
557 goto out; 557 goto out;
558 558
559 memslot = &kvm->memslots->memslots[mem->slot]; 559 memslot = &kvm->memslots->memslots[mem->slot];
560 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; 560 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
561 npages = mem->memory_size >> PAGE_SHIFT; 561 npages = mem->memory_size >> PAGE_SHIFT;
562 562
563 r = -EINVAL; 563 r = -EINVAL;
564 if (npages > KVM_MEM_MAX_NR_PAGES) 564 if (npages > KVM_MEM_MAX_NR_PAGES)
565 goto out; 565 goto out;
566 566
567 if (!npages) 567 if (!npages)
568 mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; 568 mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
569 569
570 new = old = *memslot; 570 new = old = *memslot;
571 571
572 new.base_gfn = base_gfn; 572 new.base_gfn = base_gfn;
573 new.npages = npages; 573 new.npages = npages;
574 new.flags = mem->flags; 574 new.flags = mem->flags;
575 575
576 /* Disallow changing a memory slot's size. */ 576 /* Disallow changing a memory slot's size. */
577 r = -EINVAL; 577 r = -EINVAL;
578 if (npages && old.npages && npages != old.npages) 578 if (npages && old.npages && npages != old.npages)
579 goto out_free; 579 goto out_free;
580 580
581 /* Check for overlaps */ 581 /* Check for overlaps */
582 r = -EEXIST; 582 r = -EEXIST;
583 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { 583 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
584 struct kvm_memory_slot *s = &kvm->memslots->memslots[i]; 584 struct kvm_memory_slot *s = &kvm->memslots->memslots[i];
585 585
586 if (s == memslot || !s->npages) 586 if (s == memslot || !s->npages)
587 continue; 587 continue;
588 if (!((base_gfn + npages <= s->base_gfn) || 588 if (!((base_gfn + npages <= s->base_gfn) ||
589 (base_gfn >= s->base_gfn + s->npages))) 589 (base_gfn >= s->base_gfn + s->npages)))
590 goto out_free; 590 goto out_free;
591 } 591 }
592 592
593 /* Free page dirty bitmap if unneeded */ 593 /* Free page dirty bitmap if unneeded */
594 if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES)) 594 if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
595 new.dirty_bitmap = NULL; 595 new.dirty_bitmap = NULL;
596 596
597 r = -ENOMEM; 597 r = -ENOMEM;
598 598
599 /* Allocate if a slot is being created */ 599 /* Allocate if a slot is being created */
600 #ifndef CONFIG_S390 600 #ifndef CONFIG_S390
601 if (npages && !new.rmap) { 601 if (npages && !new.rmap) {
602 new.rmap = vmalloc(npages * sizeof(struct page *)); 602 new.rmap = vmalloc(npages * sizeof(struct page *));
603 603
604 if (!new.rmap) 604 if (!new.rmap)
605 goto out_free; 605 goto out_free;
606 606
607 memset(new.rmap, 0, npages * sizeof(*new.rmap)); 607 memset(new.rmap, 0, npages * sizeof(*new.rmap));
608 608
609 new.user_alloc = user_alloc; 609 new.user_alloc = user_alloc;
610 new.userspace_addr = mem->userspace_addr; 610 new.userspace_addr = mem->userspace_addr;
611 } 611 }
612 if (!npages) 612 if (!npages)
613 goto skip_lpage; 613 goto skip_lpage;
614 614
615 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { 615 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
616 unsigned long ugfn; 616 unsigned long ugfn;
617 unsigned long j; 617 unsigned long j;
618 int lpages; 618 int lpages;
619 int level = i + 2; 619 int level = i + 2;
620 620
621 /* Avoid unused variable warning if no large pages */ 621 /* Avoid unused variable warning if no large pages */
622 (void)level; 622 (void)level;
623 623
624 if (new.lpage_info[i]) 624 if (new.lpage_info[i])
625 continue; 625 continue;
626 626
627 lpages = 1 + (base_gfn + npages - 1) / 627 lpages = 1 + (base_gfn + npages - 1) /
628 KVM_PAGES_PER_HPAGE(level); 628 KVM_PAGES_PER_HPAGE(level);
629 lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level); 629 lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level);
630 630
631 new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i])); 631 new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i]));
632 632
633 if (!new.lpage_info[i]) 633 if (!new.lpage_info[i])
634 goto out_free; 634 goto out_free;
635 635
636 memset(new.lpage_info[i], 0, 636 memset(new.lpage_info[i], 0,
637 lpages * sizeof(*new.lpage_info[i])); 637 lpages * sizeof(*new.lpage_info[i]));
638 638
639 if (base_gfn % KVM_PAGES_PER_HPAGE(level)) 639 if (base_gfn % KVM_PAGES_PER_HPAGE(level))
640 new.lpage_info[i][0].write_count = 1; 640 new.lpage_info[i][0].write_count = 1;
641 if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE(level)) 641 if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE(level))
642 new.lpage_info[i][lpages - 1].write_count = 1; 642 new.lpage_info[i][lpages - 1].write_count = 1;
643 ugfn = new.userspace_addr >> PAGE_SHIFT; 643 ugfn = new.userspace_addr >> PAGE_SHIFT;
644 /* 644 /*
645 * If the gfn and userspace address are not aligned wrt each 645 * If the gfn and userspace address are not aligned wrt each
646 * other, or if explicitly asked to, disable large page 646 * other, or if explicitly asked to, disable large page
647 * support for this slot 647 * support for this slot
648 */ 648 */
649 if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || 649 if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
650 !largepages_enabled) 650 !largepages_enabled)
651 for (j = 0; j < lpages; ++j) 651 for (j = 0; j < lpages; ++j)
652 new.lpage_info[i][j].write_count = 1; 652 new.lpage_info[i][j].write_count = 1;
653 } 653 }
654 654
655 skip_lpage: 655 skip_lpage:
656 656
657 /* Allocate page dirty bitmap if needed */ 657 /* Allocate page dirty bitmap if needed */
658 if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { 658 if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
659 unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new); 659 unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new);
660 660
661 new.dirty_bitmap = vmalloc(dirty_bytes); 661 new.dirty_bitmap = vmalloc(dirty_bytes);
662 if (!new.dirty_bitmap) 662 if (!new.dirty_bitmap)
663 goto out_free; 663 goto out_free;
664 memset(new.dirty_bitmap, 0, dirty_bytes); 664 memset(new.dirty_bitmap, 0, dirty_bytes);
665 /* destroy any largepage mappings for dirty tracking */ 665 /* destroy any largepage mappings for dirty tracking */
666 if (old.npages) 666 if (old.npages)
667 flush_shadow = 1; 667 flush_shadow = 1;
668 } 668 }
669 #else /* not defined CONFIG_S390 */ 669 #else /* not defined CONFIG_S390 */
670 new.user_alloc = user_alloc; 670 new.user_alloc = user_alloc;
671 if (user_alloc) 671 if (user_alloc)
672 new.userspace_addr = mem->userspace_addr; 672 new.userspace_addr = mem->userspace_addr;
673 #endif /* not defined CONFIG_S390 */ 673 #endif /* not defined CONFIG_S390 */
674 674
675 if (!npages) { 675 if (!npages) {
676 r = -ENOMEM; 676 r = -ENOMEM;
677 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 677 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
678 if (!slots) 678 if (!slots)
679 goto out_free; 679 goto out_free;
680 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); 680 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
681 if (mem->slot >= slots->nmemslots) 681 if (mem->slot >= slots->nmemslots)
682 slots->nmemslots = mem->slot + 1; 682 slots->nmemslots = mem->slot + 1;
683 slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID; 683 slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
684 684
685 old_memslots = kvm->memslots; 685 old_memslots = kvm->memslots;
686 rcu_assign_pointer(kvm->memslots, slots); 686 rcu_assign_pointer(kvm->memslots, slots);
687 synchronize_srcu_expedited(&kvm->srcu); 687 synchronize_srcu_expedited(&kvm->srcu);
688 /* From this point no new shadow pages pointing to a deleted 688 /* From this point no new shadow pages pointing to a deleted
689 * memslot will be created. 689 * memslot will be created.
690 * 690 *
691 * validation of sp->gfn happens in: 691 * validation of sp->gfn happens in:
692 * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) 692 * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
693 * - kvm_is_visible_gfn (mmu_check_roots) 693 * - kvm_is_visible_gfn (mmu_check_roots)
694 */ 694 */
695 kvm_arch_flush_shadow(kvm); 695 kvm_arch_flush_shadow(kvm);
696 kfree(old_memslots); 696 kfree(old_memslots);
697 } 697 }
698 698
699 r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); 699 r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
700 if (r) 700 if (r)
701 goto out_free; 701 goto out_free;
702 702
703 #ifdef CONFIG_DMAR 703 #ifdef CONFIG_DMAR
704 /* map the pages in iommu page table */ 704 /* map the pages in iommu page table */
705 if (npages) { 705 if (npages) {
706 r = kvm_iommu_map_pages(kvm, &new); 706 r = kvm_iommu_map_pages(kvm, &new);
707 if (r) 707 if (r)
708 goto out_free; 708 goto out_free;
709 } 709 }
710 #endif 710 #endif
711 711
712 r = -ENOMEM; 712 r = -ENOMEM;
713 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 713 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
714 if (!slots) 714 if (!slots)
715 goto out_free; 715 goto out_free;
716 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); 716 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
717 if (mem->slot >= slots->nmemslots) 717 if (mem->slot >= slots->nmemslots)
718 slots->nmemslots = mem->slot + 1; 718 slots->nmemslots = mem->slot + 1;
719 719
720 /* actual memory is freed via old in kvm_free_physmem_slot below */ 720 /* actual memory is freed via old in kvm_free_physmem_slot below */
721 if (!npages) { 721 if (!npages) {
722 new.rmap = NULL; 722 new.rmap = NULL;
723 new.dirty_bitmap = NULL; 723 new.dirty_bitmap = NULL;
724 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) 724 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
725 new.lpage_info[i] = NULL; 725 new.lpage_info[i] = NULL;
726 } 726 }
727 727
728 slots->memslots[mem->slot] = new; 728 slots->memslots[mem->slot] = new;
729 old_memslots = kvm->memslots; 729 old_memslots = kvm->memslots;
730 rcu_assign_pointer(kvm->memslots, slots); 730 rcu_assign_pointer(kvm->memslots, slots);
731 synchronize_srcu_expedited(&kvm->srcu); 731 synchronize_srcu_expedited(&kvm->srcu);
732 732
733 kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); 733 kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
734 734
735 kvm_free_physmem_slot(&old, &new); 735 kvm_free_physmem_slot(&old, &new);
736 kfree(old_memslots); 736 kfree(old_memslots);
737 737
738 if (flush_shadow) 738 if (flush_shadow)
739 kvm_arch_flush_shadow(kvm); 739 kvm_arch_flush_shadow(kvm);
740 740
741 return 0; 741 return 0;
742 742
743 out_free: 743 out_free:
744 kvm_free_physmem_slot(&new, &old); 744 kvm_free_physmem_slot(&new, &old);
745 out: 745 out:
746 return r; 746 return r;
747 747
748 } 748 }
749 EXPORT_SYMBOL_GPL(__kvm_set_memory_region); 749 EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
750 750
751 int kvm_set_memory_region(struct kvm *kvm, 751 int kvm_set_memory_region(struct kvm *kvm,
752 struct kvm_userspace_memory_region *mem, 752 struct kvm_userspace_memory_region *mem,
753 int user_alloc) 753 int user_alloc)
754 { 754 {
755 int r; 755 int r;
756 756
757 mutex_lock(&kvm->slots_lock); 757 mutex_lock(&kvm->slots_lock);
758 r = __kvm_set_memory_region(kvm, mem, user_alloc); 758 r = __kvm_set_memory_region(kvm, mem, user_alloc);
759 mutex_unlock(&kvm->slots_lock); 759 mutex_unlock(&kvm->slots_lock);
760 return r; 760 return r;
761 } 761 }
762 EXPORT_SYMBOL_GPL(kvm_set_memory_region); 762 EXPORT_SYMBOL_GPL(kvm_set_memory_region);
763 763
764 int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, 764 int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
765 struct 765 struct
766 kvm_userspace_memory_region *mem, 766 kvm_userspace_memory_region *mem,
767 int user_alloc) 767 int user_alloc)
768 { 768 {
769 if (mem->slot >= KVM_MEMORY_SLOTS) 769 if (mem->slot >= KVM_MEMORY_SLOTS)
770 return -EINVAL; 770 return -EINVAL;
771 return kvm_set_memory_region(kvm, mem, user_alloc); 771 return kvm_set_memory_region(kvm, mem, user_alloc);
772 } 772 }
773 773
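kvm_vm_ioctl_set_memory_region() above is reached through the KVM_SET_USER_MEMORY_REGION vm ioctl. A hedged user-space sketch of registering slot 0, assuming a vm_fd obtained from a prior KVM_CREATE_VM:

/* Sketch: back 2 MiB of guest physical memory at GPA 0 with anonymous
 * host memory via KVM_SET_USER_MEMORY_REGION (assumes an existing vm_fd). */
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

static int add_slot0(int vm_fd, size_t size)
{
	void *host = mmap(NULL, size, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (host == MAP_FAILED)
		return -1;

	struct kvm_userspace_memory_region region = {
		.slot = 0,
		.flags = KVM_MEM_LOG_DIRTY_PAGES,  /* ask for a dirty bitmap */
		.guest_phys_addr = 0,
		.memory_size = size,               /* must be page-aligned   */
		.userspace_addr = (unsigned long)host,
	};

	/* Dispatched to kvm_vm_ioctl_set_memory_region() in the kernel. */
	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}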
774 int kvm_get_dirty_log(struct kvm *kvm, 774 int kvm_get_dirty_log(struct kvm *kvm,
775 struct kvm_dirty_log *log, int *is_dirty) 775 struct kvm_dirty_log *log, int *is_dirty)
776 { 776 {
777 struct kvm_memory_slot *memslot; 777 struct kvm_memory_slot *memslot;
778 int r, i; 778 int r, i;
779 unsigned long n; 779 unsigned long n;
780 unsigned long any = 0; 780 unsigned long any = 0;
781 781
782 r = -EINVAL; 782 r = -EINVAL;
783 if (log->slot >= KVM_MEMORY_SLOTS) 783 if (log->slot >= KVM_MEMORY_SLOTS)
784 goto out; 784 goto out;
785 785
786 memslot = &kvm->memslots->memslots[log->slot]; 786 memslot = &kvm->memslots->memslots[log->slot];
787 r = -ENOENT; 787 r = -ENOENT;
788 if (!memslot->dirty_bitmap) 788 if (!memslot->dirty_bitmap)
789 goto out; 789 goto out;
790 790
791 n = kvm_dirty_bitmap_bytes(memslot); 791 n = kvm_dirty_bitmap_bytes(memslot);
792 792
793 for (i = 0; !any && i < n/sizeof(long); ++i) 793 for (i = 0; !any && i < n/sizeof(long); ++i)
794 any = memslot->dirty_bitmap[i]; 794 any = memslot->dirty_bitmap[i];
795 795
796 r = -EFAULT; 796 r = -EFAULT;
797 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) 797 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
798 goto out; 798 goto out;
799 799
800 if (any) 800 if (any)
801 *is_dirty = 1; 801 *is_dirty = 1;
802 802
803 r = 0; 803 r = 0;
804 out: 804 out:
805 return r; 805 return r;
806 } 806 }
807 807
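The generic helper kvm_get_dirty_log() above copies a slot's dirty bitmap to user space for several architectures' KVM_GET_DIRTY_LOG handlers. A hedged sketch of the corresponding user-space call, one bit per guest page, assuming the slot was registered with KVM_MEM_LOG_DIRTY_PAGES:

/* Sketch: fetch the dirty bitmap for a memory slot (one bit per page). */
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static unsigned long *get_dirty_bitmap(int vm_fd, int slot, size_t mem_size)
{
	size_t npages = mem_size / 4096;
	size_t bytes = ((npages + 63) / 64) * 8;   /* rounded to 64-bit words */
	unsigned long *bitmap = calloc(1, bytes);
	if (!bitmap)
		return NULL;

	struct kvm_dirty_log log = {
		.slot = slot,
		.dirty_bitmap = bitmap,
	};

	/* The kernel side copies the slot's bitmap out with copy_to_user(). */
	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0) {
		free(bitmap);
		return NULL;
	}
	return bitmap;
}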
808 void kvm_disable_largepages(void) 808 void kvm_disable_largepages(void)
809 { 809 {
810 largepages_enabled = false; 810 largepages_enabled = false;
811 } 811 }
812 EXPORT_SYMBOL_GPL(kvm_disable_largepages); 812 EXPORT_SYMBOL_GPL(kvm_disable_largepages);
813 813
814 int is_error_page(struct page *page) 814 int is_error_page(struct page *page)
815 { 815 {
816 return page == bad_page || page == hwpoison_page; 816 return page == bad_page || page == hwpoison_page;
817 } 817 }
818 EXPORT_SYMBOL_GPL(is_error_page); 818 EXPORT_SYMBOL_GPL(is_error_page);
819 819
820 int is_error_pfn(pfn_t pfn) 820 int is_error_pfn(pfn_t pfn)
821 { 821 {
822 return pfn == bad_pfn || pfn == hwpoison_pfn; 822 return pfn == bad_pfn || pfn == hwpoison_pfn;
823 } 823 }
824 EXPORT_SYMBOL_GPL(is_error_pfn); 824 EXPORT_SYMBOL_GPL(is_error_pfn);
825 825
826 int is_hwpoison_pfn(pfn_t pfn) 826 int is_hwpoison_pfn(pfn_t pfn)
827 { 827 {
828 return pfn == hwpoison_pfn; 828 return pfn == hwpoison_pfn;
829 } 829 }
830 EXPORT_SYMBOL_GPL(is_hwpoison_pfn); 830 EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
831 831
832 static inline unsigned long bad_hva(void) 832 static inline unsigned long bad_hva(void)
833 { 833 {
834 return PAGE_OFFSET; 834 return PAGE_OFFSET;
835 } 835 }
836 836
837 int kvm_is_error_hva(unsigned long addr) 837 int kvm_is_error_hva(unsigned long addr)
838 { 838 {
839 return addr == bad_hva(); 839 return addr == bad_hva();
840 } 840 }
841 EXPORT_SYMBOL_GPL(kvm_is_error_hva); 841 EXPORT_SYMBOL_GPL(kvm_is_error_hva);
842 842
843 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) 843 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
844 { 844 {
845 int i; 845 int i;
846 struct kvm_memslots *slots = kvm_memslots(kvm); 846 struct kvm_memslots *slots = kvm_memslots(kvm);
847 847
848 for (i = 0; i < slots->nmemslots; ++i) { 848 for (i = 0; i < slots->nmemslots; ++i) {
849 struct kvm_memory_slot *memslot = &slots->memslots[i]; 849 struct kvm_memory_slot *memslot = &slots->memslots[i];
850 850
851 if (gfn >= memslot->base_gfn 851 if (gfn >= memslot->base_gfn
852 && gfn < memslot->base_gfn + memslot->npages) 852 && gfn < memslot->base_gfn + memslot->npages)
853 return memslot; 853 return memslot;
854 } 854 }
855 return NULL; 855 return NULL;
856 } 856 }
857 EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased); 857 EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased);
858 858
859 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) 859 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
860 { 860 {
861 gfn = unalias_gfn(kvm, gfn); 861 gfn = unalias_gfn(kvm, gfn);
862 return gfn_to_memslot_unaliased(kvm, gfn); 862 return gfn_to_memslot_unaliased(kvm, gfn);
863 } 863 }
864 864
865 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) 865 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
866 { 866 {
867 int i; 867 int i;
868 struct kvm_memslots *slots = kvm_memslots(kvm); 868 struct kvm_memslots *slots = kvm_memslots(kvm);
869 869
870 gfn = unalias_gfn_instantiation(kvm, gfn); 870 gfn = unalias_gfn_instantiation(kvm, gfn);
871 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { 871 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
872 struct kvm_memory_slot *memslot = &slots->memslots[i]; 872 struct kvm_memory_slot *memslot = &slots->memslots[i];
873 873
874 if (memslot->flags & KVM_MEMSLOT_INVALID) 874 if (memslot->flags & KVM_MEMSLOT_INVALID)
875 continue; 875 continue;
876 876
877 if (gfn >= memslot->base_gfn 877 if (gfn >= memslot->base_gfn
878 && gfn < memslot->base_gfn + memslot->npages) 878 && gfn < memslot->base_gfn + memslot->npages)
879 return 1; 879 return 1;
880 } 880 }
881 return 0; 881 return 0;
882 } 882 }
883 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); 883 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
884 884
885 unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn) 885 unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
886 { 886 {
887 struct vm_area_struct *vma; 887 struct vm_area_struct *vma;
888 unsigned long addr, size; 888 unsigned long addr, size;
889 889
890 size = PAGE_SIZE; 890 size = PAGE_SIZE;
891 891
892 addr = gfn_to_hva(kvm, gfn); 892 addr = gfn_to_hva(kvm, gfn);
893 if (kvm_is_error_hva(addr)) 893 if (kvm_is_error_hva(addr))
894 return PAGE_SIZE; 894 return PAGE_SIZE;
895 895
896 down_read(&current->mm->mmap_sem); 896 down_read(&current->mm->mmap_sem);
897 vma = find_vma(current->mm, addr); 897 vma = find_vma(current->mm, addr);
898 if (!vma) 898 if (!vma)
899 goto out; 899 goto out;
900 900
901 size = vma_kernel_pagesize(vma); 901 size = vma_kernel_pagesize(vma);
902 902
903 out: 903 out:
904 up_read(&current->mm->mmap_sem); 904 up_read(&current->mm->mmap_sem);
905 905
906 return size; 906 return size;
907 } 907 }
908 908
909 int memslot_id(struct kvm *kvm, gfn_t gfn) 909 int memslot_id(struct kvm *kvm, gfn_t gfn)
910 { 910 {
911 int i; 911 int i;
912 struct kvm_memslots *slots = kvm_memslots(kvm); 912 struct kvm_memslots *slots = kvm_memslots(kvm);
913 struct kvm_memory_slot *memslot = NULL; 913 struct kvm_memory_slot *memslot = NULL;
914 914
915 gfn = unalias_gfn(kvm, gfn); 915 gfn = unalias_gfn(kvm, gfn);
916 for (i = 0; i < slots->nmemslots; ++i) { 916 for (i = 0; i < slots->nmemslots; ++i) {
917 memslot = &slots->memslots[i]; 917 memslot = &slots->memslots[i];
918 918
919 if (gfn >= memslot->base_gfn 919 if (gfn >= memslot->base_gfn
920 && gfn < memslot->base_gfn + memslot->npages) 920 && gfn < memslot->base_gfn + memslot->npages)
921 break; 921 break;
922 } 922 }
923 923
924 return memslot - slots->memslots; 924 return memslot - slots->memslots;
925 } 925 }
926 926
927 static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) 927 static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
928 { 928 {
929 return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE; 929 return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
930 } 930 }
931 931
932 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) 932 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
933 { 933 {
934 struct kvm_memory_slot *slot; 934 struct kvm_memory_slot *slot;
935 935
936 gfn = unalias_gfn_instantiation(kvm, gfn); 936 gfn = unalias_gfn_instantiation(kvm, gfn);
937 slot = gfn_to_memslot_unaliased(kvm, gfn); 937 slot = gfn_to_memslot_unaliased(kvm, gfn);
938 if (!slot || slot->flags & KVM_MEMSLOT_INVALID) 938 if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
939 return bad_hva(); 939 return bad_hva();
940 return gfn_to_hva_memslot(slot, gfn); 940 return gfn_to_hva_memslot(slot, gfn);
941 } 941 }
942 EXPORT_SYMBOL_GPL(gfn_to_hva); 942 EXPORT_SYMBOL_GPL(gfn_to_hva);
943 943
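gfn_to_hva() above is nothing more than a memslot lookup followed by linear arithmetic: hva = userspace_addr + (gfn - base_gfn) * PAGE_SIZE. A small illustrative model of that translation, with hypothetical types for a user-space test harness rather than the kernel structures themselves:

/* Illustrative model (hypothetical types): a memslot maps a contiguous
 * range of guest frame numbers onto a contiguous host virtual range, so
 * translation is a bounds check plus linear arithmetic. */
#include <stdint.h>
#include <stddef.h>

#define MODEL_PAGE_SIZE 4096UL

struct model_slot {
	uint64_t base_gfn;        /* first guest frame in the slot */
	uint64_t npages;          /* slot length in pages          */
	uint64_t userspace_addr;  /* host VA backing base_gfn      */
};

static uint64_t model_gfn_to_hva(const struct model_slot *slots, size_t n,
				 uint64_t gfn)
{
	for (size_t i = 0; i < n; i++) {
		const struct model_slot *s = &slots[i];
		if (gfn >= s->base_gfn && gfn < s->base_gfn + s->npages)
			return s->userspace_addr +
			       (gfn - s->base_gfn) * MODEL_PAGE_SIZE;
	}
	return 0;                 /* stand-in for bad_hva() */
}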
944 static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr) 944 static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
945 { 945 {
946 struct page *page[1]; 946 struct page *page[1];
947 int npages; 947 int npages;
948 pfn_t pfn; 948 pfn_t pfn;
949 949
950 might_sleep(); 950 might_sleep();
951 951
952 npages = get_user_pages_fast(addr, 1, 1, page); 952 npages = get_user_pages_fast(addr, 1, 1, page);
953 953
954 if (unlikely(npages != 1)) { 954 if (unlikely(npages != 1)) {
955 struct vm_area_struct *vma; 955 struct vm_area_struct *vma;
956 956
957 if (is_hwpoison_address(addr)) { 957 if (is_hwpoison_address(addr)) {
958 get_page(hwpoison_page); 958 get_page(hwpoison_page);
959 return page_to_pfn(hwpoison_page); 959 return page_to_pfn(hwpoison_page);
960 } 960 }
961 961
962 down_read(&current->mm->mmap_sem); 962 down_read(&current->mm->mmap_sem);
963 vma = find_vma(current->mm, addr); 963 vma = find_vma(current->mm, addr);
964 964
965 if (vma == NULL || addr < vma->vm_start || 965 if (vma == NULL || addr < vma->vm_start ||
966 !(vma->vm_flags & VM_PFNMAP)) { 966 !(vma->vm_flags & VM_PFNMAP)) {
967 up_read(&current->mm->mmap_sem); 967 up_read(&current->mm->mmap_sem);
968 get_page(bad_page); 968 get_page(bad_page);
969 return page_to_pfn(bad_page); 969 return page_to_pfn(bad_page);
970 } 970 }
971 971
972 pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 972 pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
973 up_read(&current->mm->mmap_sem); 973 up_read(&current->mm->mmap_sem);
974 BUG_ON(!kvm_is_mmio_pfn(pfn)); 974 BUG_ON(!kvm_is_mmio_pfn(pfn));
975 } else 975 } else
976 pfn = page_to_pfn(page[0]); 976 pfn = page_to_pfn(page[0]);
977 977
978 return pfn; 978 return pfn;
979 } 979 }
980 980
981 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) 981 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
982 { 982 {
983 unsigned long addr; 983 unsigned long addr;
984 984
985 addr = gfn_to_hva(kvm, gfn); 985 addr = gfn_to_hva(kvm, gfn);
986 if (kvm_is_error_hva(addr)) { 986 if (kvm_is_error_hva(addr)) {
987 get_page(bad_page); 987 get_page(bad_page);
988 return page_to_pfn(bad_page); 988 return page_to_pfn(bad_page);
989 } 989 }
990 990
991 return hva_to_pfn(kvm, addr); 991 return hva_to_pfn(kvm, addr);
992 } 992 }
993 EXPORT_SYMBOL_GPL(gfn_to_pfn); 993 EXPORT_SYMBOL_GPL(gfn_to_pfn);
994 994
995 pfn_t gfn_to_pfn_memslot(struct kvm *kvm, 995 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
996 struct kvm_memory_slot *slot, gfn_t gfn) 996 struct kvm_memory_slot *slot, gfn_t gfn)
997 { 997 {
998 unsigned long addr = gfn_to_hva_memslot(slot, gfn); 998 unsigned long addr = gfn_to_hva_memslot(slot, gfn);
999 return hva_to_pfn(kvm, addr); 999 return hva_to_pfn(kvm, addr);
1000 } 1000 }
1001 1001
1002 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) 1002 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
1003 { 1003 {
1004 pfn_t pfn; 1004 pfn_t pfn;
1005 1005
1006 pfn = gfn_to_pfn(kvm, gfn); 1006 pfn = gfn_to_pfn(kvm, gfn);
1007 if (!kvm_is_mmio_pfn(pfn)) 1007 if (!kvm_is_mmio_pfn(pfn))
1008 return pfn_to_page(pfn); 1008 return pfn_to_page(pfn);
1009 1009
1010 WARN_ON(kvm_is_mmio_pfn(pfn)); 1010 WARN_ON(kvm_is_mmio_pfn(pfn));
1011 1011
1012 get_page(bad_page); 1012 get_page(bad_page);
1013 return bad_page; 1013 return bad_page;
1014 } 1014 }
1015 1015
1016 EXPORT_SYMBOL_GPL(gfn_to_page); 1016 EXPORT_SYMBOL_GPL(gfn_to_page);
1017 1017
1018 void kvm_release_page_clean(struct page *page) 1018 void kvm_release_page_clean(struct page *page)
1019 { 1019 {
1020 kvm_release_pfn_clean(page_to_pfn(page)); 1020 kvm_release_pfn_clean(page_to_pfn(page));
1021 } 1021 }
1022 EXPORT_SYMBOL_GPL(kvm_release_page_clean); 1022 EXPORT_SYMBOL_GPL(kvm_release_page_clean);
1023 1023
1024 void kvm_release_pfn_clean(pfn_t pfn) 1024 void kvm_release_pfn_clean(pfn_t pfn)
1025 { 1025 {
1026 if (!kvm_is_mmio_pfn(pfn)) 1026 if (!kvm_is_mmio_pfn(pfn))
1027 put_page(pfn_to_page(pfn)); 1027 put_page(pfn_to_page(pfn));
1028 } 1028 }
1029 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); 1029 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
1030 1030
1031 void kvm_release_page_dirty(struct page *page) 1031 void kvm_release_page_dirty(struct page *page)
1032 { 1032 {
1033 kvm_release_pfn_dirty(page_to_pfn(page)); 1033 kvm_release_pfn_dirty(page_to_pfn(page));
1034 } 1034 }
1035 EXPORT_SYMBOL_GPL(kvm_release_page_dirty); 1035 EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
1036 1036
1037 void kvm_release_pfn_dirty(pfn_t pfn) 1037 void kvm_release_pfn_dirty(pfn_t pfn)
1038 { 1038 {
1039 kvm_set_pfn_dirty(pfn); 1039 kvm_set_pfn_dirty(pfn);
1040 kvm_release_pfn_clean(pfn); 1040 kvm_release_pfn_clean(pfn);
1041 } 1041 }
1042 EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); 1042 EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
1043 1043
1044 void kvm_set_page_dirty(struct page *page) 1044 void kvm_set_page_dirty(struct page *page)
1045 { 1045 {
1046 kvm_set_pfn_dirty(page_to_pfn(page)); 1046 kvm_set_pfn_dirty(page_to_pfn(page));
1047 } 1047 }
1048 EXPORT_SYMBOL_GPL(kvm_set_page_dirty); 1048 EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
1049 1049
1050 void kvm_set_pfn_dirty(pfn_t pfn) 1050 void kvm_set_pfn_dirty(pfn_t pfn)
1051 { 1051 {
1052 if (!kvm_is_mmio_pfn(pfn)) { 1052 if (!kvm_is_mmio_pfn(pfn)) {
1053 struct page *page = pfn_to_page(pfn); 1053 struct page *page = pfn_to_page(pfn);
1054 if (!PageReserved(page)) 1054 if (!PageReserved(page))
1055 SetPageDirty(page); 1055 SetPageDirty(page);
1056 } 1056 }
1057 } 1057 }
1058 EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); 1058 EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
1059 1059
1060 void kvm_set_pfn_accessed(pfn_t pfn) 1060 void kvm_set_pfn_accessed(pfn_t pfn)
1061 { 1061 {
1062 if (!kvm_is_mmio_pfn(pfn)) 1062 if (!kvm_is_mmio_pfn(pfn))
1063 mark_page_accessed(pfn_to_page(pfn)); 1063 mark_page_accessed(pfn_to_page(pfn));
1064 } 1064 }
1065 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); 1065 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
1066 1066
1067 void kvm_get_pfn(pfn_t pfn) 1067 void kvm_get_pfn(pfn_t pfn)
1068 { 1068 {
1069 if (!kvm_is_mmio_pfn(pfn)) 1069 if (!kvm_is_mmio_pfn(pfn))
1070 get_page(pfn_to_page(pfn)); 1070 get_page(pfn_to_page(pfn));
1071 } 1071 }
1072 EXPORT_SYMBOL_GPL(kvm_get_pfn); 1072 EXPORT_SYMBOL_GPL(kvm_get_pfn);
1073 1073
1074 static int next_segment(unsigned long len, int offset) 1074 static int next_segment(unsigned long len, int offset)
1075 { 1075 {
1076 if (len > PAGE_SIZE - offset) 1076 if (len > PAGE_SIZE - offset)
1077 return PAGE_SIZE - offset; 1077 return PAGE_SIZE - offset;
1078 else 1078 else
1079 return len; 1079 return len;
1080 } 1080 }
1081 1081
1082 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, 1082 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
1083 int len) 1083 int len)
1084 { 1084 {
1085 int r; 1085 int r;
1086 unsigned long addr; 1086 unsigned long addr;
1087 1087
1088 addr = gfn_to_hva(kvm, gfn); 1088 addr = gfn_to_hva(kvm, gfn);
1089 if (kvm_is_error_hva(addr)) 1089 if (kvm_is_error_hva(addr))
1090 return -EFAULT; 1090 return -EFAULT;
1091 r = copy_from_user(data, (void __user *)addr + offset, len); 1091 r = copy_from_user(data, (void __user *)addr + offset, len);
1092 if (r) 1092 if (r)
1093 return -EFAULT; 1093 return -EFAULT;
1094 return 0; 1094 return 0;
1095 } 1095 }
1096 EXPORT_SYMBOL_GPL(kvm_read_guest_page); 1096 EXPORT_SYMBOL_GPL(kvm_read_guest_page);
1097 1097
1098 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len) 1098 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
1099 { 1099 {
1100 gfn_t gfn = gpa >> PAGE_SHIFT; 1100 gfn_t gfn = gpa >> PAGE_SHIFT;
1101 int seg; 1101 int seg;
1102 int offset = offset_in_page(gpa); 1102 int offset = offset_in_page(gpa);
1103 int ret; 1103 int ret;
1104 1104
1105 while ((seg = next_segment(len, offset)) != 0) { 1105 while ((seg = next_segment(len, offset)) != 0) {
1106 ret = kvm_read_guest_page(kvm, gfn, data, offset, seg); 1106 ret = kvm_read_guest_page(kvm, gfn, data, offset, seg);
1107 if (ret < 0) 1107 if (ret < 0)
1108 return ret; 1108 return ret;
1109 offset = 0; 1109 offset = 0;
1110 len -= seg; 1110 len -= seg;
1111 data += seg; 1111 data += seg;
1112 ++gfn; 1112 ++gfn;
1113 } 1113 }
1114 return 0; 1114 return 0;
1115 } 1115 }
1116 EXPORT_SYMBOL_GPL(kvm_read_guest); 1116 EXPORT_SYMBOL_GPL(kvm_read_guest);
1117 1117
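kvm_read_guest() above (and kvm_write_guest() below) split a guest-physical access into per-page chunks with next_segment(), since a single access may span page boundaries. As a worked example, a 5000-byte read starting at page offset 3000 splits into segments of 1096 and 3904 bytes across two pages; the following standalone sketch reproduces just that chunking:

/* Sketch of the chunking done by kvm_read_guest()/kvm_write_guest(). */
#include <stdio.h>

#define PG 4096UL

static unsigned long next_seg(unsigned long len, unsigned long offset)
{
	return len > PG - offset ? PG - offset : len;
}

int main(void)
{
	unsigned long len = 5000, offset = 3000, seg;

	while ((seg = next_seg(len, offset)) != 0) {
		printf("copy %lu bytes at page offset %lu\n", seg, offset);
		offset = 0;        /* later pages start at offset 0 */
		len -= seg;
	}
	return 0;
}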
1118 int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, 1118 int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
1119 unsigned long len) 1119 unsigned long len)
1120 { 1120 {
1121 int r; 1121 int r;
1122 unsigned long addr; 1122 unsigned long addr;
1123 gfn_t gfn = gpa >> PAGE_SHIFT; 1123 gfn_t gfn = gpa >> PAGE_SHIFT;
1124 int offset = offset_in_page(gpa); 1124 int offset = offset_in_page(gpa);
1125 1125
1126 addr = gfn_to_hva(kvm, gfn); 1126 addr = gfn_to_hva(kvm, gfn);
1127 if (kvm_is_error_hva(addr)) 1127 if (kvm_is_error_hva(addr))
1128 return -EFAULT; 1128 return -EFAULT;
1129 pagefault_disable(); 1129 pagefault_disable();
1130 r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); 1130 r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
1131 pagefault_enable(); 1131 pagefault_enable();
1132 if (r) 1132 if (r)
1133 return -EFAULT; 1133 return -EFAULT;
1134 return 0; 1134 return 0;
1135 } 1135 }
1136 EXPORT_SYMBOL(kvm_read_guest_atomic); 1136 EXPORT_SYMBOL(kvm_read_guest_atomic);
1137 1137
1138 int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, 1138 int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
1139 int offset, int len) 1139 int offset, int len)
1140 { 1140 {
1141 int r; 1141 int r;
1142 unsigned long addr; 1142 unsigned long addr;
1143 1143
1144 addr = gfn_to_hva(kvm, gfn); 1144 addr = gfn_to_hva(kvm, gfn);
1145 if (kvm_is_error_hva(addr)) 1145 if (kvm_is_error_hva(addr))
1146 return -EFAULT; 1146 return -EFAULT;
1147 r = copy_to_user((void __user *)addr + offset, data, len); 1147 r = copy_to_user((void __user *)addr + offset, data, len);
1148 if (r) 1148 if (r)
1149 return -EFAULT; 1149 return -EFAULT;
1150 mark_page_dirty(kvm, gfn); 1150 mark_page_dirty(kvm, gfn);
1151 return 0; 1151 return 0;
1152 } 1152 }
1153 EXPORT_SYMBOL_GPL(kvm_write_guest_page); 1153 EXPORT_SYMBOL_GPL(kvm_write_guest_page);
1154 1154
1155 int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, 1155 int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
1156 unsigned long len) 1156 unsigned long len)
1157 { 1157 {
1158 gfn_t gfn = gpa >> PAGE_SHIFT; 1158 gfn_t gfn = gpa >> PAGE_SHIFT;
1159 int seg; 1159 int seg;
1160 int offset = offset_in_page(gpa); 1160 int offset = offset_in_page(gpa);
1161 int ret; 1161 int ret;
1162 1162
1163 while ((seg = next_segment(len, offset)) != 0) { 1163 while ((seg = next_segment(len, offset)) != 0) {
1164 ret = kvm_write_guest_page(kvm, gfn, data, offset, seg); 1164 ret = kvm_write_guest_page(kvm, gfn, data, offset, seg);
1165 if (ret < 0) 1165 if (ret < 0)
1166 return ret; 1166 return ret;
1167 offset = 0; 1167 offset = 0;
1168 len -= seg; 1168 len -= seg;
1169 data += seg; 1169 data += seg;
1170 ++gfn; 1170 ++gfn;
1171 } 1171 }
1172 return 0; 1172 return 0;
1173 } 1173 }
1174 1174
1175 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) 1175 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
1176 { 1176 {
1177 return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len); 1177 return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len);
1178 } 1178 }
1179 EXPORT_SYMBOL_GPL(kvm_clear_guest_page); 1179 EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
1180 1180
1181 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) 1181 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
1182 { 1182 {
1183 gfn_t gfn = gpa >> PAGE_SHIFT; 1183 gfn_t gfn = gpa >> PAGE_SHIFT;
1184 int seg; 1184 int seg;
1185 int offset = offset_in_page(gpa); 1185 int offset = offset_in_page(gpa);
1186 int ret; 1186 int ret;
1187 1187
1188 while ((seg = next_segment(len, offset)) != 0) { 1188 while ((seg = next_segment(len, offset)) != 0) {
1189 ret = kvm_clear_guest_page(kvm, gfn, offset, seg); 1189 ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
1190 if (ret < 0) 1190 if (ret < 0)
1191 return ret; 1191 return ret;
1192 offset = 0; 1192 offset = 0;
1193 len -= seg; 1193 len -= seg;
1194 ++gfn; 1194 ++gfn;
1195 } 1195 }
1196 return 0; 1196 return 0;
1197 } 1197 }
1198 EXPORT_SYMBOL_GPL(kvm_clear_guest); 1198 EXPORT_SYMBOL_GPL(kvm_clear_guest);
1199 1199
1200 void mark_page_dirty(struct kvm *kvm, gfn_t gfn) 1200 void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
1201 { 1201 {
1202 struct kvm_memory_slot *memslot; 1202 struct kvm_memory_slot *memslot;
1203 1203
1204 gfn = unalias_gfn(kvm, gfn); 1204 gfn = unalias_gfn(kvm, gfn);
1205 memslot = gfn_to_memslot_unaliased(kvm, gfn); 1205 memslot = gfn_to_memslot_unaliased(kvm, gfn);
1206 if (memslot && memslot->dirty_bitmap) { 1206 if (memslot && memslot->dirty_bitmap) {
1207 unsigned long rel_gfn = gfn - memslot->base_gfn; 1207 unsigned long rel_gfn = gfn - memslot->base_gfn;
1208 1208
1209 generic___set_le_bit(rel_gfn, memslot->dirty_bitmap); 1209 generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
1210 } 1210 }
1211 } 1211 }
1212 1212
1213 /* 1213 /*
1214 * The vCPU has executed a HLT instruction with in-kernel mode enabled. 1214 * The vCPU has executed a HLT instruction with in-kernel mode enabled.
1215 */ 1215 */
1216 void kvm_vcpu_block(struct kvm_vcpu *vcpu) 1216 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
1217 { 1217 {
1218 DEFINE_WAIT(wait); 1218 DEFINE_WAIT(wait);
1219 1219
1220 for (;;) { 1220 for (;;) {
1221 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); 1221 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
1222 1222
1223 if (kvm_arch_vcpu_runnable(vcpu)) { 1223 if (kvm_arch_vcpu_runnable(vcpu)) {
1224 set_bit(KVM_REQ_UNHALT, &vcpu->requests); 1224 set_bit(KVM_REQ_UNHALT, &vcpu->requests);
1225 break; 1225 break;
1226 } 1226 }
1227 if (kvm_cpu_has_pending_timer(vcpu)) 1227 if (kvm_cpu_has_pending_timer(vcpu))
1228 break; 1228 break;
1229 if (signal_pending(current)) 1229 if (signal_pending(current))
1230 break; 1230 break;
1231 1231
1232 schedule(); 1232 schedule();
1233 } 1233 }
1234 1234
1235 finish_wait(&vcpu->wq, &wait); 1235 finish_wait(&vcpu->wq, &wait);
1236 } 1236 }
1237 1237
1238 void kvm_resched(struct kvm_vcpu *vcpu) 1238 void kvm_resched(struct kvm_vcpu *vcpu)
1239 { 1239 {
1240 if (!need_resched()) 1240 if (!need_resched())
1241 return; 1241 return;
1242 cond_resched(); 1242 cond_resched();
1243 } 1243 }
1244 EXPORT_SYMBOL_GPL(kvm_resched); 1244 EXPORT_SYMBOL_GPL(kvm_resched);
1245 1245
1246 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu) 1246 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu)
1247 { 1247 {
1248 ktime_t expires; 1248 ktime_t expires;
1249 DEFINE_WAIT(wait); 1249 DEFINE_WAIT(wait);
1250 1250
1251 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); 1251 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
1252 1252
1253 /* Sleep for 100 us, and hope lock-holder got scheduled */ 1253 /* Sleep for 100 us, and hope lock-holder got scheduled */
1254 expires = ktime_add_ns(ktime_get(), 100000UL); 1254 expires = ktime_add_ns(ktime_get(), 100000UL);
1255 schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); 1255 schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
1256 1256
1257 finish_wait(&vcpu->wq, &wait); 1257 finish_wait(&vcpu->wq, &wait);
1258 } 1258 }
1259 EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); 1259 EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
1260 1260
1261 static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1261 static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1262 { 1262 {
1263 struct kvm_vcpu *vcpu = vma->vm_file->private_data; 1263 struct kvm_vcpu *vcpu = vma->vm_file->private_data;
1264 struct page *page; 1264 struct page *page;
1265 1265
1266 if (vmf->pgoff == 0) 1266 if (vmf->pgoff == 0)
1267 page = virt_to_page(vcpu->run); 1267 page = virt_to_page(vcpu->run);
1268 #ifdef CONFIG_X86 1268 #ifdef CONFIG_X86
1269 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) 1269 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
1270 page = virt_to_page(vcpu->arch.pio_data); 1270 page = virt_to_page(vcpu->arch.pio_data);
1271 #endif 1271 #endif
1272 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 1272 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
1273 else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) 1273 else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
1274 page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); 1274 page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
1275 #endif 1275 #endif
1276 else 1276 else
1277 return VM_FAULT_SIGBUS; 1277 return VM_FAULT_SIGBUS;
1278 get_page(page); 1278 get_page(page);
1279 vmf->page = page; 1279 vmf->page = page;
1280 return 0; 1280 return 0;
1281 } 1281 }
1282 1282
1283 static const struct vm_operations_struct kvm_vcpu_vm_ops = { 1283 static const struct vm_operations_struct kvm_vcpu_vm_ops = {
1284 .fault = kvm_vcpu_fault, 1284 .fault = kvm_vcpu_fault,
1285 }; 1285 };
1286 1286
1287 static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma) 1287 static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
1288 { 1288 {
1289 vma->vm_ops = &kvm_vcpu_vm_ops; 1289 vma->vm_ops = &kvm_vcpu_vm_ops;
1290 return 0; 1290 return 0;
1291 } 1291 }
1292 1292
1293 static int kvm_vcpu_release(struct inode *inode, struct file *filp) 1293 static int kvm_vcpu_release(struct inode *inode, struct file *filp)
1294 { 1294 {
1295 struct kvm_vcpu *vcpu = filp->private_data; 1295 struct kvm_vcpu *vcpu = filp->private_data;
1296 1296
1297 kvm_put_kvm(vcpu->kvm); 1297 kvm_put_kvm(vcpu->kvm);
1298 return 0; 1298 return 0;
1299 } 1299 }
1300 1300
1301 static struct file_operations kvm_vcpu_fops = { 1301 static struct file_operations kvm_vcpu_fops = {
1302 .release = kvm_vcpu_release, 1302 .release = kvm_vcpu_release,
1303 .unlocked_ioctl = kvm_vcpu_ioctl, 1303 .unlocked_ioctl = kvm_vcpu_ioctl,
1304 .compat_ioctl = kvm_vcpu_ioctl, 1304 .compat_ioctl = kvm_vcpu_ioctl,
1305 .mmap = kvm_vcpu_mmap, 1305 .mmap = kvm_vcpu_mmap,
1306 }; 1306 };
1307 1307
1308 /* 1308 /*
1309 * Allocates an inode for the vcpu. 1309 * Allocates an inode for the vcpu.
1310 */ 1310 */
1311 static int create_vcpu_fd(struct kvm_vcpu *vcpu) 1311 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
1312 { 1312 {
1313 return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR); 1313 return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR);
1314 } 1314 }
1315 1315
1316 /* 1316 /*
1317 * Creates some virtual cpus. Good luck creating more than one. 1317 * Creates some virtual cpus. Good luck creating more than one.
1318 */ 1318 */
1319 static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) 1319 static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
1320 { 1320 {
1321 int r; 1321 int r;
1322 struct kvm_vcpu *vcpu, *v; 1322 struct kvm_vcpu *vcpu, *v;
1323 1323
1324 vcpu = kvm_arch_vcpu_create(kvm, id); 1324 vcpu = kvm_arch_vcpu_create(kvm, id);
1325 if (IS_ERR(vcpu)) 1325 if (IS_ERR(vcpu))
1326 return PTR_ERR(vcpu); 1326 return PTR_ERR(vcpu);
1327 1327
1328 preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); 1328 preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
1329 1329
1330 r = kvm_arch_vcpu_setup(vcpu); 1330 r = kvm_arch_vcpu_setup(vcpu);
1331 if (r) 1331 if (r)
1332 return r; 1332 return r;
1333 1333
1334 mutex_lock(&kvm->lock); 1334 mutex_lock(&kvm->lock);
1335 if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) { 1335 if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) {
1336 r = -EINVAL; 1336 r = -EINVAL;
1337 goto vcpu_destroy; 1337 goto vcpu_destroy;
1338 } 1338 }
1339 1339
1340 kvm_for_each_vcpu(r, v, kvm) 1340 kvm_for_each_vcpu(r, v, kvm)
1341 if (v->vcpu_id == id) { 1341 if (v->vcpu_id == id) {
1342 r = -EEXIST; 1342 r = -EEXIST;
1343 goto vcpu_destroy; 1343 goto vcpu_destroy;
1344 } 1344 }
1345 1345
1346 BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]); 1346 BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]);
1347 1347
1348 /* Now it's all set up, let userspace reach it */ 1348 /* Now it's all set up, let userspace reach it */
1349 kvm_get_kvm(kvm); 1349 kvm_get_kvm(kvm);
1350 r = create_vcpu_fd(vcpu); 1350 r = create_vcpu_fd(vcpu);
1351 if (r < 0) { 1351 if (r < 0) {
1352 kvm_put_kvm(kvm); 1352 kvm_put_kvm(kvm);
1353 goto vcpu_destroy; 1353 goto vcpu_destroy;
1354 } 1354 }
1355 1355
1356 kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu; 1356 kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
1357 smp_wmb(); 1357 smp_wmb();
1358 atomic_inc(&kvm->online_vcpus); 1358 atomic_inc(&kvm->online_vcpus);
1359 1359
1360 #ifdef CONFIG_KVM_APIC_ARCHITECTURE 1360 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
1361 if (kvm->bsp_vcpu_id == id) 1361 if (kvm->bsp_vcpu_id == id)
1362 kvm->bsp_vcpu = vcpu; 1362 kvm->bsp_vcpu = vcpu;
1363 #endif 1363 #endif
1364 mutex_unlock(&kvm->lock); 1364 mutex_unlock(&kvm->lock);
1365 return r; 1365 return r;
1366 1366
1367 vcpu_destroy: 1367 vcpu_destroy:
1368 mutex_unlock(&kvm->lock); 1368 mutex_unlock(&kvm->lock);
1369 kvm_arch_vcpu_destroy(vcpu); 1369 kvm_arch_vcpu_destroy(vcpu);
1370 return r; 1370 return r;
1371 } 1371 }
1372 1372
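kvm_vm_ioctl_create_vcpu() above backs the KVM_CREATE_VCPU vm ioctl, and kvm_vcpu_mmap()/kvm_vcpu_fault() back the mmap of the returned vcpu fd. A hedged user-space sketch of that sequence, assuming kvm_fd (/dev/kvm) and vm_fd (from KVM_CREATE_VM) already exist:

/* Sketch: create vcpu 0 and map its shared kvm_run page. */
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

static struct kvm_run *create_vcpu0(int kvm_fd, int vm_fd, int *vcpu_fd_out)
{
	/* Served by kvm_vm_ioctl_create_vcpu(); the id is checked against
	 * KVM_MAX_VCPUS and for duplicates under kvm->lock. */
	int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
	if (vcpu_fd < 0)
		return NULL;

	/* The mmap size covers the kvm_run page plus arch-specific pages
	 * (e.g. the x86 pio_data page at KVM_PIO_PAGE_OFFSET). */
	long mmap_size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
	if (mmap_size < 0)
		return NULL;

	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
				   MAP_SHARED, vcpu_fd, 0);
	if (run == MAP_FAILED)
		return NULL;

	*vcpu_fd_out = vcpu_fd;
	return run;
}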
1373 static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) 1373 static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
1374 { 1374 {
1375 if (sigset) { 1375 if (sigset) {
1376 sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP)); 1376 sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP));
1377 vcpu->sigset_active = 1; 1377 vcpu->sigset_active = 1;
1378 vcpu->sigset = *sigset; 1378 vcpu->sigset = *sigset;
1379 } else 1379 } else
1380 vcpu->sigset_active = 0; 1380 vcpu->sigset_active = 0;
1381 return 0; 1381 return 0;
1382 } 1382 }
1383 1383
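The dispatcher below is where this commit takes effect: vcpu_load() is now taken once here, so every generic vcpu ioctl in the switch runs with the vcpu loaded without each handler locking on its own (presumably paired with vcpu_put() on the exit path, outside this hunk). Nothing changes for user space; a hedged x86 sketch of issuing two of the generic ioctls handled below:

/* Sketch: generic vcpu ioctls as issued from user space (x86 kvm_regs). */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int dump_rip_and_run(int vcpu_fd)
{
	struct kvm_regs regs;

	if (ioctl(vcpu_fd, KVM_GET_REGS, &regs) < 0)   /* KVM_GET_REGS case below */
		return -1;
	printf("rip = 0x%llx\n", (unsigned long long)regs.rip);

	return ioctl(vcpu_fd, KVM_RUN, 0);             /* KVM_RUN case below      */
}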
1384 static long kvm_vcpu_ioctl(struct file *filp, 1384 static long kvm_vcpu_ioctl(struct file *filp,
1385 unsigned int ioctl, unsigned long arg) 1385 unsigned int ioctl, unsigned long arg)
1386 { 1386 {
1387 struct kvm_vcpu *vcpu = filp->private_data; 1387 struct kvm_vcpu *vcpu = filp->private_data;
1388 void __user *argp = (void __user *)arg; 1388 void __user *argp = (void __user *)arg;
1389 int r; 1389 int r;
1390 struct kvm_fpu *fpu = NULL; 1390 struct kvm_fpu *fpu = NULL;
1391 struct kvm_sregs *kvm_sregs = NULL; 1391 struct kvm_sregs *kvm_sregs = NULL;
1392 1392
1393 if (vcpu->kvm->mm != current->mm) 1393 if (vcpu->kvm->mm != current->mm)
1394 return -EIO; 1394 return -EIO;
1395
1396 #if defined(CONFIG_S390) || defined(CONFIG_PPC)
1397 /*
1398 * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
1399 * so vcpu_load() would break it.
1400 */
1401 if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT)
1402 return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
1403 #endif
1404
1405
1406 vcpu_load(vcpu);
1395 switch (ioctl) { 1407 switch (ioctl) {
1396 case KVM_RUN: 1408 case KVM_RUN:
1397 r = -EINVAL; 1409 r = -EINVAL;
1398 if (arg) 1410 if (arg)
1399 goto out; 1411 goto out;
1400 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); 1412 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
1401 break; 1413 break;
1402 case KVM_GET_REGS: { 1414 case KVM_GET_REGS: {
1403 struct kvm_regs *kvm_regs; 1415 struct kvm_regs *kvm_regs;
1404 1416
1405 r = -ENOMEM; 1417 r = -ENOMEM;
1406 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); 1418 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
1407 if (!kvm_regs) 1419 if (!kvm_regs)
1408 goto out; 1420 goto out;
1409 r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); 1421 r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
1410 if (r) 1422 if (r)
1411 goto out_free1; 1423 goto out_free1;
1412 r = -EFAULT; 1424 r = -EFAULT;
1413 if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs))) 1425 if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs)))
1414 goto out_free1; 1426 goto out_free1;
1415 r = 0; 1427 r = 0;
1416 out_free1: 1428 out_free1:
1417 kfree(kvm_regs); 1429 kfree(kvm_regs);
1418 break; 1430 break;
1419 } 1431 }
1420 case KVM_SET_REGS: { 1432 case KVM_SET_REGS: {
1421 struct kvm_regs *kvm_regs; 1433 struct kvm_regs *kvm_regs;
1422 1434
1423 r = -ENOMEM; 1435 r = -ENOMEM;
1424 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); 1436 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
1425 if (!kvm_regs) 1437 if (!kvm_regs)
1426 goto out; 1438 goto out;
1427 r = -EFAULT; 1439 r = -EFAULT;
1428 if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs))) 1440 if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs)))
1429 goto out_free2; 1441 goto out_free2;
1430 r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); 1442 r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
1431 if (r) 1443 if (r)
1432 goto out_free2; 1444 goto out_free2;
1433 r = 0; 1445 r = 0;
1434 out_free2: 1446 out_free2:
1435 kfree(kvm_regs); 1447 kfree(kvm_regs);
1436 break; 1448 break;
1437 } 1449 }
1438 case KVM_GET_SREGS: { 1450 case KVM_GET_SREGS: {
1439 kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); 1451 kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
1440 r = -ENOMEM; 1452 r = -ENOMEM;
1441 if (!kvm_sregs) 1453 if (!kvm_sregs)
1442 goto out; 1454 goto out;
1443 r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs); 1455 r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs);
1444 if (r) 1456 if (r)
1445 goto out; 1457 goto out;
1446 r = -EFAULT; 1458 r = -EFAULT;
1447 if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs))) 1459 if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs)))
1448 goto out; 1460 goto out;
1449 r = 0; 1461 r = 0;
1450 break; 1462 break;
1451 } 1463 }
1452 case KVM_SET_SREGS: { 1464 case KVM_SET_SREGS: {
1453 kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL); 1465 kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
1454 r = -ENOMEM; 1466 r = -ENOMEM;
1455 if (!kvm_sregs) 1467 if (!kvm_sregs)
1456 goto out; 1468 goto out;
1457 r = -EFAULT; 1469 r = -EFAULT;
1458 if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs))) 1470 if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs)))
1459 goto out; 1471 goto out;
1460 r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); 1472 r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs);
1461 if (r) 1473 if (r)
1462 goto out; 1474 goto out;
1463 r = 0; 1475 r = 0;
1464 break; 1476 break;
1465 } 1477 }
1466 case KVM_GET_MP_STATE: { 1478 case KVM_GET_MP_STATE: {
1467 struct kvm_mp_state mp_state; 1479 struct kvm_mp_state mp_state;
1468 1480
1469 r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state); 1481 r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state);
1470 if (r) 1482 if (r)
1471 goto out; 1483 goto out;
1472 r = -EFAULT; 1484 r = -EFAULT;
1473 if (copy_to_user(argp, &mp_state, sizeof mp_state)) 1485 if (copy_to_user(argp, &mp_state, sizeof mp_state))
1474 goto out; 1486 goto out;
1475 r = 0; 1487 r = 0;
1476 break; 1488 break;
1477 } 1489 }
1478 case KVM_SET_MP_STATE: { 1490 case KVM_SET_MP_STATE: {
1479 struct kvm_mp_state mp_state; 1491 struct kvm_mp_state mp_state;
1480 1492
1481 r = -EFAULT; 1493 r = -EFAULT;
1482 if (copy_from_user(&mp_state, argp, sizeof mp_state)) 1494 if (copy_from_user(&mp_state, argp, sizeof mp_state))
1483 goto out; 1495 goto out;
1484 r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); 1496 r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
1485 if (r) 1497 if (r)
1486 goto out; 1498 goto out;
1487 r = 0; 1499 r = 0;
1488 break; 1500 break;
1489 } 1501 }
1490 case KVM_TRANSLATE: { 1502 case KVM_TRANSLATE: {
1491 struct kvm_translation tr; 1503 struct kvm_translation tr;
1492 1504
1493 r = -EFAULT; 1505 r = -EFAULT;
1494 if (copy_from_user(&tr, argp, sizeof tr)) 1506 if (copy_from_user(&tr, argp, sizeof tr))
1495 goto out; 1507 goto out;
1496 r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr); 1508 r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
1497 if (r) 1509 if (r)
1498 goto out; 1510 goto out;
1499 r = -EFAULT; 1511 r = -EFAULT;
1500 if (copy_to_user(argp, &tr, sizeof tr)) 1512 if (copy_to_user(argp, &tr, sizeof tr))
1501 goto out; 1513 goto out;
1502 r = 0; 1514 r = 0;
1503 break; 1515 break;
1504 } 1516 }
1505 case KVM_SET_GUEST_DEBUG: { 1517 case KVM_SET_GUEST_DEBUG: {
1506 struct kvm_guest_debug dbg; 1518 struct kvm_guest_debug dbg;
1507 1519
1508 r = -EFAULT; 1520 r = -EFAULT;
1509 if (copy_from_user(&dbg, argp, sizeof dbg)) 1521 if (copy_from_user(&dbg, argp, sizeof dbg))
1510 goto out; 1522 goto out;
1511 r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); 1523 r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
1512 if (r) 1524 if (r)
1513 goto out; 1525 goto out;
1514 r = 0; 1526 r = 0;
1515 break; 1527 break;
1516 } 1528 }
1517 case KVM_SET_SIGNAL_MASK: { 1529 case KVM_SET_SIGNAL_MASK: {
1518 struct kvm_signal_mask __user *sigmask_arg = argp; 1530 struct kvm_signal_mask __user *sigmask_arg = argp;
1519 struct kvm_signal_mask kvm_sigmask; 1531 struct kvm_signal_mask kvm_sigmask;
1520 sigset_t sigset, *p; 1532 sigset_t sigset, *p;
1521 1533
1522 p = NULL; 1534 p = NULL;
1523 if (argp) { 1535 if (argp) {
1524 r = -EFAULT; 1536 r = -EFAULT;
1525 if (copy_from_user(&kvm_sigmask, argp, 1537 if (copy_from_user(&kvm_sigmask, argp,
1526 sizeof kvm_sigmask)) 1538 sizeof kvm_sigmask))
1527 goto out; 1539 goto out;
1528 r = -EINVAL; 1540 r = -EINVAL;
1529 if (kvm_sigmask.len != sizeof sigset) 1541 if (kvm_sigmask.len != sizeof sigset)
1530 goto out; 1542 goto out;
1531 r = -EFAULT; 1543 r = -EFAULT;
1532 if (copy_from_user(&sigset, sigmask_arg->sigset, 1544 if (copy_from_user(&sigset, sigmask_arg->sigset,
1533 sizeof sigset)) 1545 sizeof sigset))
1534 goto out; 1546 goto out;
1535 p = &sigset; 1547 p = &sigset;
1536 } 1548 }
1537 r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); 1549 r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
1538 break; 1550 break;
1539 } 1551 }
1540 case KVM_GET_FPU: { 1552 case KVM_GET_FPU: {
1541 fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); 1553 fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
1542 r = -ENOMEM; 1554 r = -ENOMEM;
1543 if (!fpu) 1555 if (!fpu)
1544 goto out; 1556 goto out;
1545 r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu); 1557 r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu);
1546 if (r) 1558 if (r)
1547 goto out; 1559 goto out;
1548 r = -EFAULT; 1560 r = -EFAULT;
1549 if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu))) 1561 if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu)))
1550 goto out; 1562 goto out;
1551 r = 0; 1563 r = 0;
1552 break; 1564 break;
1553 } 1565 }
1554 case KVM_SET_FPU: { 1566 case KVM_SET_FPU: {
1555 fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL); 1567 fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
1556 r = -ENOMEM; 1568 r = -ENOMEM;
1557 if (!fpu) 1569 if (!fpu)
1558 goto out; 1570 goto out;
1559 r = -EFAULT; 1571 r = -EFAULT;
1560 if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu))) 1572 if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu)))
1561 goto out; 1573 goto out;
1562 r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); 1574 r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
1563 if (r) 1575 if (r)
1564 goto out; 1576 goto out;
1565 r = 0; 1577 r = 0;
1566 break; 1578 break;
1567 } 1579 }
1568 default: 1580 default:
1581 vcpu_put(vcpu);
1569 r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); 1582 r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
1583 vcpu_load(vcpu);
1570 } 1584 }
1571 out: 1585 out:
1586 vcpu_put(vcpu);
1572 kfree(fpu); 1587 kfree(fpu);
1573 kfree(kvm_sregs); 1588 kfree(kvm_sregs);
1574 return r; 1589 return r;
1575 } 1590 }
1576 1591
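This hunk is the core of the patch: the generic dispatcher now takes the per-vcpu lock once, around the whole switch, instead of each generic ioctl locking on its own, and the default case drops the lock around kvm_arch_vcpu_ioctl() because the arch-specific handlers (untouched by this patch) still take it themselves. The KVM_S390_INTERRUPT/KVM_INTERRUPT bypass exists because those ioctls are issued from another thread while KVM_RUN already holds the lock, so vcpu_load() would block behind the running vcpu and defeat the point of asynchronous injection. For context, vcpu_load() and vcpu_put() are defined earlier in kvm_main.c and do roughly the following; this is a paraphrased sketch of that era's helpers, not part of this diff:

	void vcpu_load(struct kvm_vcpu *vcpu)
	{
		int cpu;

		mutex_lock(&vcpu->mutex);               /* serializes all vcpu ioctls        */
		cpu = get_cpu();
		preempt_notifier_register(&vcpu->preempt_notifier);
		kvm_arch_vcpu_load(vcpu, cpu);          /* migrate arch state to this CPU    */
		put_cpu();
	}

	void vcpu_put(struct kvm_vcpu *vcpu)
	{
		preempt_disable();
		kvm_arch_vcpu_put(vcpu);                /* save arch state                   */
		preempt_notifier_unregister(&vcpu->preempt_notifier);
		preempt_enable();
		mutex_unlock(&vcpu->mutex);
	}
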
1577 static long kvm_vm_ioctl(struct file *filp, 1592 static long kvm_vm_ioctl(struct file *filp,
1578 unsigned int ioctl, unsigned long arg) 1593 unsigned int ioctl, unsigned long arg)
1579 { 1594 {
1580 struct kvm *kvm = filp->private_data; 1595 struct kvm *kvm = filp->private_data;
1581 void __user *argp = (void __user *)arg; 1596 void __user *argp = (void __user *)arg;
1582 int r; 1597 int r;
1583 1598
1584 if (kvm->mm != current->mm) 1599 if (kvm->mm != current->mm)
1585 return -EIO; 1600 return -EIO;
1586 switch (ioctl) { 1601 switch (ioctl) {
1587 case KVM_CREATE_VCPU: 1602 case KVM_CREATE_VCPU:
1588 r = kvm_vm_ioctl_create_vcpu(kvm, arg); 1603 r = kvm_vm_ioctl_create_vcpu(kvm, arg);
1589 if (r < 0) 1604 if (r < 0)
1590 goto out; 1605 goto out;
1591 break; 1606 break;
1592 case KVM_SET_USER_MEMORY_REGION: { 1607 case KVM_SET_USER_MEMORY_REGION: {
1593 struct kvm_userspace_memory_region kvm_userspace_mem; 1608 struct kvm_userspace_memory_region kvm_userspace_mem;
1594 1609
1595 r = -EFAULT; 1610 r = -EFAULT;
1596 if (copy_from_user(&kvm_userspace_mem, argp, 1611 if (copy_from_user(&kvm_userspace_mem, argp,
1597 sizeof kvm_userspace_mem)) 1612 sizeof kvm_userspace_mem))
1598 goto out; 1613 goto out;
1599 1614
1600 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1); 1615 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1);
1601 if (r) 1616 if (r)
1602 goto out; 1617 goto out;
1603 break; 1618 break;
1604 } 1619 }
1605 case KVM_GET_DIRTY_LOG: { 1620 case KVM_GET_DIRTY_LOG: {
1606 struct kvm_dirty_log log; 1621 struct kvm_dirty_log log;
1607 1622
1608 r = -EFAULT; 1623 r = -EFAULT;
1609 if (copy_from_user(&log, argp, sizeof log)) 1624 if (copy_from_user(&log, argp, sizeof log))
1610 goto out; 1625 goto out;
1611 r = kvm_vm_ioctl_get_dirty_log(kvm, &log); 1626 r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
1612 if (r) 1627 if (r)
1613 goto out; 1628 goto out;
1614 break; 1629 break;
1615 } 1630 }
1616 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 1631 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
1617 case KVM_REGISTER_COALESCED_MMIO: { 1632 case KVM_REGISTER_COALESCED_MMIO: {
1618 struct kvm_coalesced_mmio_zone zone; 1633 struct kvm_coalesced_mmio_zone zone;
1619 r = -EFAULT; 1634 r = -EFAULT;
1620 if (copy_from_user(&zone, argp, sizeof zone)) 1635 if (copy_from_user(&zone, argp, sizeof zone))
1621 goto out; 1636 goto out;
1622 r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); 1637 r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
1623 if (r) 1638 if (r)
1624 goto out; 1639 goto out;
1625 r = 0; 1640 r = 0;
1626 break; 1641 break;
1627 } 1642 }
1628 case KVM_UNREGISTER_COALESCED_MMIO: { 1643 case KVM_UNREGISTER_COALESCED_MMIO: {
1629 struct kvm_coalesced_mmio_zone zone; 1644 struct kvm_coalesced_mmio_zone zone;
1630 r = -EFAULT; 1645 r = -EFAULT;
1631 if (copy_from_user(&zone, argp, sizeof zone)) 1646 if (copy_from_user(&zone, argp, sizeof zone))
1632 goto out; 1647 goto out;
1633 r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); 1648 r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
1634 if (r) 1649 if (r)
1635 goto out; 1650 goto out;
1636 r = 0; 1651 r = 0;
1637 break; 1652 break;
1638 } 1653 }
1639 #endif 1654 #endif
1640 case KVM_IRQFD: { 1655 case KVM_IRQFD: {
1641 struct kvm_irqfd data; 1656 struct kvm_irqfd data;
1642 1657
1643 r = -EFAULT; 1658 r = -EFAULT;
1644 if (copy_from_user(&data, argp, sizeof data)) 1659 if (copy_from_user(&data, argp, sizeof data))
1645 goto out; 1660 goto out;
1646 r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags); 1661 r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags);
1647 break; 1662 break;
1648 } 1663 }
1649 case KVM_IOEVENTFD: { 1664 case KVM_IOEVENTFD: {
1650 struct kvm_ioeventfd data; 1665 struct kvm_ioeventfd data;
1651 1666
1652 r = -EFAULT; 1667 r = -EFAULT;
1653 if (copy_from_user(&data, argp, sizeof data)) 1668 if (copy_from_user(&data, argp, sizeof data))
1654 goto out; 1669 goto out;
1655 r = kvm_ioeventfd(kvm, &data); 1670 r = kvm_ioeventfd(kvm, &data);
1656 break; 1671 break;
1657 } 1672 }
1658 #ifdef CONFIG_KVM_APIC_ARCHITECTURE 1673 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
1659 case KVM_SET_BOOT_CPU_ID: 1674 case KVM_SET_BOOT_CPU_ID:
1660 r = 0; 1675 r = 0;
1661 mutex_lock(&kvm->lock); 1676 mutex_lock(&kvm->lock);
1662 if (atomic_read(&kvm->online_vcpus) != 0) 1677 if (atomic_read(&kvm->online_vcpus) != 0)
1663 r = -EBUSY; 1678 r = -EBUSY;
1664 else 1679 else
1665 kvm->bsp_vcpu_id = arg; 1680 kvm->bsp_vcpu_id = arg;
1666 mutex_unlock(&kvm->lock); 1681 mutex_unlock(&kvm->lock);
1667 break; 1682 break;
1668 #endif 1683 #endif
1669 default: 1684 default:
1670 r = kvm_arch_vm_ioctl(filp, ioctl, arg); 1685 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
1671 if (r == -ENOTTY) 1686 if (r == -ENOTTY)
1672 r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); 1687 r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
1673 } 1688 }
1674 out: 1689 out:
1675 return r; 1690 return r;
1676 } 1691 }
1677 1692
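For orientation, the KVM_SET_USER_MEMORY_REGION path above is driven from userspace with struct kvm_userspace_memory_region. A minimal, illustrative caller might look like the sketch below; the helper name, slot number and anonymous-mmap backing are assumptions for the example, not part of the kernel code:

	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <linux/kvm.h>

	/* Hypothetical helper: back guest physical 0..size with anonymous memory in slot 0. */
	static int set_guest_ram(int vm_fd, size_t size)
	{
		void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		struct kvm_userspace_memory_region region = {
			.slot            = 0,
			.flags           = 0,
			.guest_phys_addr = 0,
			.memory_size     = size,
			.userspace_addr  = (unsigned long)mem,
		};

		if (mem == MAP_FAILED)
			return -1;
		return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
	}
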
1678 #ifdef CONFIG_COMPAT 1693 #ifdef CONFIG_COMPAT
1679 struct compat_kvm_dirty_log { 1694 struct compat_kvm_dirty_log {
1680 __u32 slot; 1695 __u32 slot;
1681 __u32 padding1; 1696 __u32 padding1;
1682 union { 1697 union {
1683 compat_uptr_t dirty_bitmap; /* one bit per page */ 1698 compat_uptr_t dirty_bitmap; /* one bit per page */
1684 __u64 padding2; 1699 __u64 padding2;
1685 }; 1700 };
1686 }; 1701 };
1687 1702
1688 static long kvm_vm_compat_ioctl(struct file *filp, 1703 static long kvm_vm_compat_ioctl(struct file *filp,
1689 unsigned int ioctl, unsigned long arg) 1704 unsigned int ioctl, unsigned long arg)
1690 { 1705 {
1691 struct kvm *kvm = filp->private_data; 1706 struct kvm *kvm = filp->private_data;
1692 int r; 1707 int r;
1693 1708
1694 if (kvm->mm != current->mm) 1709 if (kvm->mm != current->mm)
1695 return -EIO; 1710 return -EIO;
1696 switch (ioctl) { 1711 switch (ioctl) {
1697 case KVM_GET_DIRTY_LOG: { 1712 case KVM_GET_DIRTY_LOG: {
1698 struct compat_kvm_dirty_log compat_log; 1713 struct compat_kvm_dirty_log compat_log;
1699 struct kvm_dirty_log log; 1714 struct kvm_dirty_log log;
1700 1715
1701 r = -EFAULT; 1716 r = -EFAULT;
1702 if (copy_from_user(&compat_log, (void __user *)arg, 1717 if (copy_from_user(&compat_log, (void __user *)arg,
1703 sizeof(compat_log))) 1718 sizeof(compat_log)))
1704 goto out; 1719 goto out;
1705 log.slot = compat_log.slot; 1720 log.slot = compat_log.slot;
1706 log.padding1 = compat_log.padding1; 1721 log.padding1 = compat_log.padding1;
1707 log.padding2 = compat_log.padding2; 1722 log.padding2 = compat_log.padding2;
1708 log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); 1723 log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap);
1709 1724
1710 r = kvm_vm_ioctl_get_dirty_log(kvm, &log); 1725 r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
1711 if (r) 1726 if (r)
1712 goto out; 1727 goto out;
1713 break; 1728 break;
1714 } 1729 }
1715 default: 1730 default:
1716 r = kvm_vm_ioctl(filp, ioctl, arg); 1731 r = kvm_vm_ioctl(filp, ioctl, arg);
1717 } 1732 }
1718 1733
1719 out: 1734 out:
1720 return r; 1735 return r;
1721 } 1736 }
1722 #endif 1737 #endif
1723 1738
1724 static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1739 static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1725 { 1740 {
1726 struct page *page[1]; 1741 struct page *page[1];
1727 unsigned long addr; 1742 unsigned long addr;
1728 int npages; 1743 int npages;
1729 gfn_t gfn = vmf->pgoff; 1744 gfn_t gfn = vmf->pgoff;
1730 struct kvm *kvm = vma->vm_file->private_data; 1745 struct kvm *kvm = vma->vm_file->private_data;
1731 1746
1732 addr = gfn_to_hva(kvm, gfn); 1747 addr = gfn_to_hva(kvm, gfn);
1733 if (kvm_is_error_hva(addr)) 1748 if (kvm_is_error_hva(addr))
1734 return VM_FAULT_SIGBUS; 1749 return VM_FAULT_SIGBUS;
1735 1750
1736 npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page, 1751 npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
1737 NULL); 1752 NULL);
1738 if (unlikely(npages != 1)) 1753 if (unlikely(npages != 1))
1739 return VM_FAULT_SIGBUS; 1754 return VM_FAULT_SIGBUS;
1740 1755
1741 vmf->page = page[0]; 1756 vmf->page = page[0];
1742 return 0; 1757 return 0;
1743 } 1758 }
1744 1759
1745 static const struct vm_operations_struct kvm_vm_vm_ops = { 1760 static const struct vm_operations_struct kvm_vm_vm_ops = {
1746 .fault = kvm_vm_fault, 1761 .fault = kvm_vm_fault,
1747 }; 1762 };
1748 1763
1749 static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) 1764 static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
1750 { 1765 {
1751 vma->vm_ops = &kvm_vm_vm_ops; 1766 vma->vm_ops = &kvm_vm_vm_ops;
1752 return 0; 1767 return 0;
1753 } 1768 }
1754 1769
1755 static struct file_operations kvm_vm_fops = { 1770 static struct file_operations kvm_vm_fops = {
1756 .release = kvm_vm_release, 1771 .release = kvm_vm_release,
1757 .unlocked_ioctl = kvm_vm_ioctl, 1772 .unlocked_ioctl = kvm_vm_ioctl,
1758 #ifdef CONFIG_COMPAT 1773 #ifdef CONFIG_COMPAT
1759 .compat_ioctl = kvm_vm_compat_ioctl, 1774 .compat_ioctl = kvm_vm_compat_ioctl,
1760 #endif 1775 #endif
1761 .mmap = kvm_vm_mmap, 1776 .mmap = kvm_vm_mmap,
1762 }; 1777 };
1763 1778
1764 static int kvm_dev_ioctl_create_vm(void) 1779 static int kvm_dev_ioctl_create_vm(void)
1765 { 1780 {
1766 int fd, r; 1781 int fd, r;
1767 struct kvm *kvm; 1782 struct kvm *kvm;
1768 1783
1769 kvm = kvm_create_vm(); 1784 kvm = kvm_create_vm();
1770 if (IS_ERR(kvm)) 1785 if (IS_ERR(kvm))
1771 return PTR_ERR(kvm); 1786 return PTR_ERR(kvm);
1772 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 1787 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
1773 r = kvm_coalesced_mmio_init(kvm); 1788 r = kvm_coalesced_mmio_init(kvm);
1774 if (r < 0) { 1789 if (r < 0) {
1775 kvm_put_kvm(kvm); 1790 kvm_put_kvm(kvm);
1776 return r; 1791 return r;
1777 } 1792 }
1778 #endif 1793 #endif
1779 fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); 1794 fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
1780 if (fd < 0) 1795 if (fd < 0)
1781 kvm_put_kvm(kvm); 1796 kvm_put_kvm(kvm);
1782 1797
1783 return fd; 1798 return fd;
1784 } 1799 }
1785 1800
1786 static long kvm_dev_ioctl_check_extension_generic(long arg) 1801 static long kvm_dev_ioctl_check_extension_generic(long arg)
1787 { 1802 {
1788 switch (arg) { 1803 switch (arg) {
1789 case KVM_CAP_USER_MEMORY: 1804 case KVM_CAP_USER_MEMORY:
1790 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: 1805 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
1791 case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: 1806 case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
1792 #ifdef CONFIG_KVM_APIC_ARCHITECTURE 1807 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
1793 case KVM_CAP_SET_BOOT_CPU_ID: 1808 case KVM_CAP_SET_BOOT_CPU_ID:
1794 #endif 1809 #endif
1795 case KVM_CAP_INTERNAL_ERROR_DATA: 1810 case KVM_CAP_INTERNAL_ERROR_DATA:
1796 return 1; 1811 return 1;
1797 #ifdef CONFIG_HAVE_KVM_IRQCHIP 1812 #ifdef CONFIG_HAVE_KVM_IRQCHIP
1798 case KVM_CAP_IRQ_ROUTING: 1813 case KVM_CAP_IRQ_ROUTING:
1799 return KVM_MAX_IRQ_ROUTES; 1814 return KVM_MAX_IRQ_ROUTES;
1800 #endif 1815 #endif
1801 default: 1816 default:
1802 break; 1817 break;
1803 } 1818 }
1804 return kvm_dev_ioctl_check_extension(arg); 1819 return kvm_dev_ioctl_check_extension(arg);
1805 } 1820 }
1806 1821
1807 static long kvm_dev_ioctl(struct file *filp, 1822 static long kvm_dev_ioctl(struct file *filp,
1808 unsigned int ioctl, unsigned long arg) 1823 unsigned int ioctl, unsigned long arg)
1809 { 1824 {
1810 long r = -EINVAL; 1825 long r = -EINVAL;
1811 1826
1812 switch (ioctl) { 1827 switch (ioctl) {
1813 case KVM_GET_API_VERSION: 1828 case KVM_GET_API_VERSION:
1814 r = -EINVAL; 1829 r = -EINVAL;
1815 if (arg) 1830 if (arg)
1816 goto out; 1831 goto out;
1817 r = KVM_API_VERSION; 1832 r = KVM_API_VERSION;
1818 break; 1833 break;
1819 case KVM_CREATE_VM: 1834 case KVM_CREATE_VM:
1820 r = -EINVAL; 1835 r = -EINVAL;
1821 if (arg) 1836 if (arg)
1822 goto out; 1837 goto out;
1823 r = kvm_dev_ioctl_create_vm(); 1838 r = kvm_dev_ioctl_create_vm();
1824 break; 1839 break;
1825 case KVM_CHECK_EXTENSION: 1840 case KVM_CHECK_EXTENSION:
1826 r = kvm_dev_ioctl_check_extension_generic(arg); 1841 r = kvm_dev_ioctl_check_extension_generic(arg);
1827 break; 1842 break;
1828 case KVM_GET_VCPU_MMAP_SIZE: 1843 case KVM_GET_VCPU_MMAP_SIZE:
1829 r = -EINVAL; 1844 r = -EINVAL;
1830 if (arg) 1845 if (arg)
1831 goto out; 1846 goto out;
1832 r = PAGE_SIZE; /* struct kvm_run */ 1847 r = PAGE_SIZE; /* struct kvm_run */
1833 #ifdef CONFIG_X86 1848 #ifdef CONFIG_X86
1834 r += PAGE_SIZE; /* pio data page */ 1849 r += PAGE_SIZE; /* pio data page */
1835 #endif 1850 #endif
1836 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 1851 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
1837 r += PAGE_SIZE; /* coalesced mmio ring page */ 1852 r += PAGE_SIZE; /* coalesced mmio ring page */
1838 #endif 1853 #endif
1839 break; 1854 break;
1840 case KVM_TRACE_ENABLE: 1855 case KVM_TRACE_ENABLE:
1841 case KVM_TRACE_PAUSE: 1856 case KVM_TRACE_PAUSE:
1842 case KVM_TRACE_DISABLE: 1857 case KVM_TRACE_DISABLE:
1843 r = -EOPNOTSUPP; 1858 r = -EOPNOTSUPP;
1844 break; 1859 break;
1845 default: 1860 default:
1846 return kvm_arch_dev_ioctl(filp, ioctl, arg); 1861 return kvm_arch_dev_ioctl(filp, ioctl, arg);
1847 } 1862 }
1848 out: 1863 out:
1849 return r; 1864 return r;
1850 } 1865 }
1851 1866
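The /dev/kvm character device only answers a handful of global queries; everything else hangs off the VM and vcpu file descriptors it hands out. A minimal userspace sketch of that bootstrap sequence (error handling omitted, illustrative only):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm_fd = open("/dev/kvm", O_RDWR);
		int api    = ioctl(kvm_fd, KVM_GET_API_VERSION, 0);    /* expect KVM_API_VERSION (12)      */
		int vm_fd  = ioctl(kvm_fd, KVM_CREATE_VM, 0);           /* fd from kvm_dev_ioctl_create_vm() */
		int run_sz = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);  /* bytes to mmap on each vcpu fd     */

		printf("api=%d vm_fd=%d vcpu mmap size=%d\n", api, vm_fd, run_sz);
		return 0;
	}
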
1852 static struct file_operations kvm_chardev_ops = { 1867 static struct file_operations kvm_chardev_ops = {
1853 .unlocked_ioctl = kvm_dev_ioctl, 1868 .unlocked_ioctl = kvm_dev_ioctl,
1854 .compat_ioctl = kvm_dev_ioctl, 1869 .compat_ioctl = kvm_dev_ioctl,
1855 }; 1870 };
1856 1871
1857 static struct miscdevice kvm_dev = { 1872 static struct miscdevice kvm_dev = {
1858 KVM_MINOR, 1873 KVM_MINOR,
1859 "kvm", 1874 "kvm",
1860 &kvm_chardev_ops, 1875 &kvm_chardev_ops,
1861 }; 1876 };
1862 1877
1863 static void hardware_enable(void *junk) 1878 static void hardware_enable(void *junk)
1864 { 1879 {
1865 int cpu = raw_smp_processor_id(); 1880 int cpu = raw_smp_processor_id();
1866 int r; 1881 int r;
1867 1882
1868 if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) 1883 if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
1869 return; 1884 return;
1870 1885
1871 cpumask_set_cpu(cpu, cpus_hardware_enabled); 1886 cpumask_set_cpu(cpu, cpus_hardware_enabled);
1872 1887
1873 r = kvm_arch_hardware_enable(NULL); 1888 r = kvm_arch_hardware_enable(NULL);
1874 1889
1875 if (r) { 1890 if (r) {
1876 cpumask_clear_cpu(cpu, cpus_hardware_enabled); 1891 cpumask_clear_cpu(cpu, cpus_hardware_enabled);
1877 atomic_inc(&hardware_enable_failed); 1892 atomic_inc(&hardware_enable_failed);
1878 printk(KERN_INFO "kvm: enabling virtualization on " 1893 printk(KERN_INFO "kvm: enabling virtualization on "
1879 "CPU%d failed\n", cpu); 1894 "CPU%d failed\n", cpu);
1880 } 1895 }
1881 } 1896 }
1882 1897
1883 static void hardware_disable(void *junk) 1898 static void hardware_disable(void *junk)
1884 { 1899 {
1885 int cpu = raw_smp_processor_id(); 1900 int cpu = raw_smp_processor_id();
1886 1901
1887 if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) 1902 if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
1888 return; 1903 return;
1889 cpumask_clear_cpu(cpu, cpus_hardware_enabled); 1904 cpumask_clear_cpu(cpu, cpus_hardware_enabled);
1890 kvm_arch_hardware_disable(NULL); 1905 kvm_arch_hardware_disable(NULL);
1891 } 1906 }
1892 1907
1893 static void hardware_disable_all_nolock(void) 1908 static void hardware_disable_all_nolock(void)
1894 { 1909 {
1895 BUG_ON(!kvm_usage_count); 1910 BUG_ON(!kvm_usage_count);
1896 1911
1897 kvm_usage_count--; 1912 kvm_usage_count--;
1898 if (!kvm_usage_count) 1913 if (!kvm_usage_count)
1899 on_each_cpu(hardware_disable, NULL, 1); 1914 on_each_cpu(hardware_disable, NULL, 1);
1900 } 1915 }
1901 1916
1902 static void hardware_disable_all(void) 1917 static void hardware_disable_all(void)
1903 { 1918 {
1904 spin_lock(&kvm_lock); 1919 spin_lock(&kvm_lock);
1905 hardware_disable_all_nolock(); 1920 hardware_disable_all_nolock();
1906 spin_unlock(&kvm_lock); 1921 spin_unlock(&kvm_lock);
1907 } 1922 }
1908 1923
1909 static int hardware_enable_all(void) 1924 static int hardware_enable_all(void)
1910 { 1925 {
1911 int r = 0; 1926 int r = 0;
1912 1927
1913 spin_lock(&kvm_lock); 1928 spin_lock(&kvm_lock);
1914 1929
1915 kvm_usage_count++; 1930 kvm_usage_count++;
1916 if (kvm_usage_count == 1) { 1931 if (kvm_usage_count == 1) {
1917 atomic_set(&hardware_enable_failed, 0); 1932 atomic_set(&hardware_enable_failed, 0);
1918 on_each_cpu(hardware_enable, NULL, 1); 1933 on_each_cpu(hardware_enable, NULL, 1);
1919 1934
1920 if (atomic_read(&hardware_enable_failed)) { 1935 if (atomic_read(&hardware_enable_failed)) {
1921 hardware_disable_all_nolock(); 1936 hardware_disable_all_nolock();
1922 r = -EBUSY; 1937 r = -EBUSY;
1923 } 1938 }
1924 } 1939 }
1925 1940
1926 spin_unlock(&kvm_lock); 1941 spin_unlock(&kvm_lock);
1927 1942
1928 return r; 1943 return r;
1929 } 1944 }
1930 1945
1931 static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, 1946 static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
1932 void *v) 1947 void *v)
1933 { 1948 {
1934 int cpu = (long)v; 1949 int cpu = (long)v;
1935 1950
1936 if (!kvm_usage_count) 1951 if (!kvm_usage_count)
1937 return NOTIFY_OK; 1952 return NOTIFY_OK;
1938 1953
1939 val &= ~CPU_TASKS_FROZEN; 1954 val &= ~CPU_TASKS_FROZEN;
1940 switch (val) { 1955 switch (val) {
1941 case CPU_DYING: 1956 case CPU_DYING:
1942 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", 1957 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
1943 cpu); 1958 cpu);
1944 hardware_disable(NULL); 1959 hardware_disable(NULL);
1945 break; 1960 break;
1946 case CPU_ONLINE: 1961 case CPU_ONLINE:
1947 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", 1962 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
1948 cpu); 1963 cpu);
1949 smp_call_function_single(cpu, hardware_enable, NULL, 1); 1964 smp_call_function_single(cpu, hardware_enable, NULL, 1);
1950 break; 1965 break;
1951 } 1966 }
1952 return NOTIFY_OK; 1967 return NOTIFY_OK;
1953 } 1968 }
1954 1969
1955 1970
1956 asmlinkage void kvm_handle_fault_on_reboot(void) 1971 asmlinkage void kvm_handle_fault_on_reboot(void)
1957 { 1972 {
1958 if (kvm_rebooting) 1973 if (kvm_rebooting)
1959 /* spin while reset goes on */ 1974 /* spin while reset goes on */
1960 while (true) 1975 while (true)
1961 ; 1976 ;
1962 /* Fault while not rebooting. We want the trace. */ 1977 /* Fault while not rebooting. We want the trace. */
1963 BUG(); 1978 BUG();
1964 } 1979 }
1965 EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot); 1980 EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot);
1966 1981
1967 static int kvm_reboot(struct notifier_block *notifier, unsigned long val, 1982 static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
1968 void *v) 1983 void *v)
1969 { 1984 {
1970 /* 1985 /*
1971 * Some (well, at least mine) BIOSes hang on reboot if 1986 * Some (well, at least mine) BIOSes hang on reboot if
1972 * in vmx root mode. 1987 * in vmx root mode.
1973 * 1988 *
1974 * And Intel TXT requires VMX to be off on all CPUs when the system shuts down. 1989 * And Intel TXT requires VMX to be off on all CPUs when the system shuts down.
1975 */ 1990 */
1976 printk(KERN_INFO "kvm: exiting hardware virtualization\n"); 1991 printk(KERN_INFO "kvm: exiting hardware virtualization\n");
1977 kvm_rebooting = true; 1992 kvm_rebooting = true;
1978 on_each_cpu(hardware_disable, NULL, 1); 1993 on_each_cpu(hardware_disable, NULL, 1);
1979 return NOTIFY_OK; 1994 return NOTIFY_OK;
1980 } 1995 }
1981 1996
1982 static struct notifier_block kvm_reboot_notifier = { 1997 static struct notifier_block kvm_reboot_notifier = {
1983 .notifier_call = kvm_reboot, 1998 .notifier_call = kvm_reboot,
1984 .priority = 0, 1999 .priority = 0,
1985 }; 2000 };
1986 2001
1987 static void kvm_io_bus_destroy(struct kvm_io_bus *bus) 2002 static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
1988 { 2003 {
1989 int i; 2004 int i;
1990 2005
1991 for (i = 0; i < bus->dev_count; i++) { 2006 for (i = 0; i < bus->dev_count; i++) {
1992 struct kvm_io_device *pos = bus->devs[i]; 2007 struct kvm_io_device *pos = bus->devs[i];
1993 2008
1994 kvm_iodevice_destructor(pos); 2009 kvm_iodevice_destructor(pos);
1995 } 2010 }
1996 kfree(bus); 2011 kfree(bus);
1997 } 2012 }
1998 2013
1999 /* kvm_io_bus_write - called under kvm->slots_lock */ 2014 /* kvm_io_bus_write - called under kvm->slots_lock */
2000 int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 2015 int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
2001 int len, const void *val) 2016 int len, const void *val)
2002 { 2017 {
2003 int i; 2018 int i;
2004 struct kvm_io_bus *bus; 2019 struct kvm_io_bus *bus;
2005 2020
2006 bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); 2021 bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
2007 for (i = 0; i < bus->dev_count; i++) 2022 for (i = 0; i < bus->dev_count; i++)
2008 if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) 2023 if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
2009 return 0; 2024 return 0;
2010 return -EOPNOTSUPP; 2025 return -EOPNOTSUPP;
2011 } 2026 }
2012 2027
2013 /* kvm_io_bus_read - called under kvm->slots_lock */ 2028 /* kvm_io_bus_read - called under kvm->slots_lock */
2014 int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 2029 int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
2015 int len, void *val) 2030 int len, void *val)
2016 { 2031 {
2017 int i; 2032 int i;
2018 struct kvm_io_bus *bus; 2033 struct kvm_io_bus *bus;
2019 2034
2020 bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); 2035 bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
2021 for (i = 0; i < bus->dev_count; i++) 2036 for (i = 0; i < bus->dev_count; i++)
2022 if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) 2037 if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
2023 return 0; 2038 return 0;
2024 return -EOPNOTSUPP; 2039 return -EOPNOTSUPP;
2025 } 2040 }
2026 2041
2027 /* Caller must hold slots_lock. */ 2042 /* Caller must hold slots_lock. */
2028 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, 2043 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
2029 struct kvm_io_device *dev) 2044 struct kvm_io_device *dev)
2030 { 2045 {
2031 struct kvm_io_bus *new_bus, *bus; 2046 struct kvm_io_bus *new_bus, *bus;
2032 2047
2033 bus = kvm->buses[bus_idx]; 2048 bus = kvm->buses[bus_idx];
2034 if (bus->dev_count > NR_IOBUS_DEVS-1) 2049 if (bus->dev_count > NR_IOBUS_DEVS-1)
2035 return -ENOSPC; 2050 return -ENOSPC;
2036 2051
2037 new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); 2052 new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
2038 if (!new_bus) 2053 if (!new_bus)
2039 return -ENOMEM; 2054 return -ENOMEM;
2040 memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); 2055 memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
2041 new_bus->devs[new_bus->dev_count++] = dev; 2056 new_bus->devs[new_bus->dev_count++] = dev;
2042 rcu_assign_pointer(kvm->buses[bus_idx], new_bus); 2057 rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
2043 synchronize_srcu_expedited(&kvm->srcu); 2058 synchronize_srcu_expedited(&kvm->srcu);
2044 kfree(bus); 2059 kfree(bus);
2045 2060
2046 return 0; 2061 return 0;
2047 } 2062 }
2048 2063
2049 /* Caller must hold slots_lock. */ 2064 /* Caller must hold slots_lock. */
2050 int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, 2065 int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
2051 struct kvm_io_device *dev) 2066 struct kvm_io_device *dev)
2052 { 2067 {
2053 int i, r; 2068 int i, r;
2054 struct kvm_io_bus *new_bus, *bus; 2069 struct kvm_io_bus *new_bus, *bus;
2055 2070
2056 new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); 2071 new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
2057 if (!new_bus) 2072 if (!new_bus)
2058 return -ENOMEM; 2073 return -ENOMEM;
2059 2074
2060 bus = kvm->buses[bus_idx]; 2075 bus = kvm->buses[bus_idx];
2061 memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); 2076 memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
2062 2077
2063 r = -ENOENT; 2078 r = -ENOENT;
2064 for (i = 0; i < new_bus->dev_count; i++) 2079 for (i = 0; i < new_bus->dev_count; i++)
2065 if (new_bus->devs[i] == dev) { 2080 if (new_bus->devs[i] == dev) {
2066 r = 0; 2081 r = 0;
2067 new_bus->devs[i] = new_bus->devs[--new_bus->dev_count]; 2082 new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
2068 break; 2083 break;
2069 } 2084 }
2070 2085
2071 if (r) { 2086 if (r) {
2072 kfree(new_bus); 2087 kfree(new_bus);
2073 return r; 2088 return r;
2074 } 2089 }
2075 2090
2076 rcu_assign_pointer(kvm->buses[bus_idx], new_bus); 2091 rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
2077 synchronize_srcu_expedited(&kvm->srcu); 2092 synchronize_srcu_expedited(&kvm->srcu);
2078 kfree(bus); 2093 kfree(bus);
2079 return r; 2094 return r;
2080 } 2095 }
2081 2096
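Both register and unregister follow the same publish-and-wait discipline: readers (kvm_io_bus_write/read above) walk the bus array under SRCU, so the writer never edits the live copy in place; it builds a new array, publishes it with rcu_assign_pointer(), drains existing readers with synchronize_srcu_expedited(), and only then frees the old array. Condensed into a single hypothetical helper (illustrative only, not a function in this file):

	/* Hypothetical: apply 'edit' to a copy of the bus and swap it in (caller holds slots_lock). */
	static int kvm_io_bus_replace(struct kvm *kvm, enum kvm_bus bus_idx,
				      void (*edit)(struct kvm_io_bus *copy))
	{
		struct kvm_io_bus *old = kvm->buses[bus_idx];
		struct kvm_io_bus *new = kzalloc(sizeof(*new), GFP_KERNEL);

		if (!new)
			return -ENOMEM;
		memcpy(new, old, sizeof(*new));               /* 1. copy                 */
		edit(new);                                    /* 2. modify the copy      */
		rcu_assign_pointer(kvm->buses[bus_idx], new); /* 3. publish              */
		synchronize_srcu_expedited(&kvm->srcu);       /* 4. drain SRCU readers   */
		kfree(old);                                   /* 5. free the stale array */
		return 0;
	}
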
2082 static struct notifier_block kvm_cpu_notifier = { 2097 static struct notifier_block kvm_cpu_notifier = {
2083 .notifier_call = kvm_cpu_hotplug, 2098 .notifier_call = kvm_cpu_hotplug,
2084 .priority = 20, /* must be > scheduler priority */ 2099 .priority = 20, /* must be > scheduler priority */
2085 }; 2100 };
2086 2101
2087 static int vm_stat_get(void *_offset, u64 *val) 2102 static int vm_stat_get(void *_offset, u64 *val)
2088 { 2103 {
2089 unsigned offset = (long)_offset; 2104 unsigned offset = (long)_offset;
2090 struct kvm *kvm; 2105 struct kvm *kvm;
2091 2106
2092 *val = 0; 2107 *val = 0;
2093 spin_lock(&kvm_lock); 2108 spin_lock(&kvm_lock);
2094 list_for_each_entry(kvm, &vm_list, vm_list) 2109 list_for_each_entry(kvm, &vm_list, vm_list)
2095 *val += *(u32 *)((void *)kvm + offset); 2110 *val += *(u32 *)((void *)kvm + offset);
2096 spin_unlock(&kvm_lock); 2111 spin_unlock(&kvm_lock);
2097 return 0; 2112 return 0;
2098 } 2113 }
2099 2114
2100 DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n"); 2115 DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n");
2101 2116
2102 static int vcpu_stat_get(void *_offset, u64 *val) 2117 static int vcpu_stat_get(void *_offset, u64 *val)
2103 { 2118 {
2104 unsigned offset = (long)_offset; 2119 unsigned offset = (long)_offset;
2105 struct kvm *kvm; 2120 struct kvm *kvm;
2106 struct kvm_vcpu *vcpu; 2121 struct kvm_vcpu *vcpu;
2107 int i; 2122 int i;
2108 2123
2109 *val = 0; 2124 *val = 0;
2110 spin_lock(&kvm_lock); 2125 spin_lock(&kvm_lock);
2111 list_for_each_entry(kvm, &vm_list, vm_list) 2126 list_for_each_entry(kvm, &vm_list, vm_list)
2112 kvm_for_each_vcpu(i, vcpu, kvm) 2127 kvm_for_each_vcpu(i, vcpu, kvm)
2113 *val += *(u32 *)((void *)vcpu + offset); 2128 *val += *(u32 *)((void *)vcpu + offset);
2114 2129
2115 spin_unlock(&kvm_lock); 2130 spin_unlock(&kvm_lock);
2116 return 0; 2131 return 0;
2117 } 2132 }
2118 2133
2119 DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n"); 2134 DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n");
2120 2135
2121 static const struct file_operations *stat_fops[] = { 2136 static const struct file_operations *stat_fops[] = {
2122 [KVM_STAT_VCPU] = &vcpu_stat_fops, 2137 [KVM_STAT_VCPU] = &vcpu_stat_fops,
2123 [KVM_STAT_VM] = &vm_stat_fops, 2138 [KVM_STAT_VM] = &vm_stat_fops,
2124 }; 2139 };
2125 2140
2126 static void kvm_init_debug(void) 2141 static void kvm_init_debug(void)
2127 { 2142 {
2128 struct kvm_stats_debugfs_item *p; 2143 struct kvm_stats_debugfs_item *p;
2129 2144
2130 kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); 2145 kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
2131 for (p = debugfs_entries; p->name; ++p) 2146 for (p = debugfs_entries; p->name; ++p)
2132 p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, 2147 p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
2133 (void *)(long)p->offset, 2148 (void *)(long)p->offset,
2134 stat_fops[p->kind]); 2149 stat_fops[p->kind]);
2135 } 2150 }
2136 2151
2137 static void kvm_exit_debug(void) 2152 static void kvm_exit_debug(void)
2138 { 2153 {
2139 struct kvm_stats_debugfs_item *p; 2154 struct kvm_stats_debugfs_item *p;
2140 2155
2141 for (p = debugfs_entries; p->name; ++p) 2156 for (p = debugfs_entries; p->name; ++p)
2142 debugfs_remove(p->dentry); 2157 debugfs_remove(p->dentry);
2143 debugfs_remove(kvm_debugfs_dir); 2158 debugfs_remove(kvm_debugfs_dir);
2144 } 2159 }
2145 2160
2146 static int kvm_suspend(struct sys_device *dev, pm_message_t state) 2161 static int kvm_suspend(struct sys_device *dev, pm_message_t state)
2147 { 2162 {
2148 if (kvm_usage_count) 2163 if (kvm_usage_count)
2149 hardware_disable(NULL); 2164 hardware_disable(NULL);
2150 return 0; 2165 return 0;
2151 } 2166 }
2152 2167
2153 static int kvm_resume(struct sys_device *dev) 2168 static int kvm_resume(struct sys_device *dev)
2154 { 2169 {
2155 if (kvm_usage_count) 2170 if (kvm_usage_count)
2156 hardware_enable(NULL); 2171 hardware_enable(NULL);
2157 return 0; 2172 return 0;
2158 } 2173 }
2159 2174
2160 static struct sysdev_class kvm_sysdev_class = { 2175 static struct sysdev_class kvm_sysdev_class = {
2161 .name = "kvm", 2176 .name = "kvm",
2162 .suspend = kvm_suspend, 2177 .suspend = kvm_suspend,
2163 .resume = kvm_resume, 2178 .resume = kvm_resume,
2164 }; 2179 };
2165 2180
2166 static struct sys_device kvm_sysdev = { 2181 static struct sys_device kvm_sysdev = {
2167 .id = 0, 2182 .id = 0,
2168 .cls = &kvm_sysdev_class, 2183 .cls = &kvm_sysdev_class,
2169 }; 2184 };
2170 2185
2171 struct page *bad_page; 2186 struct page *bad_page;
2172 pfn_t bad_pfn; 2187 pfn_t bad_pfn;
2173 2188
2174 static inline 2189 static inline
2175 struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) 2190 struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
2176 { 2191 {
2177 return container_of(pn, struct kvm_vcpu, preempt_notifier); 2192 return container_of(pn, struct kvm_vcpu, preempt_notifier);
2178 } 2193 }
2179 2194
2180 static void kvm_sched_in(struct preempt_notifier *pn, int cpu) 2195 static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
2181 { 2196 {
2182 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); 2197 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
2183 2198
2184 kvm_arch_vcpu_load(vcpu, cpu); 2199 kvm_arch_vcpu_load(vcpu, cpu);
2185 } 2200 }
2186 2201
2187 static void kvm_sched_out(struct preempt_notifier *pn, 2202 static void kvm_sched_out(struct preempt_notifier *pn,
2188 struct task_struct *next) 2203 struct task_struct *next)
2189 { 2204 {
2190 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); 2205 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
2191 2206
2192 kvm_arch_vcpu_put(vcpu); 2207 kvm_arch_vcpu_put(vcpu);
2193 } 2208 }
2194 2209
2195 int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, 2210 int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
2196 struct module *module) 2211 struct module *module)
2197 { 2212 {
2198 int r; 2213 int r;
2199 int cpu; 2214 int cpu;
2200 2215
2201 r = kvm_arch_init(opaque); 2216 r = kvm_arch_init(opaque);
2202 if (r) 2217 if (r)
2203 goto out_fail; 2218 goto out_fail;
2204 2219
2205 bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO); 2220 bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
2206 2221
2207 if (bad_page == NULL) { 2222 if (bad_page == NULL) {
2208 r = -ENOMEM; 2223 r = -ENOMEM;
2209 goto out; 2224 goto out;
2210 } 2225 }
2211 2226
2212 bad_pfn = page_to_pfn(bad_page); 2227 bad_pfn = page_to_pfn(bad_page);
2213 2228
2214 hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO); 2229 hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
2215 2230
2216 if (hwpoison_page == NULL) { 2231 if (hwpoison_page == NULL) {
2217 r = -ENOMEM; 2232 r = -ENOMEM;
2218 goto out_free_0; 2233 goto out_free_0;
2219 } 2234 }
2220 2235
2221 hwpoison_pfn = page_to_pfn(hwpoison_page); 2236 hwpoison_pfn = page_to_pfn(hwpoison_page);
2222 2237
2223 if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { 2238 if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
2224 r = -ENOMEM; 2239 r = -ENOMEM;
2225 goto out_free_0; 2240 goto out_free_0;
2226 } 2241 }
2227 2242
2228 r = kvm_arch_hardware_setup(); 2243 r = kvm_arch_hardware_setup();
2229 if (r < 0) 2244 if (r < 0)
2230 goto out_free_0a; 2245 goto out_free_0a;
2231 2246
2232 for_each_online_cpu(cpu) { 2247 for_each_online_cpu(cpu) {
2233 smp_call_function_single(cpu, 2248 smp_call_function_single(cpu,
2234 kvm_arch_check_processor_compat, 2249 kvm_arch_check_processor_compat,
2235 &r, 1); 2250 &r, 1);
2236 if (r < 0) 2251 if (r < 0)
2237 goto out_free_1; 2252 goto out_free_1;
2238 } 2253 }
2239 2254
2240 r = register_cpu_notifier(&kvm_cpu_notifier); 2255 r = register_cpu_notifier(&kvm_cpu_notifier);
2241 if (r) 2256 if (r)
2242 goto out_free_2; 2257 goto out_free_2;
2243 register_reboot_notifier(&kvm_reboot_notifier); 2258 register_reboot_notifier(&kvm_reboot_notifier);
2244 2259
2245 r = sysdev_class_register(&kvm_sysdev_class); 2260 r = sysdev_class_register(&kvm_sysdev_class);
2246 if (r) 2261 if (r)
2247 goto out_free_3; 2262 goto out_free_3;
2248 2263
2249 r = sysdev_register(&kvm_sysdev); 2264 r = sysdev_register(&kvm_sysdev);
2250 if (r) 2265 if (r)
2251 goto out_free_4; 2266 goto out_free_4;
2252 2267
2253 /* A kmem cache lets us meet the alignment requirements of fx_save. */ 2268 /* A kmem cache lets us meet the alignment requirements of fx_save. */
2254 if (!vcpu_align) 2269 if (!vcpu_align)
2255 vcpu_align = __alignof__(struct kvm_vcpu); 2270 vcpu_align = __alignof__(struct kvm_vcpu);
2256 kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align, 2271 kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
2257 0, NULL); 2272 0, NULL);
2258 if (!kvm_vcpu_cache) { 2273 if (!kvm_vcpu_cache) {
2259 r = -ENOMEM; 2274 r = -ENOMEM;
2260 goto out_free_5; 2275 goto out_free_5;
2261 } 2276 }
2262 2277
2263 kvm_chardev_ops.owner = module; 2278 kvm_chardev_ops.owner = module;
2264 kvm_vm_fops.owner = module; 2279 kvm_vm_fops.owner = module;
2265 kvm_vcpu_fops.owner = module; 2280 kvm_vcpu_fops.owner = module;
2266 2281
2267 r = misc_register(&kvm_dev); 2282 r = misc_register(&kvm_dev);
2268 if (r) { 2283 if (r) {
2269 printk(KERN_ERR "kvm: misc device register failed\n"); 2284 printk(KERN_ERR "kvm: misc device register failed\n");
2270 goto out_free; 2285 goto out_free;
2271 } 2286 }
2272 2287
2273 kvm_preempt_ops.sched_in = kvm_sched_in; 2288 kvm_preempt_ops.sched_in = kvm_sched_in;
2274 kvm_preempt_ops.sched_out = kvm_sched_out; 2289 kvm_preempt_ops.sched_out = kvm_sched_out;
2275 2290
2276 kvm_init_debug(); 2291 kvm_init_debug();
2277 2292
2278 return 0; 2293 return 0;
2279 2294
2280 out_free: 2295 out_free:
2281 kmem_cache_destroy(kvm_vcpu_cache); 2296 kmem_cache_destroy(kvm_vcpu_cache);
2282 out_free_5: 2297 out_free_5:
2283 sysdev_unregister(&kvm_sysdev); 2298 sysdev_unregister(&kvm_sysdev);
2284 out_free_4: 2299 out_free_4:
2285 sysdev_class_unregister(&kvm_sysdev_class); 2300 sysdev_class_unregister(&kvm_sysdev_class);
2286 out_free_3: 2301 out_free_3:
2287 unregister_reboot_notifier(&kvm_reboot_notifier); 2302 unregister_reboot_notifier(&kvm_reboot_notifier);
2288 unregister_cpu_notifier(&kvm_cpu_notifier); 2303 unregister_cpu_notifier(&kvm_cpu_notifier);
2289 out_free_2: 2304 out_free_2:
2290 out_free_1: 2305 out_free_1:
2291 kvm_arch_hardware_unsetup(); 2306 kvm_arch_hardware_unsetup();
2292 out_free_0a: 2307 out_free_0a:
2293 free_cpumask_var(cpus_hardware_enabled); 2308 free_cpumask_var(cpus_hardware_enabled);
2294 out_free_0: 2309 out_free_0:
2295 if (hwpoison_page) 2310 if (hwpoison_page)
2296 __free_page(hwpoison_page); 2311 __free_page(hwpoison_page);
2297 __free_page(bad_page); 2312 __free_page(bad_page);
2298 out: 2313 out:
2299 kvm_arch_exit(); 2314 kvm_arch_exit();
2300 out_fail: 2315 out_fail:
2301 return r; 2316 return r;
2302 } 2317 }
2303 EXPORT_SYMBOL_GPL(kvm_init); 2318 EXPORT_SYMBOL_GPL(kvm_init);
2304 2319
2305 void kvm_exit(void) 2320 void kvm_exit(void)
2306 { 2321 {
2307 kvm_exit_debug(); 2322 kvm_exit_debug();
2308 misc_deregister(&kvm_dev); 2323 misc_deregister(&kvm_dev);
2309 kmem_cache_destroy(kvm_vcpu_cache); 2324 kmem_cache_destroy(kvm_vcpu_cache);
2310 sysdev_unregister(&kvm_sysdev); 2325 sysdev_unregister(&kvm_sysdev);
2311 sysdev_class_unregister(&kvm_sysdev_class); 2326 sysdev_class_unregister(&kvm_sysdev_class);
2312 unregister_reboot_notifier(&kvm_reboot_notifier); 2327 unregister_reboot_notifier(&kvm_reboot_notifier);
2313 unregister_cpu_notifier(&kvm_cpu_notifier); 2328 unregister_cpu_notifier(&kvm_cpu_notifier);
2314 on_each_cpu(hardware_disable, NULL, 1); 2329 on_each_cpu(hardware_disable, NULL, 1);
2315 kvm_arch_hardware_unsetup(); 2330 kvm_arch_hardware_unsetup();
2316 kvm_arch_exit(); 2331 kvm_arch_exit();
2317 free_cpumask_var(cpus_hardware_enabled); 2332 free_cpumask_var(cpus_hardware_enabled);
2318 __free_page(hwpoison_page); 2333 __free_page(hwpoison_page);
2319 __free_page(bad_page); 2334 __free_page(bad_page);
2320 } 2335 }
2321 EXPORT_SYMBOL_GPL(kvm_exit); 2336 EXPORT_SYMBOL_GPL(kvm_exit);
2322 2337