Commit 376d41ff2669617a1ef828466ad07a1be99d24d3

Authored by Andi Kleen
Committed by Avi Kivity
1 parent 3b5d132186

KVM: Fix KVM_SET_SIGNAL_MASK with arg == NULL

When the user passes in a NULL mask, pass the NULL on from the ioctl
handler.

Found by gcc 4.6's new warnings.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
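
The changed hunk itself is past the end of the excerpt below; based on the commit message, it sits in the KVM_SET_SIGNAL_MASK case of kvm_vcpu_ioctl(), where a local sigset pointer is left NULL when userspace supplies no mask. The following is only a sketch of the kind of change described, reconstructed from the commit message rather than from the (not shown) hunk; the variable names argp, sigset and p are assumptions about the handler's shape:

	/*
	 * Sketch only (assumed shape of the KVM_SET_SIGNAL_MASK case in
	 * kvm_vcpu_ioctl(); the real hunk is not part of this excerpt).
	 * p stays NULL when userspace passes no mask.
	 */
	sigset_t sigset, *p = NULL;

	if (argp) {
		/* ... copy the mask in from userspace, then: ... */
		p = &sigset;
	}
	/*
	 * Before the fix the call always passed &sigset, even when argp was
	 * NULL and sigset had never been filled in; after the fix it passes
	 * p, so a NULL argument reaches kvm_vcpu_ioctl_set_sigmask() and
	 * clears the vcpu's signal mask instead of installing garbage.
	 */
	r = kvm_vcpu_ioctl_set_sigmask(vcpu, p);

From userspace this corresponds to ioctl(vcpu_fd, KVM_SET_SIGNAL_MASK, NULL) being a legitimate way to clear the mask, which is the case gcc 4.6's new warnings flagged.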

Showing 1 changed file with 1 addition and 1 deletion (inline diff)

/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affilates.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "iodev.h"

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/mm.h>
#include <linux/miscdevice.h>
#include <linux/vmalloc.h>
#include <linux/reboot.h>
#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <linux/file.h>
#include <linux/sysdev.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/smp.h>
#include <linux/anon_inodes.h>
#include <linux/profile.h>
#include <linux/kvm_para.h>
#include <linux/pagemap.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/compat.h>
#include <linux/srcu.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>

#include <asm/processor.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm-generic/bitops/le.h>

#include "coalesced_mmio.h"

#define CREATE_TRACE_POINTS
#include <trace/events/kvm.h>

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

/*
 * Ordering of locks:
 *
 *	kvm->lock --> kvm->slots_lock --> kvm->irq_lock
 */

DEFINE_SPINLOCK(kvm_lock);
LIST_HEAD(vm_list);

static cpumask_var_t cpus_hardware_enabled;
static int kvm_usage_count = 0;
static atomic_t hardware_enable_failed;

struct kmem_cache *kvm_vcpu_cache;
EXPORT_SYMBOL_GPL(kvm_vcpu_cache);

static __read_mostly struct preempt_ops kvm_preempt_ops;

struct dentry *kvm_debugfs_dir;

static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
			   unsigned long arg);
static int hardware_enable_all(void);
static void hardware_disable_all(void);

static void kvm_io_bus_destroy(struct kvm_io_bus *bus);

static bool kvm_rebooting;

static bool largepages_enabled = true;

struct page *hwpoison_page;
pfn_t hwpoison_pfn;

inline int kvm_is_mmio_pfn(pfn_t pfn)
{
	if (pfn_valid(pfn)) {
		struct page *page = compound_head(pfn_to_page(pfn));
		return PageReserved(page);
	}

	return true;
}

/*
 * Switches to specified vcpu, until a matching vcpu_put()
 */
void vcpu_load(struct kvm_vcpu *vcpu)
{
	int cpu;

	mutex_lock(&vcpu->mutex);
	cpu = get_cpu();
	preempt_notifier_register(&vcpu->preempt_notifier);
	kvm_arch_vcpu_load(vcpu, cpu);
	put_cpu();
}

void vcpu_put(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	kvm_arch_vcpu_put(vcpu);
	preempt_notifier_unregister(&vcpu->preempt_notifier);
	preempt_enable();
	mutex_unlock(&vcpu->mutex);
}

static void ack_flush(void *_completed)
{
}

static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
{
	int i, cpu, me;
	cpumask_var_t cpus;
	bool called = true;
	struct kvm_vcpu *vcpu;

	zalloc_cpumask_var(&cpus, GFP_ATOMIC);

	raw_spin_lock(&kvm->requests_lock);
	me = smp_processor_id();
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (test_and_set_bit(req, &vcpu->requests))
			continue;
		cpu = vcpu->cpu;
		if (cpus != NULL && cpu != -1 && cpu != me)
			cpumask_set_cpu(cpu, cpus);
	}
	if (unlikely(cpus == NULL))
		smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
	else if (!cpumask_empty(cpus))
		smp_call_function_many(cpus, ack_flush, NULL, 1);
	else
		called = false;
	raw_spin_unlock(&kvm->requests_lock);
	free_cpumask_var(cpus);
	return called;
}

void kvm_flush_remote_tlbs(struct kvm *kvm)
{
	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
		++kvm->stat.remote_tlb_flush;
}

void kvm_reload_remote_mmus(struct kvm *kvm)
{
	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
}

int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
	struct page *page;
	int r;

	mutex_init(&vcpu->mutex);
	vcpu->cpu = -1;
	vcpu->kvm = kvm;
	vcpu->vcpu_id = id;
	init_waitqueue_head(&vcpu->wq);

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page) {
		r = -ENOMEM;
		goto fail;
	}
	vcpu->run = page_address(page);

	r = kvm_arch_vcpu_init(vcpu);
	if (r < 0)
		goto fail_free_run;
	return 0;

fail_free_run:
	free_page((unsigned long)vcpu->run);
fail:
	return r;
}
EXPORT_SYMBOL_GPL(kvm_vcpu_init);

void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	kvm_arch_vcpu_uninit(vcpu);
	free_page((unsigned long)vcpu->run);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);

#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
{
	return container_of(mn, struct kvm, mmu_notifier);
}

static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
					     struct mm_struct *mm,
					     unsigned long address)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	int need_tlb_flush, idx;

	/*
	 * When ->invalidate_page runs, the linux pte has been zapped
	 * already but the page is still allocated until
	 * ->invalidate_page returns. So if we increase the sequence
	 * here the kvm page fault will notice if the spte can't be
	 * established because the page is going to be freed. If
	 * instead the kvm page fault establishes the spte before
	 * ->invalidate_page runs, kvm_unmap_hva will release it
	 * before returning.
	 *
	 * The sequence increase only need to be seen at spin_unlock
	 * time, and not at spin_lock time.
	 *
	 * Increasing the sequence after the spin_unlock would be
	 * unsafe because the kvm page fault could then establish the
	 * pte after kvm_unmap_hva returned, without noticing the page
	 * is going to be freed.
	 */
	idx = srcu_read_lock(&kvm->srcu);
	spin_lock(&kvm->mmu_lock);
	kvm->mmu_notifier_seq++;
	need_tlb_flush = kvm_unmap_hva(kvm, address);
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);

	/* we've to flush the tlb before the pages can be freed */
	if (need_tlb_flush)
		kvm_flush_remote_tlbs(kvm);

}

static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
					struct mm_struct *mm,
					unsigned long address,
					pte_t pte)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	spin_lock(&kvm->mmu_lock);
	kvm->mmu_notifier_seq++;
	kvm_set_spte_hva(kvm, address, pte);
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
}

static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
						    struct mm_struct *mm,
						    unsigned long start,
						    unsigned long end)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	int need_tlb_flush = 0, idx;

	idx = srcu_read_lock(&kvm->srcu);
	spin_lock(&kvm->mmu_lock);
	/*
	 * The count increase must become visible at unlock time as no
	 * spte can be established without taking the mmu_lock and
	 * count is also read inside the mmu_lock critical section.
	 */
	kvm->mmu_notifier_count++;
	for (; start < end; start += PAGE_SIZE)
		need_tlb_flush |= kvm_unmap_hva(kvm, start);
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);

	/* we've to flush the tlb before the pages can be freed */
	if (need_tlb_flush)
		kvm_flush_remote_tlbs(kvm);
}

static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
						  struct mm_struct *mm,
						  unsigned long start,
						  unsigned long end)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);

	spin_lock(&kvm->mmu_lock);
	/*
	 * This sequence increase will notify the kvm page fault that
	 * the page that is going to be mapped in the spte could have
	 * been freed.
	 */
	kvm->mmu_notifier_seq++;
	/*
	 * The above sequence increase must be visible before the
	 * below count decrease but both values are read by the kvm
	 * page fault under mmu_lock spinlock so we don't need to add
	 * a smb_wmb() here in between the two.
	 */
	kvm->mmu_notifier_count--;
	spin_unlock(&kvm->mmu_lock);

	BUG_ON(kvm->mmu_notifier_count < 0);
}

static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
					      struct mm_struct *mm,
					      unsigned long address)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	int young, idx;

	idx = srcu_read_lock(&kvm->srcu);
	spin_lock(&kvm->mmu_lock);
	young = kvm_age_hva(kvm, address);
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);

	if (young)
		kvm_flush_remote_tlbs(kvm);

	return young;
}

static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
				     struct mm_struct *mm)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	kvm_arch_flush_shadow(kvm);
	srcu_read_unlock(&kvm->srcu, idx);
}

static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
	.invalidate_page	= kvm_mmu_notifier_invalidate_page,
	.invalidate_range_start	= kvm_mmu_notifier_invalidate_range_start,
	.invalidate_range_end	= kvm_mmu_notifier_invalidate_range_end,
	.clear_flush_young	= kvm_mmu_notifier_clear_flush_young,
	.change_pte		= kvm_mmu_notifier_change_pte,
	.release		= kvm_mmu_notifier_release,
};

static int kvm_init_mmu_notifier(struct kvm *kvm)
{
	kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
	return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
}

#else  /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */

static int kvm_init_mmu_notifier(struct kvm *kvm)
{
	return 0;
}

#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */

static struct kvm *kvm_create_vm(void)
{
	int r = 0, i;
	struct kvm *kvm = kvm_arch_create_vm();

	if (IS_ERR(kvm))
		goto out;

	r = hardware_enable_all();
	if (r)
		goto out_err_nodisable;

#ifdef CONFIG_HAVE_KVM_IRQCHIP
	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
#endif

	r = -ENOMEM;
	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
	if (!kvm->memslots)
		goto out_err;
	if (init_srcu_struct(&kvm->srcu))
		goto out_err;
	for (i = 0; i < KVM_NR_BUSES; i++) {
		kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
					GFP_KERNEL);
		if (!kvm->buses[i]) {
			cleanup_srcu_struct(&kvm->srcu);
			goto out_err;
		}
	}

	r = kvm_init_mmu_notifier(kvm);
	if (r) {
		cleanup_srcu_struct(&kvm->srcu);
		goto out_err;
	}

	kvm->mm = current->mm;
	atomic_inc(&kvm->mm->mm_count);
	spin_lock_init(&kvm->mmu_lock);
	raw_spin_lock_init(&kvm->requests_lock);
	kvm_eventfd_init(kvm);
	mutex_init(&kvm->lock);
	mutex_init(&kvm->irq_lock);
	mutex_init(&kvm->slots_lock);
	atomic_set(&kvm->users_count, 1);
	spin_lock(&kvm_lock);
	list_add(&kvm->vm_list, &vm_list);
	spin_unlock(&kvm_lock);
out:
	return kvm;

out_err:
	hardware_disable_all();
out_err_nodisable:
	for (i = 0; i < KVM_NR_BUSES; i++)
		kfree(kvm->buses[i]);
	kfree(kvm->memslots);
	kfree(kvm);
	return ERR_PTR(r);
}

/*
 * Free any memory in @free but not in @dont.
 */
static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
				  struct kvm_memory_slot *dont)
{
	int i;

	if (!dont || free->rmap != dont->rmap)
		vfree(free->rmap);

	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
		vfree(free->dirty_bitmap);


	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
		if (!dont || free->lpage_info[i] != dont->lpage_info[i]) {
			vfree(free->lpage_info[i]);
			free->lpage_info[i] = NULL;
		}
	}

	free->npages = 0;
	free->dirty_bitmap = NULL;
	free->rmap = NULL;
}

void kvm_free_physmem(struct kvm *kvm)
{
	int i;
	struct kvm_memslots *slots = kvm->memslots;

	for (i = 0; i < slots->nmemslots; ++i)
		kvm_free_physmem_slot(&slots->memslots[i], NULL);

	kfree(kvm->memslots);
}

static void kvm_destroy_vm(struct kvm *kvm)
{
	int i;
	struct mm_struct *mm = kvm->mm;

	kvm_arch_sync_events(kvm);
	spin_lock(&kvm_lock);
	list_del(&kvm->vm_list);
	spin_unlock(&kvm_lock);
	kvm_free_irq_routing(kvm);
	for (i = 0; i < KVM_NR_BUSES; i++)
		kvm_io_bus_destroy(kvm->buses[i]);
	kvm_coalesced_mmio_free(kvm);
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
#else
	kvm_arch_flush_shadow(kvm);
#endif
	kvm_arch_destroy_vm(kvm);
	hardware_disable_all();
	mmdrop(mm);
}

void kvm_get_kvm(struct kvm *kvm)
{
	atomic_inc(&kvm->users_count);
}
EXPORT_SYMBOL_GPL(kvm_get_kvm);

void kvm_put_kvm(struct kvm *kvm)
{
	if (atomic_dec_and_test(&kvm->users_count))
		kvm_destroy_vm(kvm);
}
EXPORT_SYMBOL_GPL(kvm_put_kvm);


static int kvm_vm_release(struct inode *inode, struct file *filp)
{
	struct kvm *kvm = filp->private_data;

	kvm_irqfd_release(kvm);

	kvm_put_kvm(kvm);
	return 0;
}

/*
 * Allocate some memory and give it an address in the guest physical address
 * space.
 *
 * Discontiguous memory is allowed, mostly for framebuffers.
 *
 * Must be called holding mmap_sem for write.
 */
int __kvm_set_memory_region(struct kvm *kvm,
			    struct kvm_userspace_memory_region *mem,
			    int user_alloc)
{
	int r, flush_shadow = 0;
	gfn_t base_gfn;
	unsigned long npages;
	unsigned long i;
	struct kvm_memory_slot *memslot;
	struct kvm_memory_slot old, new;
	struct kvm_memslots *slots, *old_memslots;

	r = -EINVAL;
	/* General sanity checks */
	if (mem->memory_size & (PAGE_SIZE - 1))
		goto out;
	if (mem->guest_phys_addr & (PAGE_SIZE - 1))
		goto out;
	if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
		goto out;
	if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
		goto out;
	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
		goto out;

	memslot = &kvm->memslots->memslots[mem->slot];
	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
	npages = mem->memory_size >> PAGE_SHIFT;

	r = -EINVAL;
	if (npages > KVM_MEM_MAX_NR_PAGES)
		goto out;

	if (!npages)
		mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;

	new = old = *memslot;

	new.base_gfn = base_gfn;
	new.npages = npages;
	new.flags = mem->flags;

	/* Disallow changing a memory slot's size. */
	r = -EINVAL;
	if (npages && old.npages && npages != old.npages)
		goto out_free;

	/* Check for overlaps */
	r = -EEXIST;
	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
		struct kvm_memory_slot *s = &kvm->memslots->memslots[i];

		if (s == memslot || !s->npages)
			continue;
		if (!((base_gfn + npages <= s->base_gfn) ||
		      (base_gfn >= s->base_gfn + s->npages)))
			goto out_free;
	}

	/* Free page dirty bitmap if unneeded */
	if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
		new.dirty_bitmap = NULL;

	r = -ENOMEM;

	/* Allocate if a slot is being created */
#ifndef CONFIG_S390
	if (npages && !new.rmap) {
		new.rmap = vmalloc(npages * sizeof(*new.rmap));

		if (!new.rmap)
			goto out_free;

		memset(new.rmap, 0, npages * sizeof(*new.rmap));

		new.user_alloc = user_alloc;
		new.userspace_addr = mem->userspace_addr;
	}
	if (!npages)
		goto skip_lpage;

	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
		unsigned long ugfn;
		unsigned long j;
		int lpages;
		int level = i + 2;

		/* Avoid unused variable warning if no large pages */
		(void)level;

		if (new.lpage_info[i])
			continue;

		lpages = 1 + (base_gfn + npages - 1) /
			     KVM_PAGES_PER_HPAGE(level);
		lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level);

		new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i]));

		if (!new.lpage_info[i])
			goto out_free;

		memset(new.lpage_info[i], 0,
		       lpages * sizeof(*new.lpage_info[i]));

		if (base_gfn % KVM_PAGES_PER_HPAGE(level))
			new.lpage_info[i][0].write_count = 1;
		if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE(level))
			new.lpage_info[i][lpages - 1].write_count = 1;
		ugfn = new.userspace_addr >> PAGE_SHIFT;
		/*
		 * If the gfn and userspace address are not aligned wrt each
		 * other, or if explicitly asked to, disable large page
		 * support for this slot
		 */
		if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
		    !largepages_enabled)
			for (j = 0; j < lpages; ++j)
				new.lpage_info[i][j].write_count = 1;
	}

skip_lpage:

	/* Allocate page dirty bitmap if needed */
	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
		unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new);

		new.dirty_bitmap = vmalloc(dirty_bytes);
		if (!new.dirty_bitmap)
			goto out_free;
		memset(new.dirty_bitmap, 0, dirty_bytes);
		/* destroy any largepage mappings for dirty tracking */
		if (old.npages)
			flush_shadow = 1;
	}
#else  /* not defined CONFIG_S390 */
	new.user_alloc = user_alloc;
	if (user_alloc)
		new.userspace_addr = mem->userspace_addr;
#endif /* not defined CONFIG_S390 */

	if (!npages) {
		r = -ENOMEM;
		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
		if (!slots)
			goto out_free;
		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
		if (mem->slot >= slots->nmemslots)
			slots->nmemslots = mem->slot + 1;
		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;

		old_memslots = kvm->memslots;
		rcu_assign_pointer(kvm->memslots, slots);
		synchronize_srcu_expedited(&kvm->srcu);
		/* From this point no new shadow pages pointing to a deleted
		 * memslot will be created.
		 *
		 * validation of sp->gfn happens in:
		 *	- gfn_to_hva (kvm_read_guest, gfn_to_pfn)
		 *	- kvm_is_visible_gfn (mmu_check_roots)
		 */
		kvm_arch_flush_shadow(kvm);
		kfree(old_memslots);
	}

	r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
	if (r)
		goto out_free;

#ifdef CONFIG_DMAR
	/* map the pages in iommu page table */
	if (npages) {
		r = kvm_iommu_map_pages(kvm, &new);
		if (r)
			goto out_free;
	}
#endif

	r = -ENOMEM;
	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
	if (!slots)
		goto out_free;
	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
	if (mem->slot >= slots->nmemslots)
		slots->nmemslots = mem->slot + 1;

	/* actual memory is freed via old in kvm_free_physmem_slot below */
	if (!npages) {
		new.rmap = NULL;
		new.dirty_bitmap = NULL;
		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
			new.lpage_info[i] = NULL;
	}

	slots->memslots[mem->slot] = new;
	old_memslots = kvm->memslots;
	rcu_assign_pointer(kvm->memslots, slots);
	synchronize_srcu_expedited(&kvm->srcu);

	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);

	kvm_free_physmem_slot(&old, &new);
	kfree(old_memslots);

	if (flush_shadow)
		kvm_arch_flush_shadow(kvm);

	return 0;

out_free:
	kvm_free_physmem_slot(&new, &old);
out:
	return r;

}
EXPORT_SYMBOL_GPL(__kvm_set_memory_region);

int kvm_set_memory_region(struct kvm *kvm,
			  struct kvm_userspace_memory_region *mem,
			  int user_alloc)
{
	int r;

	mutex_lock(&kvm->slots_lock);
	r = __kvm_set_memory_region(kvm, mem, user_alloc);
	mutex_unlock(&kvm->slots_lock);
	return r;
}
EXPORT_SYMBOL_GPL(kvm_set_memory_region);

int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
				   struct
				   kvm_userspace_memory_region *mem,
				   int user_alloc)
{
	if (mem->slot >= KVM_MEMORY_SLOTS)
		return -EINVAL;
	return kvm_set_memory_region(kvm, mem, user_alloc);
}

int kvm_get_dirty_log(struct kvm *kvm,
			struct kvm_dirty_log *log, int *is_dirty)
{
	struct kvm_memory_slot *memslot;
	int r, i;
	unsigned long n;
	unsigned long any = 0;

	r = -EINVAL;
	if (log->slot >= KVM_MEMORY_SLOTS)
		goto out;

	memslot = &kvm->memslots->memslots[log->slot];
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	n = kvm_dirty_bitmap_bytes(memslot);

	for (i = 0; !any && i < n/sizeof(long); ++i)
		any = memslot->dirty_bitmap[i];

	r = -EFAULT;
	if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
		goto out;

	if (any)
		*is_dirty = 1;

	r = 0;
out:
	return r;
}

void kvm_disable_largepages(void)
{
	largepages_enabled = false;
}
EXPORT_SYMBOL_GPL(kvm_disable_largepages);

int is_error_page(struct page *page)
{
	return page == bad_page || page == hwpoison_page;
}
EXPORT_SYMBOL_GPL(is_error_page);

int is_error_pfn(pfn_t pfn)
{
	return pfn == bad_pfn || pfn == hwpoison_pfn;
}
EXPORT_SYMBOL_GPL(is_error_pfn);

int is_hwpoison_pfn(pfn_t pfn)
{
	return pfn == hwpoison_pfn;
}
EXPORT_SYMBOL_GPL(is_hwpoison_pfn);

static inline unsigned long bad_hva(void)
{
	return PAGE_OFFSET;
}

int kvm_is_error_hva(unsigned long addr)
{
	return addr == bad_hva();
}
EXPORT_SYMBOL_GPL(kvm_is_error_hva);

struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
{
	int i;
	struct kvm_memslots *slots = kvm_memslots(kvm);

	for (i = 0; i < slots->nmemslots; ++i) {
		struct kvm_memory_slot *memslot = &slots->memslots[i];

		if (gfn >= memslot->base_gfn
		    && gfn < memslot->base_gfn + memslot->npages)
			return memslot;
	}
	return NULL;
}
EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased);

struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
	gfn = unalias_gfn(kvm, gfn);
	return gfn_to_memslot_unaliased(kvm, gfn);
}

int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
	int i;
	struct kvm_memslots *slots = kvm_memslots(kvm);

	gfn = unalias_gfn_instantiation(kvm, gfn);
	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
		struct kvm_memory_slot *memslot = &slots->memslots[i];

		if (memslot->flags & KVM_MEMSLOT_INVALID)
			continue;

		if (gfn >= memslot->base_gfn
		    && gfn < memslot->base_gfn + memslot->npages)
			return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);

unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
{
	struct vm_area_struct *vma;
	unsigned long addr, size;

	size = PAGE_SIZE;

	addr = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(addr))
		return PAGE_SIZE;

	down_read(&current->mm->mmap_sem);
	vma = find_vma(current->mm, addr);
	if (!vma)
		goto out;

	size = vma_kernel_pagesize(vma);

out:
	up_read(&current->mm->mmap_sem);

	return size;
}

int memslot_id(struct kvm *kvm, gfn_t gfn)
{
	int i;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *memslot = NULL;

	gfn = unalias_gfn(kvm, gfn);
	for (i = 0; i < slots->nmemslots; ++i) {
		memslot = &slots->memslots[i];

		if (gfn >= memslot->base_gfn
		    && gfn < memslot->base_gfn + memslot->npages)
			break;
	}

	return memslot - slots->memslots;
}

static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
{
	return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
}

unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_memory_slot *slot;

	gfn = unalias_gfn_instantiation(kvm, gfn);
	slot = gfn_to_memslot_unaliased(kvm, gfn);
	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
		return bad_hva();
	return gfn_to_hva_memslot(slot, gfn);
}
EXPORT_SYMBOL_GPL(gfn_to_hva);

static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
{
	struct page *page[1];
	int npages;
	pfn_t pfn;

	might_sleep();

	npages = get_user_pages_fast(addr, 1, 1, page);

	if (unlikely(npages != 1)) {
		struct vm_area_struct *vma;

		if (is_hwpoison_address(addr)) {
			get_page(hwpoison_page);
			return page_to_pfn(hwpoison_page);
		}

		down_read(&current->mm->mmap_sem);
		vma = find_vma(current->mm, addr);

		if (vma == NULL || addr < vma->vm_start ||
		    !(vma->vm_flags & VM_PFNMAP)) {
			up_read(&current->mm->mmap_sem);
			get_page(bad_page);
			return page_to_pfn(bad_page);
		}

		pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
		up_read(&current->mm->mmap_sem);
		BUG_ON(!kvm_is_mmio_pfn(pfn));
	} else
		pfn = page_to_pfn(page[0]);

	return pfn;
}

pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
{
	unsigned long addr;

	addr = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(addr)) {
		get_page(bad_page);
		return page_to_pfn(bad_page);
	}

	return hva_to_pfn(kvm, addr);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn);

pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
			 struct kvm_memory_slot *slot, gfn_t gfn)
{
	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
	return hva_to_pfn(kvm, addr);
}

struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
	pfn_t pfn;

	pfn = gfn_to_pfn(kvm, gfn);
	if (!kvm_is_mmio_pfn(pfn))
		return pfn_to_page(pfn);

	WARN_ON(kvm_is_mmio_pfn(pfn));

	get_page(bad_page);
	return bad_page;
}

EXPORT_SYMBOL_GPL(gfn_to_page);

void kvm_release_page_clean(struct page *page)
{
	kvm_release_pfn_clean(page_to_pfn(page));
}
EXPORT_SYMBOL_GPL(kvm_release_page_clean);

void kvm_release_pfn_clean(pfn_t pfn)
{
	if (!kvm_is_mmio_pfn(pfn))
		put_page(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);

void kvm_release_page_dirty(struct page *page)
{
	kvm_release_pfn_dirty(page_to_pfn(page));
}
EXPORT_SYMBOL_GPL(kvm_release_page_dirty);

void kvm_release_pfn_dirty(pfn_t pfn)
{
	kvm_set_pfn_dirty(pfn);
	kvm_release_pfn_clean(pfn);
}
EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);

void kvm_set_page_dirty(struct page *page)
{
	kvm_set_pfn_dirty(page_to_pfn(page));
}
EXPORT_SYMBOL_GPL(kvm_set_page_dirty);

void kvm_set_pfn_dirty(pfn_t pfn)
{
	if (!kvm_is_mmio_pfn(pfn)) {
		struct page *page = pfn_to_page(pfn);
		if (!PageReserved(page))
			SetPageDirty(page);
	}
}
EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);

void kvm_set_pfn_accessed(pfn_t pfn)
{
	if (!kvm_is_mmio_pfn(pfn))
		mark_page_accessed(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);

void kvm_get_pfn(pfn_t pfn)
{
	if (!kvm_is_mmio_pfn(pfn))
		get_page(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_get_pfn);

static int next_segment(unsigned long len, int offset)
{
	if (len > PAGE_SIZE - offset)
		return PAGE_SIZE - offset;
	else
		return len;
}

int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
			int len)
{
	int r;
	unsigned long addr;

	addr = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(addr))
		return -EFAULT;
	r = copy_from_user(data, (void __user *)addr + offset, len);
	if (r)
		return -EFAULT;
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_read_guest_page);

int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
{
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int seg;
	int offset = offset_in_page(gpa);
	int ret;

	while ((seg = next_segment(len, offset)) != 0) {
		ret = kvm_read_guest_page(kvm, gfn, data, offset, seg);
		if (ret < 0)
			return ret;
		offset = 0;
		len -= seg;
		data += seg;
		++gfn;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_read_guest);

int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
			  unsigned long len)
{
	int r;
	unsigned long addr;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int offset = offset_in_page(gpa);

	addr = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(addr))
		return -EFAULT;
	pagefault_disable();
	r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
	pagefault_enable();
	if (r)
		return -EFAULT;
	return 0;
}
EXPORT_SYMBOL(kvm_read_guest_atomic);

int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
			 int offset, int len)
{
	int r;
	unsigned long addr;

	addr = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(addr))
		return -EFAULT;
	r = copy_to_user((void __user *)addr + offset, data, len);
	if (r)
		return -EFAULT;
	mark_page_dirty(kvm, gfn);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_write_guest_page);

int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
1157 unsigned long len) 1157 unsigned long len)
1158 { 1158 {
1159 gfn_t gfn = gpa >> PAGE_SHIFT; 1159 gfn_t gfn = gpa >> PAGE_SHIFT;
1160 int seg; 1160 int seg;
1161 int offset = offset_in_page(gpa); 1161 int offset = offset_in_page(gpa);
1162 int ret; 1162 int ret;
1163 1163
1164 while ((seg = next_segment(len, offset)) != 0) { 1164 while ((seg = next_segment(len, offset)) != 0) {
1165 ret = kvm_write_guest_page(kvm, gfn, data, offset, seg); 1165 ret = kvm_write_guest_page(kvm, gfn, data, offset, seg);
1166 if (ret < 0) 1166 if (ret < 0)
1167 return ret; 1167 return ret;
1168 offset = 0; 1168 offset = 0;
1169 len -= seg; 1169 len -= seg;
1170 data += seg; 1170 data += seg;
1171 ++gfn; 1171 ++gfn;
1172 } 1172 }
1173 return 0; 1173 return 0;
1174 } 1174 }
1175 1175
1176 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) 1176 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
1177 { 1177 {
1178 return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len); 1178 return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len);
1179 } 1179 }
1180 EXPORT_SYMBOL_GPL(kvm_clear_guest_page); 1180 EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
1181 1181
1182 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) 1182 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
1183 { 1183 {
1184 gfn_t gfn = gpa >> PAGE_SHIFT; 1184 gfn_t gfn = gpa >> PAGE_SHIFT;
1185 int seg; 1185 int seg;
1186 int offset = offset_in_page(gpa); 1186 int offset = offset_in_page(gpa);
1187 int ret; 1187 int ret;
1188 1188
1189 while ((seg = next_segment(len, offset)) != 0) { 1189 while ((seg = next_segment(len, offset)) != 0) {
1190 ret = kvm_clear_guest_page(kvm, gfn, offset, seg); 1190 ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
1191 if (ret < 0) 1191 if (ret < 0)
1192 return ret; 1192 return ret;
1193 offset = 0; 1193 offset = 0;
1194 len -= seg; 1194 len -= seg;
1195 ++gfn; 1195 ++gfn;
1196 } 1196 }
1197 return 0; 1197 return 0;
1198 } 1198 }
1199 EXPORT_SYMBOL_GPL(kvm_clear_guest); 1199 EXPORT_SYMBOL_GPL(kvm_clear_guest);
1200 1200
1201 void mark_page_dirty(struct kvm *kvm, gfn_t gfn) 1201 void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
1202 { 1202 {
1203 struct kvm_memory_slot *memslot; 1203 struct kvm_memory_slot *memslot;
1204 1204
1205 gfn = unalias_gfn(kvm, gfn); 1205 gfn = unalias_gfn(kvm, gfn);
1206 memslot = gfn_to_memslot_unaliased(kvm, gfn); 1206 memslot = gfn_to_memslot_unaliased(kvm, gfn);
1207 if (memslot && memslot->dirty_bitmap) { 1207 if (memslot && memslot->dirty_bitmap) {
1208 unsigned long rel_gfn = gfn - memslot->base_gfn; 1208 unsigned long rel_gfn = gfn - memslot->base_gfn;
1209 1209
1210 generic___set_le_bit(rel_gfn, memslot->dirty_bitmap); 1210 generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
1211 } 1211 }
1212 } 1212 }
1213 1213
1214 /* 1214 /*
1215 * The vCPU has executed a HLT instruction with in-kernel mode enabled. 1215 * The vCPU has executed a HLT instruction with in-kernel mode enabled.
1216 */ 1216 */
1217 void kvm_vcpu_block(struct kvm_vcpu *vcpu) 1217 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
1218 { 1218 {
1219 DEFINE_WAIT(wait); 1219 DEFINE_WAIT(wait);
1220 1220
1221 for (;;) { 1221 for (;;) {
1222 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); 1222 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
1223 1223
1224 if (kvm_arch_vcpu_runnable(vcpu)) { 1224 if (kvm_arch_vcpu_runnable(vcpu)) {
1225 set_bit(KVM_REQ_UNHALT, &vcpu->requests); 1225 set_bit(KVM_REQ_UNHALT, &vcpu->requests);
1226 break; 1226 break;
1227 } 1227 }
1228 if (kvm_cpu_has_pending_timer(vcpu)) 1228 if (kvm_cpu_has_pending_timer(vcpu))
1229 break; 1229 break;
1230 if (signal_pending(current)) 1230 if (signal_pending(current))
1231 break; 1231 break;
1232 1232
1233 schedule(); 1233 schedule();
1234 } 1234 }
1235 1235
1236 finish_wait(&vcpu->wq, &wait); 1236 finish_wait(&vcpu->wq, &wait);
1237 } 1237 }
1238 1238
1239 void kvm_resched(struct kvm_vcpu *vcpu) 1239 void kvm_resched(struct kvm_vcpu *vcpu)
1240 { 1240 {
1241 if (!need_resched()) 1241 if (!need_resched())
1242 return; 1242 return;
1243 cond_resched(); 1243 cond_resched();
1244 } 1244 }
1245 EXPORT_SYMBOL_GPL(kvm_resched); 1245 EXPORT_SYMBOL_GPL(kvm_resched);
1246 1246
1247 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu) 1247 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu)
1248 { 1248 {
1249 ktime_t expires; 1249 ktime_t expires;
1250 DEFINE_WAIT(wait); 1250 DEFINE_WAIT(wait);
1251 1251
1252 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); 1252 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
1253 1253
1254 /* Sleep for 100 us, and hope lock-holder got scheduled */ 1254 /* Sleep for 100 us, and hope lock-holder got scheduled */
1255 expires = ktime_add_ns(ktime_get(), 100000UL); 1255 expires = ktime_add_ns(ktime_get(), 100000UL);
1256 schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); 1256 schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
1257 1257
1258 finish_wait(&vcpu->wq, &wait); 1258 finish_wait(&vcpu->wq, &wait);
1259 } 1259 }
1260 EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); 1260 EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
1261 1261
1262 static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1262 static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1263 { 1263 {
1264 struct kvm_vcpu *vcpu = vma->vm_file->private_data; 1264 struct kvm_vcpu *vcpu = vma->vm_file->private_data;
1265 struct page *page; 1265 struct page *page;
1266 1266
1267 if (vmf->pgoff == 0) 1267 if (vmf->pgoff == 0)
1268 page = virt_to_page(vcpu->run); 1268 page = virt_to_page(vcpu->run);
1269 #ifdef CONFIG_X86 1269 #ifdef CONFIG_X86
1270 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) 1270 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
1271 page = virt_to_page(vcpu->arch.pio_data); 1271 page = virt_to_page(vcpu->arch.pio_data);
1272 #endif 1272 #endif
1273 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 1273 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
1274 else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) 1274 else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
1275 page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); 1275 page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
1276 #endif 1276 #endif
1277 else 1277 else
1278 return VM_FAULT_SIGBUS; 1278 return VM_FAULT_SIGBUS;
1279 get_page(page); 1279 get_page(page);
1280 vmf->page = page; 1280 vmf->page = page;
1281 return 0; 1281 return 0;
1282 } 1282 }
1283 1283
1284 static const struct vm_operations_struct kvm_vcpu_vm_ops = { 1284 static const struct vm_operations_struct kvm_vcpu_vm_ops = {
1285 .fault = kvm_vcpu_fault, 1285 .fault = kvm_vcpu_fault,
1286 }; 1286 };
1287 1287
1288 static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma) 1288 static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
1289 { 1289 {
1290 vma->vm_ops = &kvm_vcpu_vm_ops; 1290 vma->vm_ops = &kvm_vcpu_vm_ops;
1291 return 0; 1291 return 0;
1292 } 1292 }
1293 1293
1294 static int kvm_vcpu_release(struct inode *inode, struct file *filp) 1294 static int kvm_vcpu_release(struct inode *inode, struct file *filp)
1295 { 1295 {
1296 struct kvm_vcpu *vcpu = filp->private_data; 1296 struct kvm_vcpu *vcpu = filp->private_data;
1297 1297
1298 kvm_put_kvm(vcpu->kvm); 1298 kvm_put_kvm(vcpu->kvm);
1299 return 0; 1299 return 0;
1300 } 1300 }
1301 1301
1302 static struct file_operations kvm_vcpu_fops = { 1302 static struct file_operations kvm_vcpu_fops = {
1303 .release = kvm_vcpu_release, 1303 .release = kvm_vcpu_release,
1304 .unlocked_ioctl = kvm_vcpu_ioctl, 1304 .unlocked_ioctl = kvm_vcpu_ioctl,
1305 .compat_ioctl = kvm_vcpu_ioctl, 1305 .compat_ioctl = kvm_vcpu_ioctl,
1306 .mmap = kvm_vcpu_mmap, 1306 .mmap = kvm_vcpu_mmap,
1307 }; 1307 };
1308 1308
1309 /* 1309 /*
1310 * Allocates an inode for the vcpu. 1310 * Allocates an inode for the vcpu.
1311 */ 1311 */
1312 static int create_vcpu_fd(struct kvm_vcpu *vcpu) 1312 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
1313 { 1313 {
1314 return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR); 1314 return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR);
1315 } 1315 }
1316 1316
1317 /* 1317 /*
1318 * Creates some virtual cpus. Good luck creating more than one. 1318 * Creates some virtual cpus. Good luck creating more than one.
1319 */ 1319 */
1320 static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) 1320 static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
1321 { 1321 {
1322 int r; 1322 int r;
1323 struct kvm_vcpu *vcpu, *v; 1323 struct kvm_vcpu *vcpu, *v;
1324 1324
1325 vcpu = kvm_arch_vcpu_create(kvm, id); 1325 vcpu = kvm_arch_vcpu_create(kvm, id);
1326 if (IS_ERR(vcpu)) 1326 if (IS_ERR(vcpu))
1327 return PTR_ERR(vcpu); 1327 return PTR_ERR(vcpu);
1328 1328
1329 preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); 1329 preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
1330 1330
1331 r = kvm_arch_vcpu_setup(vcpu); 1331 r = kvm_arch_vcpu_setup(vcpu);
1332 if (r) 1332 if (r)
1333 return r; 1333 return r;
1334 1334
1335 mutex_lock(&kvm->lock); 1335 mutex_lock(&kvm->lock);
1336 if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) { 1336 if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) {
1337 r = -EINVAL; 1337 r = -EINVAL;
1338 goto vcpu_destroy; 1338 goto vcpu_destroy;
1339 } 1339 }
1340 1340
1341 kvm_for_each_vcpu(r, v, kvm) 1341 kvm_for_each_vcpu(r, v, kvm)
1342 if (v->vcpu_id == id) { 1342 if (v->vcpu_id == id) {
1343 r = -EEXIST; 1343 r = -EEXIST;
1344 goto vcpu_destroy; 1344 goto vcpu_destroy;
1345 } 1345 }
1346 1346
1347 BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]); 1347 BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]);
1348 1348
1349 /* Now it's all set up, let userspace reach it */ 1349 /* Now it's all set up, let userspace reach it */
1350 kvm_get_kvm(kvm); 1350 kvm_get_kvm(kvm);
1351 r = create_vcpu_fd(vcpu); 1351 r = create_vcpu_fd(vcpu);
1352 if (r < 0) { 1352 if (r < 0) {
1353 kvm_put_kvm(kvm); 1353 kvm_put_kvm(kvm);
1354 goto vcpu_destroy; 1354 goto vcpu_destroy;
1355 } 1355 }
1356 1356
1357 kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu; 1357 kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
1358 smp_wmb(); 1358 smp_wmb();
1359 atomic_inc(&kvm->online_vcpus); 1359 atomic_inc(&kvm->online_vcpus);
1360 1360
1361 #ifdef CONFIG_KVM_APIC_ARCHITECTURE 1361 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
1362 if (kvm->bsp_vcpu_id == id) 1362 if (kvm->bsp_vcpu_id == id)
1363 kvm->bsp_vcpu = vcpu; 1363 kvm->bsp_vcpu = vcpu;
1364 #endif 1364 #endif
1365 mutex_unlock(&kvm->lock); 1365 mutex_unlock(&kvm->lock);
1366 return r; 1366 return r;
1367 1367
1368 vcpu_destroy: 1368 vcpu_destroy:
1369 mutex_unlock(&kvm->lock); 1369 mutex_unlock(&kvm->lock);
1370 kvm_arch_vcpu_destroy(vcpu); 1370 kvm_arch_vcpu_destroy(vcpu);
1371 return r; 1371 return r;
1372 } 1372 }
1373 1373
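/*
 * A NULL sigset clears any temporary signal mask installed for the vcpu
 * (sigset_active = 0); a non-NULL sigset is installed with SIGKILL and
 * SIGSTOP removed so those signals stay deliverable.
 */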
1374 static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) 1374 static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
1375 { 1375 {
1376 if (sigset) { 1376 if (sigset) {
1377 sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP)); 1377 sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP));
1378 vcpu->sigset_active = 1; 1378 vcpu->sigset_active = 1;
1379 vcpu->sigset = *sigset; 1379 vcpu->sigset = *sigset;
1380 } else 1380 } else
1381 vcpu->sigset_active = 0; 1381 vcpu->sigset_active = 0;
1382 return 0; 1382 return 0;
1383 } 1383 }
1384 1384
1385 static long kvm_vcpu_ioctl(struct file *filp, 1385 static long kvm_vcpu_ioctl(struct file *filp,
1386 unsigned int ioctl, unsigned long arg) 1386 unsigned int ioctl, unsigned long arg)
1387 { 1387 {
1388 struct kvm_vcpu *vcpu = filp->private_data; 1388 struct kvm_vcpu *vcpu = filp->private_data;
1389 void __user *argp = (void __user *)arg; 1389 void __user *argp = (void __user *)arg;
1390 int r; 1390 int r;
1391 struct kvm_fpu *fpu = NULL; 1391 struct kvm_fpu *fpu = NULL;
1392 struct kvm_sregs *kvm_sregs = NULL; 1392 struct kvm_sregs *kvm_sregs = NULL;
1393 1393
1394 if (vcpu->kvm->mm != current->mm) 1394 if (vcpu->kvm->mm != current->mm)
1395 return -EIO; 1395 return -EIO;
1396 1396
1397 #if defined(CONFIG_S390) || defined(CONFIG_PPC) 1397 #if defined(CONFIG_S390) || defined(CONFIG_PPC)
1398 /* 1398 /*
1399 * Special cases: vcpu ioctls that are asynchronous to vcpu execution, 1399 * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
1400 * so vcpu_load() would break it. 1400 * so vcpu_load() would break it.
1401 */ 1401 */
1402 if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT) 1402 if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT)
1403 return kvm_arch_vcpu_ioctl(filp, ioctl, arg); 1403 return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
1404 #endif 1404 #endif
1405 1405
1406 1406
1407 vcpu_load(vcpu); 1407 vcpu_load(vcpu);
1408 switch (ioctl) { 1408 switch (ioctl) {
1409 case KVM_RUN: 1409 case KVM_RUN:
1410 r = -EINVAL; 1410 r = -EINVAL;
1411 if (arg) 1411 if (arg)
1412 goto out; 1412 goto out;
1413 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); 1413 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
1414 break; 1414 break;
1415 case KVM_GET_REGS: { 1415 case KVM_GET_REGS: {
1416 struct kvm_regs *kvm_regs; 1416 struct kvm_regs *kvm_regs;
1417 1417
1418 r = -ENOMEM; 1418 r = -ENOMEM;
1419 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); 1419 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
1420 if (!kvm_regs) 1420 if (!kvm_regs)
1421 goto out; 1421 goto out;
1422 r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); 1422 r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
1423 if (r) 1423 if (r)
1424 goto out_free1; 1424 goto out_free1;
1425 r = -EFAULT; 1425 r = -EFAULT;
1426 if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs))) 1426 if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs)))
1427 goto out_free1; 1427 goto out_free1;
1428 r = 0; 1428 r = 0;
1429 out_free1: 1429 out_free1:
1430 kfree(kvm_regs); 1430 kfree(kvm_regs);
1431 break; 1431 break;
1432 } 1432 }
1433 case KVM_SET_REGS: { 1433 case KVM_SET_REGS: {
1434 struct kvm_regs *kvm_regs; 1434 struct kvm_regs *kvm_regs;
1435 1435
1436 r = -ENOMEM; 1436 r = -ENOMEM;
1437 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); 1437 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
1438 if (!kvm_regs) 1438 if (!kvm_regs)
1439 goto out; 1439 goto out;
1440 r = -EFAULT; 1440 r = -EFAULT;
1441 if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs))) 1441 if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs)))
1442 goto out_free2; 1442 goto out_free2;
1443 r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); 1443 r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
1444 if (r) 1444 if (r)
1445 goto out_free2; 1445 goto out_free2;
1446 r = 0; 1446 r = 0;
1447 out_free2: 1447 out_free2:
1448 kfree(kvm_regs); 1448 kfree(kvm_regs);
1449 break; 1449 break;
1450 } 1450 }
1451 case KVM_GET_SREGS: { 1451 case KVM_GET_SREGS: {
1452 kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); 1452 kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
1453 r = -ENOMEM; 1453 r = -ENOMEM;
1454 if (!kvm_sregs) 1454 if (!kvm_sregs)
1455 goto out; 1455 goto out;
1456 r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs); 1456 r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs);
1457 if (r) 1457 if (r)
1458 goto out; 1458 goto out;
1459 r = -EFAULT; 1459 r = -EFAULT;
1460 if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs))) 1460 if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs)))
1461 goto out; 1461 goto out;
1462 r = 0; 1462 r = 0;
1463 break; 1463 break;
1464 } 1464 }
1465 case KVM_SET_SREGS: { 1465 case KVM_SET_SREGS: {
1466 kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL); 1466 kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
1467 r = -ENOMEM; 1467 r = -ENOMEM;
1468 if (!kvm_sregs) 1468 if (!kvm_sregs)
1469 goto out; 1469 goto out;
1470 r = -EFAULT; 1470 r = -EFAULT;
1471 if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs))) 1471 if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs)))
1472 goto out; 1472 goto out;
1473 r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); 1473 r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs);
1474 if (r) 1474 if (r)
1475 goto out; 1475 goto out;
1476 r = 0; 1476 r = 0;
1477 break; 1477 break;
1478 } 1478 }
1479 case KVM_GET_MP_STATE: { 1479 case KVM_GET_MP_STATE: {
1480 struct kvm_mp_state mp_state; 1480 struct kvm_mp_state mp_state;
1481 1481
1482 r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state); 1482 r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state);
1483 if (r) 1483 if (r)
1484 goto out; 1484 goto out;
1485 r = -EFAULT; 1485 r = -EFAULT;
1486 if (copy_to_user(argp, &mp_state, sizeof mp_state)) 1486 if (copy_to_user(argp, &mp_state, sizeof mp_state))
1487 goto out; 1487 goto out;
1488 r = 0; 1488 r = 0;
1489 break; 1489 break;
1490 } 1490 }
1491 case KVM_SET_MP_STATE: { 1491 case KVM_SET_MP_STATE: {
1492 struct kvm_mp_state mp_state; 1492 struct kvm_mp_state mp_state;
1493 1493
1494 r = -EFAULT; 1494 r = -EFAULT;
1495 if (copy_from_user(&mp_state, argp, sizeof mp_state)) 1495 if (copy_from_user(&mp_state, argp, sizeof mp_state))
1496 goto out; 1496 goto out;
1497 r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); 1497 r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
1498 if (r) 1498 if (r)
1499 goto out; 1499 goto out;
1500 r = 0; 1500 r = 0;
1501 break; 1501 break;
1502 } 1502 }
1503 case KVM_TRANSLATE: { 1503 case KVM_TRANSLATE: {
1504 struct kvm_translation tr; 1504 struct kvm_translation tr;
1505 1505
1506 r = -EFAULT; 1506 r = -EFAULT;
1507 if (copy_from_user(&tr, argp, sizeof tr)) 1507 if (copy_from_user(&tr, argp, sizeof tr))
1508 goto out; 1508 goto out;
1509 r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr); 1509 r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
1510 if (r) 1510 if (r)
1511 goto out; 1511 goto out;
1512 r = -EFAULT; 1512 r = -EFAULT;
1513 if (copy_to_user(argp, &tr, sizeof tr)) 1513 if (copy_to_user(argp, &tr, sizeof tr))
1514 goto out; 1514 goto out;
1515 r = 0; 1515 r = 0;
1516 break; 1516 break;
1517 } 1517 }
1518 case KVM_SET_GUEST_DEBUG: { 1518 case KVM_SET_GUEST_DEBUG: {
1519 struct kvm_guest_debug dbg; 1519 struct kvm_guest_debug dbg;
1520 1520
1521 r = -EFAULT; 1521 r = -EFAULT;
1522 if (copy_from_user(&dbg, argp, sizeof dbg)) 1522 if (copy_from_user(&dbg, argp, sizeof dbg))
1523 goto out; 1523 goto out;
1524 r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); 1524 r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
1525 if (r) 1525 if (r)
1526 goto out; 1526 goto out;
1527 r = 0; 1527 r = 0;
1528 break; 1528 break;
1529 } 1529 }
1530 case KVM_SET_SIGNAL_MASK: { 1530 case KVM_SET_SIGNAL_MASK: {
1531 struct kvm_signal_mask __user *sigmask_arg = argp; 1531 struct kvm_signal_mask __user *sigmask_arg = argp;
1532 struct kvm_signal_mask kvm_sigmask; 1532 struct kvm_signal_mask kvm_sigmask;
1533 sigset_t sigset, *p; 1533 sigset_t sigset, *p;
1534 1534
1535 p = NULL; 1535 p = NULL;
1536 if (argp) { 1536 if (argp) {
1537 r = -EFAULT; 1537 r = -EFAULT;
1538 if (copy_from_user(&kvm_sigmask, argp, 1538 if (copy_from_user(&kvm_sigmask, argp,
1539 sizeof kvm_sigmask)) 1539 sizeof kvm_sigmask))
1540 goto out; 1540 goto out;
1541 r = -EINVAL; 1541 r = -EINVAL;
1542 if (kvm_sigmask.len != sizeof sigset) 1542 if (kvm_sigmask.len != sizeof sigset)
1543 goto out; 1543 goto out;
1544 r = -EFAULT; 1544 r = -EFAULT;
1545 if (copy_from_user(&sigset, sigmask_arg->sigset, 1545 if (copy_from_user(&sigset, sigmask_arg->sigset,
1546 sizeof sigset)) 1546 sizeof sigset))
1547 goto out; 1547 goto out;
1548 p = &sigset; 1548 p = &sigset;
1549 } 1549 }
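/*
 * p is still NULL when userspace supplied no sigset; pass it through
 * unchanged so kvm_vcpu_ioctl_set_sigmask() clears sigset_active instead
 * of copying the uninitialized local sigset.
 */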
1550 r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); 1550 r = kvm_vcpu_ioctl_set_sigmask(vcpu, p);
1551 break; 1551 break;
1552 } 1552 }
1553 case KVM_GET_FPU: { 1553 case KVM_GET_FPU: {
1554 fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); 1554 fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
1555 r = -ENOMEM; 1555 r = -ENOMEM;
1556 if (!fpu) 1556 if (!fpu)
1557 goto out; 1557 goto out;
1558 r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu); 1558 r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu);
1559 if (r) 1559 if (r)
1560 goto out; 1560 goto out;
1561 r = -EFAULT; 1561 r = -EFAULT;
1562 if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu))) 1562 if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu)))
1563 goto out; 1563 goto out;
1564 r = 0; 1564 r = 0;
1565 break; 1565 break;
1566 } 1566 }
1567 case KVM_SET_FPU: { 1567 case KVM_SET_FPU: {
1568 fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL); 1568 fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
1569 r = -ENOMEM; 1569 r = -ENOMEM;
1570 if (!fpu) 1570 if (!fpu)
1571 goto out; 1571 goto out;
1572 r = -EFAULT; 1572 r = -EFAULT;
1573 if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu))) 1573 if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu)))
1574 goto out; 1574 goto out;
1575 r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); 1575 r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
1576 if (r) 1576 if (r)
1577 goto out; 1577 goto out;
1578 r = 0; 1578 r = 0;
1579 break; 1579 break;
1580 } 1580 }
1581 default: 1581 default:
1582 r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); 1582 r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
1583 } 1583 }
1584 out: 1584 out:
1585 vcpu_put(vcpu); 1585 vcpu_put(vcpu);
1586 kfree(fpu); 1586 kfree(fpu);
1587 kfree(kvm_sregs); 1587 kfree(kvm_sregs);
1588 return r; 1588 return r;
1589 } 1589 }
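/*
 * A minimal userspace-side sketch of driving KVM_SET_SIGNAL_MASK, including
 * the NULL case handled in kvm_vcpu_ioctl() above.  vcpu_set_signal_mask()
 * and KVM_KERNEL_SIGSET_SIZE are illustrative names, and the 8-byte size is
 * an assumption matching the in-kernel sizeof(sigset_t) on 64-bit kernels,
 * which is what the handler compares kvm_sigmask.len against.
 */
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#define KVM_KERNEL_SIGSET_SIZE 8	/* assumed in-kernel sizeof(sigset_t) */

static int vcpu_set_signal_mask(int vcpu_fd, const sigset_t *mask)
{
	struct kvm_signal_mask *kmask;
	int r;

	/* A NULL argument asks the handler above to clear the vcpu sigmask. */
	if (!mask)
		return ioctl(vcpu_fd, KVM_SET_SIGNAL_MASK, NULL);

	kmask = malloc(sizeof(*kmask) + KVM_KERNEL_SIGSET_SIZE);
	if (!kmask)
		return -1;
	kmask->len = KVM_KERNEL_SIGSET_SIZE;
	/* The first 64 signals live in the leading word of a glibc sigset_t. */
	memcpy(kmask->sigset, mask, KVM_KERNEL_SIGSET_SIZE);
	r = ioctl(vcpu_fd, KVM_SET_SIGNAL_MASK, kmask);
	free(kmask);
	return r;
}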
1590 1590
1591 static long kvm_vm_ioctl(struct file *filp, 1591 static long kvm_vm_ioctl(struct file *filp,
1592 unsigned int ioctl, unsigned long arg) 1592 unsigned int ioctl, unsigned long arg)
1593 { 1593 {
1594 struct kvm *kvm = filp->private_data; 1594 struct kvm *kvm = filp->private_data;
1595 void __user *argp = (void __user *)arg; 1595 void __user *argp = (void __user *)arg;
1596 int r; 1596 int r;
1597 1597
1598 if (kvm->mm != current->mm) 1598 if (kvm->mm != current->mm)
1599 return -EIO; 1599 return -EIO;
1600 switch (ioctl) { 1600 switch (ioctl) {
1601 case KVM_CREATE_VCPU: 1601 case KVM_CREATE_VCPU:
1602 r = kvm_vm_ioctl_create_vcpu(kvm, arg); 1602 r = kvm_vm_ioctl_create_vcpu(kvm, arg);
1603 if (r < 0) 1603 if (r < 0)
1604 goto out; 1604 goto out;
1605 break; 1605 break;
1606 case KVM_SET_USER_MEMORY_REGION: { 1606 case KVM_SET_USER_MEMORY_REGION: {
1607 struct kvm_userspace_memory_region kvm_userspace_mem; 1607 struct kvm_userspace_memory_region kvm_userspace_mem;
1608 1608
1609 r = -EFAULT; 1609 r = -EFAULT;
1610 if (copy_from_user(&kvm_userspace_mem, argp, 1610 if (copy_from_user(&kvm_userspace_mem, argp,
1611 sizeof kvm_userspace_mem)) 1611 sizeof kvm_userspace_mem))
1612 goto out; 1612 goto out;
1613 1613
1614 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1); 1614 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1);
1615 if (r) 1615 if (r)
1616 goto out; 1616 goto out;
1617 break; 1617 break;
1618 } 1618 }
1619 case KVM_GET_DIRTY_LOG: { 1619 case KVM_GET_DIRTY_LOG: {
1620 struct kvm_dirty_log log; 1620 struct kvm_dirty_log log;
1621 1621
1622 r = -EFAULT; 1622 r = -EFAULT;
1623 if (copy_from_user(&log, argp, sizeof log)) 1623 if (copy_from_user(&log, argp, sizeof log))
1624 goto out; 1624 goto out;
1625 r = kvm_vm_ioctl_get_dirty_log(kvm, &log); 1625 r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
1626 if (r) 1626 if (r)
1627 goto out; 1627 goto out;
1628 break; 1628 break;
1629 } 1629 }
1630 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 1630 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
1631 case KVM_REGISTER_COALESCED_MMIO: { 1631 case KVM_REGISTER_COALESCED_MMIO: {
1632 struct kvm_coalesced_mmio_zone zone; 1632 struct kvm_coalesced_mmio_zone zone;
1633 r = -EFAULT; 1633 r = -EFAULT;
1634 if (copy_from_user(&zone, argp, sizeof zone)) 1634 if (copy_from_user(&zone, argp, sizeof zone))
1635 goto out; 1635 goto out;
1636 r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); 1636 r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
1637 if (r) 1637 if (r)
1638 goto out; 1638 goto out;
1639 r = 0; 1639 r = 0;
1640 break; 1640 break;
1641 } 1641 }
1642 case KVM_UNREGISTER_COALESCED_MMIO: { 1642 case KVM_UNREGISTER_COALESCED_MMIO: {
1643 struct kvm_coalesced_mmio_zone zone; 1643 struct kvm_coalesced_mmio_zone zone;
1644 r = -EFAULT; 1644 r = -EFAULT;
1645 if (copy_from_user(&zone, argp, sizeof zone)) 1645 if (copy_from_user(&zone, argp, sizeof zone))
1646 goto out; 1646 goto out;
1647 r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); 1647 r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
1648 if (r) 1648 if (r)
1649 goto out; 1649 goto out;
1650 r = 0; 1650 r = 0;
1651 break; 1651 break;
1652 } 1652 }
1653 #endif 1653 #endif
1654 case KVM_IRQFD: { 1654 case KVM_IRQFD: {
1655 struct kvm_irqfd data; 1655 struct kvm_irqfd data;
1656 1656
1657 r = -EFAULT; 1657 r = -EFAULT;
1658 if (copy_from_user(&data, argp, sizeof data)) 1658 if (copy_from_user(&data, argp, sizeof data))
1659 goto out; 1659 goto out;
1660 r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags); 1660 r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags);
1661 break; 1661 break;
1662 } 1662 }
1663 case KVM_IOEVENTFD: { 1663 case KVM_IOEVENTFD: {
1664 struct kvm_ioeventfd data; 1664 struct kvm_ioeventfd data;
1665 1665
1666 r = -EFAULT; 1666 r = -EFAULT;
1667 if (copy_from_user(&data, argp, sizeof data)) 1667 if (copy_from_user(&data, argp, sizeof data))
1668 goto out; 1668 goto out;
1669 r = kvm_ioeventfd(kvm, &data); 1669 r = kvm_ioeventfd(kvm, &data);
1670 break; 1670 break;
1671 } 1671 }
1672 #ifdef CONFIG_KVM_APIC_ARCHITECTURE 1672 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
1673 case KVM_SET_BOOT_CPU_ID: 1673 case KVM_SET_BOOT_CPU_ID:
1674 r = 0; 1674 r = 0;
1675 mutex_lock(&kvm->lock); 1675 mutex_lock(&kvm->lock);
1676 if (atomic_read(&kvm->online_vcpus) != 0) 1676 if (atomic_read(&kvm->online_vcpus) != 0)
1677 r = -EBUSY; 1677 r = -EBUSY;
1678 else 1678 else
1679 kvm->bsp_vcpu_id = arg; 1679 kvm->bsp_vcpu_id = arg;
1680 mutex_unlock(&kvm->lock); 1680 mutex_unlock(&kvm->lock);
1681 break; 1681 break;
1682 #endif 1682 #endif
1683 default: 1683 default:
1684 r = kvm_arch_vm_ioctl(filp, ioctl, arg); 1684 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
1685 if (r == -ENOTTY) 1685 if (r == -ENOTTY)
1686 r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); 1686 r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
1687 } 1687 }
1688 out: 1688 out:
1689 return r; 1689 return r;
1690 } 1690 }
1691 1691
1692 #ifdef CONFIG_COMPAT 1692 #ifdef CONFIG_COMPAT
1693 struct compat_kvm_dirty_log { 1693 struct compat_kvm_dirty_log {
1694 __u32 slot; 1694 __u32 slot;
1695 __u32 padding1; 1695 __u32 padding1;
1696 union { 1696 union {
1697 compat_uptr_t dirty_bitmap; /* one bit per page */ 1697 compat_uptr_t dirty_bitmap; /* one bit per page */
1698 __u64 padding2; 1698 __u64 padding2;
1699 }; 1699 };
1700 }; 1700 };
1701 1701
1702 static long kvm_vm_compat_ioctl(struct file *filp, 1702 static long kvm_vm_compat_ioctl(struct file *filp,
1703 unsigned int ioctl, unsigned long arg) 1703 unsigned int ioctl, unsigned long arg)
1704 { 1704 {
1705 struct kvm *kvm = filp->private_data; 1705 struct kvm *kvm = filp->private_data;
1706 int r; 1706 int r;
1707 1707
1708 if (kvm->mm != current->mm) 1708 if (kvm->mm != current->mm)
1709 return -EIO; 1709 return -EIO;
1710 switch (ioctl) { 1710 switch (ioctl) {
1711 case KVM_GET_DIRTY_LOG: { 1711 case KVM_GET_DIRTY_LOG: {
1712 struct compat_kvm_dirty_log compat_log; 1712 struct compat_kvm_dirty_log compat_log;
1713 struct kvm_dirty_log log; 1713 struct kvm_dirty_log log;
1714 1714
1715 r = -EFAULT; 1715 r = -EFAULT;
1716 if (copy_from_user(&compat_log, (void __user *)arg, 1716 if (copy_from_user(&compat_log, (void __user *)arg,
1717 sizeof(compat_log))) 1717 sizeof(compat_log)))
1718 goto out; 1718 goto out;
1719 log.slot = compat_log.slot; 1719 log.slot = compat_log.slot;
1720 log.padding1 = compat_log.padding1; 1720 log.padding1 = compat_log.padding1;
1721 log.padding2 = compat_log.padding2; 1721 log.padding2 = compat_log.padding2;
1722 log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); 1722 log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap);
1723 1723
1724 r = kvm_vm_ioctl_get_dirty_log(kvm, &log); 1724 r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
1725 if (r) 1725 if (r)
1726 goto out; 1726 goto out;
1727 break; 1727 break;
1728 } 1728 }
1729 default: 1729 default:
1730 r = kvm_vm_ioctl(filp, ioctl, arg); 1730 r = kvm_vm_ioctl(filp, ioctl, arg);
1731 } 1731 }
1732 1732
1733 out: 1733 out:
1734 return r; 1734 return r;
1735 } 1735 }
1736 #endif 1736 #endif
1737 1737
1738 static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1738 static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1739 { 1739 {
1740 struct page *page[1]; 1740 struct page *page[1];
1741 unsigned long addr; 1741 unsigned long addr;
1742 int npages; 1742 int npages;
1743 gfn_t gfn = vmf->pgoff; 1743 gfn_t gfn = vmf->pgoff;
1744 struct kvm *kvm = vma->vm_file->private_data; 1744 struct kvm *kvm = vma->vm_file->private_data;
1745 1745
1746 addr = gfn_to_hva(kvm, gfn); 1746 addr = gfn_to_hva(kvm, gfn);
1747 if (kvm_is_error_hva(addr)) 1747 if (kvm_is_error_hva(addr))
1748 return VM_FAULT_SIGBUS; 1748 return VM_FAULT_SIGBUS;
1749 1749
1750 npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page, 1750 npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
1751 NULL); 1751 NULL);
1752 if (unlikely(npages != 1)) 1752 if (unlikely(npages != 1))
1753 return VM_FAULT_SIGBUS; 1753 return VM_FAULT_SIGBUS;
1754 1754
1755 vmf->page = page[0]; 1755 vmf->page = page[0];
1756 return 0; 1756 return 0;
1757 } 1757 }
1758 1758
1759 static const struct vm_operations_struct kvm_vm_vm_ops = { 1759 static const struct vm_operations_struct kvm_vm_vm_ops = {
1760 .fault = kvm_vm_fault, 1760 .fault = kvm_vm_fault,
1761 }; 1761 };
1762 1762
1763 static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) 1763 static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
1764 { 1764 {
1765 vma->vm_ops = &kvm_vm_vm_ops; 1765 vma->vm_ops = &kvm_vm_vm_ops;
1766 return 0; 1766 return 0;
1767 } 1767 }
1768 1768
1769 static struct file_operations kvm_vm_fops = { 1769 static struct file_operations kvm_vm_fops = {
1770 .release = kvm_vm_release, 1770 .release = kvm_vm_release,
1771 .unlocked_ioctl = kvm_vm_ioctl, 1771 .unlocked_ioctl = kvm_vm_ioctl,
1772 #ifdef CONFIG_COMPAT 1772 #ifdef CONFIG_COMPAT
1773 .compat_ioctl = kvm_vm_compat_ioctl, 1773 .compat_ioctl = kvm_vm_compat_ioctl,
1774 #endif 1774 #endif
1775 .mmap = kvm_vm_mmap, 1775 .mmap = kvm_vm_mmap,
1776 }; 1776 };
1777 1777
1778 static int kvm_dev_ioctl_create_vm(void) 1778 static int kvm_dev_ioctl_create_vm(void)
1779 { 1779 {
1780 int fd, r; 1780 int fd, r;
1781 struct kvm *kvm; 1781 struct kvm *kvm;
1782 1782
1783 kvm = kvm_create_vm(); 1783 kvm = kvm_create_vm();
1784 if (IS_ERR(kvm)) 1784 if (IS_ERR(kvm))
1785 return PTR_ERR(kvm); 1785 return PTR_ERR(kvm);
1786 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 1786 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
1787 r = kvm_coalesced_mmio_init(kvm); 1787 r = kvm_coalesced_mmio_init(kvm);
1788 if (r < 0) { 1788 if (r < 0) {
1789 kvm_put_kvm(kvm); 1789 kvm_put_kvm(kvm);
1790 return r; 1790 return r;
1791 } 1791 }
1792 #endif 1792 #endif
1793 fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); 1793 fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
1794 if (fd < 0) 1794 if (fd < 0)
1795 kvm_put_kvm(kvm); 1795 kvm_put_kvm(kvm);
1796 1796
1797 return fd; 1797 return fd;
1798 } 1798 }
1799 1799
1800 static long kvm_dev_ioctl_check_extension_generic(long arg) 1800 static long kvm_dev_ioctl_check_extension_generic(long arg)
1801 { 1801 {
1802 switch (arg) { 1802 switch (arg) {
1803 case KVM_CAP_USER_MEMORY: 1803 case KVM_CAP_USER_MEMORY:
1804 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: 1804 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
1805 case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: 1805 case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
1806 #ifdef CONFIG_KVM_APIC_ARCHITECTURE 1806 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
1807 case KVM_CAP_SET_BOOT_CPU_ID: 1807 case KVM_CAP_SET_BOOT_CPU_ID:
1808 #endif 1808 #endif
1809 case KVM_CAP_INTERNAL_ERROR_DATA: 1809 case KVM_CAP_INTERNAL_ERROR_DATA:
1810 return 1; 1810 return 1;
1811 #ifdef CONFIG_HAVE_KVM_IRQCHIP 1811 #ifdef CONFIG_HAVE_KVM_IRQCHIP
1812 case KVM_CAP_IRQ_ROUTING: 1812 case KVM_CAP_IRQ_ROUTING:
1813 return KVM_MAX_IRQ_ROUTES; 1813 return KVM_MAX_IRQ_ROUTES;
1814 #endif 1814 #endif
1815 default: 1815 default:
1816 break; 1816 break;
1817 } 1817 }
1818 return kvm_dev_ioctl_check_extension(arg); 1818 return kvm_dev_ioctl_check_extension(arg);
1819 } 1819 }
1820 1820
1821 static long kvm_dev_ioctl(struct file *filp, 1821 static long kvm_dev_ioctl(struct file *filp,
1822 unsigned int ioctl, unsigned long arg) 1822 unsigned int ioctl, unsigned long arg)
1823 { 1823 {
1824 long r = -EINVAL; 1824 long r = -EINVAL;
1825 1825
1826 switch (ioctl) { 1826 switch (ioctl) {
1827 case KVM_GET_API_VERSION: 1827 case KVM_GET_API_VERSION:
1828 r = -EINVAL; 1828 r = -EINVAL;
1829 if (arg) 1829 if (arg)
1830 goto out; 1830 goto out;
1831 r = KVM_API_VERSION; 1831 r = KVM_API_VERSION;
1832 break; 1832 break;
1833 case KVM_CREATE_VM: 1833 case KVM_CREATE_VM:
1834 r = -EINVAL; 1834 r = -EINVAL;
1835 if (arg) 1835 if (arg)
1836 goto out; 1836 goto out;
1837 r = kvm_dev_ioctl_create_vm(); 1837 r = kvm_dev_ioctl_create_vm();
1838 break; 1838 break;
1839 case KVM_CHECK_EXTENSION: 1839 case KVM_CHECK_EXTENSION:
1840 r = kvm_dev_ioctl_check_extension_generic(arg); 1840 r = kvm_dev_ioctl_check_extension_generic(arg);
1841 break; 1841 break;
1842 case KVM_GET_VCPU_MMAP_SIZE: 1842 case KVM_GET_VCPU_MMAP_SIZE:
1843 r = -EINVAL; 1843 r = -EINVAL;
1844 if (arg) 1844 if (arg)
1845 goto out; 1845 goto out;
1846 r = PAGE_SIZE; /* struct kvm_run */ 1846 r = PAGE_SIZE; /* struct kvm_run */
1847 #ifdef CONFIG_X86 1847 #ifdef CONFIG_X86
1848 r += PAGE_SIZE; /* pio data page */ 1848 r += PAGE_SIZE; /* pio data page */
1849 #endif 1849 #endif
1850 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 1850 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
1851 r += PAGE_SIZE; /* coalesced mmio ring page */ 1851 r += PAGE_SIZE; /* coalesced mmio ring page */
1852 #endif 1852 #endif
1853 break; 1853 break;
1854 case KVM_TRACE_ENABLE: 1854 case KVM_TRACE_ENABLE:
1855 case KVM_TRACE_PAUSE: 1855 case KVM_TRACE_PAUSE:
1856 case KVM_TRACE_DISABLE: 1856 case KVM_TRACE_DISABLE:
1857 r = -EOPNOTSUPP; 1857 r = -EOPNOTSUPP;
1858 break; 1858 break;
1859 default: 1859 default:
1860 return kvm_arch_dev_ioctl(filp, ioctl, arg); 1860 return kvm_arch_dev_ioctl(filp, ioctl, arg);
1861 } 1861 }
1862 out: 1862 out:
1863 return r; 1863 return r;
1864 } 1864 }
1865 1865
1866 static struct file_operations kvm_chardev_ops = { 1866 static struct file_operations kvm_chardev_ops = {
1867 .unlocked_ioctl = kvm_dev_ioctl, 1867 .unlocked_ioctl = kvm_dev_ioctl,
1868 .compat_ioctl = kvm_dev_ioctl, 1868 .compat_ioctl = kvm_dev_ioctl,
1869 }; 1869 };
1870 1870
1871 static struct miscdevice kvm_dev = { 1871 static struct miscdevice kvm_dev = {
1872 KVM_MINOR, 1872 KVM_MINOR,
1873 "kvm", 1873 "kvm",
1874 &kvm_chardev_ops, 1874 &kvm_chardev_ops,
1875 }; 1875 };
1876 1876
1877 static void hardware_enable(void *junk) 1877 static void hardware_enable(void *junk)
1878 { 1878 {
1879 int cpu = raw_smp_processor_id(); 1879 int cpu = raw_smp_processor_id();
1880 int r; 1880 int r;
1881 1881
1882 if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) 1882 if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
1883 return; 1883 return;
1884 1884
1885 cpumask_set_cpu(cpu, cpus_hardware_enabled); 1885 cpumask_set_cpu(cpu, cpus_hardware_enabled);
1886 1886
1887 r = kvm_arch_hardware_enable(NULL); 1887 r = kvm_arch_hardware_enable(NULL);
1888 1888
1889 if (r) { 1889 if (r) {
1890 cpumask_clear_cpu(cpu, cpus_hardware_enabled); 1890 cpumask_clear_cpu(cpu, cpus_hardware_enabled);
1891 atomic_inc(&hardware_enable_failed); 1891 atomic_inc(&hardware_enable_failed);
1892 printk(KERN_INFO "kvm: enabling virtualization on " 1892 printk(KERN_INFO "kvm: enabling virtualization on "
1893 "CPU%d failed\n", cpu); 1893 "CPU%d failed\n", cpu);
1894 } 1894 }
1895 } 1895 }
1896 1896
1897 static void hardware_disable(void *junk) 1897 static void hardware_disable(void *junk)
1898 { 1898 {
1899 int cpu = raw_smp_processor_id(); 1899 int cpu = raw_smp_processor_id();
1900 1900
1901 if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) 1901 if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
1902 return; 1902 return;
1903 cpumask_clear_cpu(cpu, cpus_hardware_enabled); 1903 cpumask_clear_cpu(cpu, cpus_hardware_enabled);
1904 kvm_arch_hardware_disable(NULL); 1904 kvm_arch_hardware_disable(NULL);
1905 } 1905 }
1906 1906
1907 static void hardware_disable_all_nolock(void) 1907 static void hardware_disable_all_nolock(void)
1908 { 1908 {
1909 BUG_ON(!kvm_usage_count); 1909 BUG_ON(!kvm_usage_count);
1910 1910
1911 kvm_usage_count--; 1911 kvm_usage_count--;
1912 if (!kvm_usage_count) 1912 if (!kvm_usage_count)
1913 on_each_cpu(hardware_disable, NULL, 1); 1913 on_each_cpu(hardware_disable, NULL, 1);
1914 } 1914 }
1915 1915
1916 static void hardware_disable_all(void) 1916 static void hardware_disable_all(void)
1917 { 1917 {
1918 spin_lock(&kvm_lock); 1918 spin_lock(&kvm_lock);
1919 hardware_disable_all_nolock(); 1919 hardware_disable_all_nolock();
1920 spin_unlock(&kvm_lock); 1920 spin_unlock(&kvm_lock);
1921 } 1921 }
1922 1922
1923 static int hardware_enable_all(void) 1923 static int hardware_enable_all(void)
1924 { 1924 {
1925 int r = 0; 1925 int r = 0;
1926 1926
1927 spin_lock(&kvm_lock); 1927 spin_lock(&kvm_lock);
1928 1928
1929 kvm_usage_count++; 1929 kvm_usage_count++;
1930 if (kvm_usage_count == 1) { 1930 if (kvm_usage_count == 1) {
1931 atomic_set(&hardware_enable_failed, 0); 1931 atomic_set(&hardware_enable_failed, 0);
1932 on_each_cpu(hardware_enable, NULL, 1); 1932 on_each_cpu(hardware_enable, NULL, 1);
1933 1933
1934 if (atomic_read(&hardware_enable_failed)) { 1934 if (atomic_read(&hardware_enable_failed)) {
1935 hardware_disable_all_nolock(); 1935 hardware_disable_all_nolock();
1936 r = -EBUSY; 1936 r = -EBUSY;
1937 } 1937 }
1938 } 1938 }
1939 1939
1940 spin_unlock(&kvm_lock); 1940 spin_unlock(&kvm_lock);
1941 1941
1942 return r; 1942 return r;
1943 } 1943 }
1944 1944
1945 static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, 1945 static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
1946 void *v) 1946 void *v)
1947 { 1947 {
1948 int cpu = (long)v; 1948 int cpu = (long)v;
1949 1949
1950 if (!kvm_usage_count) 1950 if (!kvm_usage_count)
1951 return NOTIFY_OK; 1951 return NOTIFY_OK;
1952 1952
1953 val &= ~CPU_TASKS_FROZEN; 1953 val &= ~CPU_TASKS_FROZEN;
1954 switch (val) { 1954 switch (val) {
1955 case CPU_DYING: 1955 case CPU_DYING:
1956 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", 1956 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
1957 cpu); 1957 cpu);
1958 hardware_disable(NULL); 1958 hardware_disable(NULL);
1959 break; 1959 break;
1960 case CPU_ONLINE: 1960 case CPU_ONLINE:
1961 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", 1961 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
1962 cpu); 1962 cpu);
1963 smp_call_function_single(cpu, hardware_enable, NULL, 1); 1963 smp_call_function_single(cpu, hardware_enable, NULL, 1);
1964 break; 1964 break;
1965 } 1965 }
1966 return NOTIFY_OK; 1966 return NOTIFY_OK;
1967 } 1967 }
1968 1968
1969 1969
1970 asmlinkage void kvm_handle_fault_on_reboot(void) 1970 asmlinkage void kvm_handle_fault_on_reboot(void)
1971 { 1971 {
1972 if (kvm_rebooting) 1972 if (kvm_rebooting)
1973 /* spin while reset goes on */ 1973 /* spin while reset goes on */
1974 while (true) 1974 while (true)
1975 ; 1975 ;
1976 /* Fault while not rebooting. We want the trace. */ 1976 /* Fault while not rebooting. We want the trace. */
1977 BUG(); 1977 BUG();
1978 } 1978 }
1979 EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot); 1979 EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot);
1980 1980
1981 static int kvm_reboot(struct notifier_block *notifier, unsigned long val, 1981 static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
1982 void *v) 1982 void *v)
1983 { 1983 {
1984 /* 1984 /*
1985 * Some (well, at least mine) BIOSes hang on reboot if 1985 * Some (well, at least mine) BIOSes hang on reboot if
1986 * in vmx root mode. 1986 * in vmx root mode.
1987 * 1987 *
1988 * And Intel TXT requires VMX to be off on all CPUs when the system shuts down. 1988 * And Intel TXT requires VMX to be off on all CPUs when the system shuts down.
1989 */ 1989 */
1990 printk(KERN_INFO "kvm: exiting hardware virtualization\n"); 1990 printk(KERN_INFO "kvm: exiting hardware virtualization\n");
1991 kvm_rebooting = true; 1991 kvm_rebooting = true;
1992 on_each_cpu(hardware_disable, NULL, 1); 1992 on_each_cpu(hardware_disable, NULL, 1);
1993 return NOTIFY_OK; 1993 return NOTIFY_OK;
1994 } 1994 }
1995 1995
1996 static struct notifier_block kvm_reboot_notifier = { 1996 static struct notifier_block kvm_reboot_notifier = {
1997 .notifier_call = kvm_reboot, 1997 .notifier_call = kvm_reboot,
1998 .priority = 0, 1998 .priority = 0,
1999 }; 1999 };
2000 2000
2001 static void kvm_io_bus_destroy(struct kvm_io_bus *bus) 2001 static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
2002 { 2002 {
2003 int i; 2003 int i;
2004 2004
2005 for (i = 0; i < bus->dev_count; i++) { 2005 for (i = 0; i < bus->dev_count; i++) {
2006 struct kvm_io_device *pos = bus->devs[i]; 2006 struct kvm_io_device *pos = bus->devs[i];
2007 2007
2008 kvm_iodevice_destructor(pos); 2008 kvm_iodevice_destructor(pos);
2009 } 2009 }
2010 kfree(bus); 2010 kfree(bus);
2011 } 2011 }
2012 2012
2013 /* kvm_io_bus_write - called under kvm->slots_lock */ 2013 /* kvm_io_bus_write - called under kvm->slots_lock */
2014 int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 2014 int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
2015 int len, const void *val) 2015 int len, const void *val)
2016 { 2016 {
2017 int i; 2017 int i;
2018 struct kvm_io_bus *bus; 2018 struct kvm_io_bus *bus;
2019 2019
2020 bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); 2020 bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
2021 for (i = 0; i < bus->dev_count; i++) 2021 for (i = 0; i < bus->dev_count; i++)
2022 if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) 2022 if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
2023 return 0; 2023 return 0;
2024 return -EOPNOTSUPP; 2024 return -EOPNOTSUPP;
2025 } 2025 }
2026 2026
2027 /* kvm_io_bus_read - called under kvm->slots_lock */ 2027 /* kvm_io_bus_read - called under kvm->slots_lock */
2028 int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 2028 int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
2029 int len, void *val) 2029 int len, void *val)
2030 { 2030 {
2031 int i; 2031 int i;
2032 struct kvm_io_bus *bus; 2032 struct kvm_io_bus *bus;
2033 2033
2034 bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); 2034 bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
2035 for (i = 0; i < bus->dev_count; i++) 2035 for (i = 0; i < bus->dev_count; i++)
2036 if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) 2036 if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
2037 return 0; 2037 return 0;
2038 return -EOPNOTSUPP; 2038 return -EOPNOTSUPP;
2039 } 2039 }
2040 2040
2041 /* Caller must hold slots_lock. */ 2041 /* Caller must hold slots_lock. */
2042 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, 2042 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
2043 struct kvm_io_device *dev) 2043 struct kvm_io_device *dev)
2044 { 2044 {
2045 struct kvm_io_bus *new_bus, *bus; 2045 struct kvm_io_bus *new_bus, *bus;
2046 2046
2047 bus = kvm->buses[bus_idx]; 2047 bus = kvm->buses[bus_idx];
2048 if (bus->dev_count > NR_IOBUS_DEVS-1) 2048 if (bus->dev_count > NR_IOBUS_DEVS-1)
2049 return -ENOSPC; 2049 return -ENOSPC;
2050 2050
2051 new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); 2051 new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
2052 if (!new_bus) 2052 if (!new_bus)
2053 return -ENOMEM; 2053 return -ENOMEM;
2054 memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); 2054 memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
2055 new_bus->devs[new_bus->dev_count++] = dev; 2055 new_bus->devs[new_bus->dev_count++] = dev;
2056 rcu_assign_pointer(kvm->buses[bus_idx], new_bus); 2056 rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
2057 synchronize_srcu_expedited(&kvm->srcu); 2057 synchronize_srcu_expedited(&kvm->srcu);
2058 kfree(bus); 2058 kfree(bus);
2059 2059
2060 return 0; 2060 return 0;
2061 } 2061 }
2062 2062
2063 /* Caller must hold slots_lock. */ 2063 /* Caller must hold slots_lock. */
2064 int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, 2064 int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
2065 struct kvm_io_device *dev) 2065 struct kvm_io_device *dev)
2066 { 2066 {
2067 int i, r; 2067 int i, r;
2068 struct kvm_io_bus *new_bus, *bus; 2068 struct kvm_io_bus *new_bus, *bus;
2069 2069
2070 new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); 2070 new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
2071 if (!new_bus) 2071 if (!new_bus)
2072 return -ENOMEM; 2072 return -ENOMEM;
2073 2073
2074 bus = kvm->buses[bus_idx]; 2074 bus = kvm->buses[bus_idx];
2075 memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); 2075 memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
2076 2076
2077 r = -ENOENT; 2077 r = -ENOENT;
2078 for (i = 0; i < new_bus->dev_count; i++) 2078 for (i = 0; i < new_bus->dev_count; i++)
2079 if (new_bus->devs[i] == dev) { 2079 if (new_bus->devs[i] == dev) {
2080 r = 0; 2080 r = 0;
2081 new_bus->devs[i] = new_bus->devs[--new_bus->dev_count]; 2081 new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
2082 break; 2082 break;
2083 } 2083 }
2084 2084
2085 if (r) { 2085 if (r) {
2086 kfree(new_bus); 2086 kfree(new_bus);
2087 return r; 2087 return r;
2088 } 2088 }
2089 2089
2090 rcu_assign_pointer(kvm->buses[bus_idx], new_bus); 2090 rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
2091 synchronize_srcu_expedited(&kvm->srcu); 2091 synchronize_srcu_expedited(&kvm->srcu);
2092 kfree(bus); 2092 kfree(bus);
2093 return r; 2093 return r;
2094 } 2094 }
2095 2095
2096 static struct notifier_block kvm_cpu_notifier = { 2096 static struct notifier_block kvm_cpu_notifier = {
2097 .notifier_call = kvm_cpu_hotplug, 2097 .notifier_call = kvm_cpu_hotplug,
2098 .priority = 20, /* must be > scheduler priority */ 2098 .priority = 20, /* must be > scheduler priority */
2099 }; 2099 };
2100 2100
2101 static int vm_stat_get(void *_offset, u64 *val) 2101 static int vm_stat_get(void *_offset, u64 *val)
2102 { 2102 {
2103 unsigned offset = (long)_offset; 2103 unsigned offset = (long)_offset;
2104 struct kvm *kvm; 2104 struct kvm *kvm;
2105 2105
2106 *val = 0; 2106 *val = 0;
2107 spin_lock(&kvm_lock); 2107 spin_lock(&kvm_lock);
2108 list_for_each_entry(kvm, &vm_list, vm_list) 2108 list_for_each_entry(kvm, &vm_list, vm_list)
2109 *val += *(u32 *)((void *)kvm + offset); 2109 *val += *(u32 *)((void *)kvm + offset);
2110 spin_unlock(&kvm_lock); 2110 spin_unlock(&kvm_lock);
2111 return 0; 2111 return 0;
2112 } 2112 }
2113 2113
2114 DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n"); 2114 DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n");
2115 2115
2116 static int vcpu_stat_get(void *_offset, u64 *val) 2116 static int vcpu_stat_get(void *_offset, u64 *val)
2117 { 2117 {
2118 unsigned offset = (long)_offset; 2118 unsigned offset = (long)_offset;
2119 struct kvm *kvm; 2119 struct kvm *kvm;
2120 struct kvm_vcpu *vcpu; 2120 struct kvm_vcpu *vcpu;
2121 int i; 2121 int i;
2122 2122
2123 *val = 0; 2123 *val = 0;
2124 spin_lock(&kvm_lock); 2124 spin_lock(&kvm_lock);
2125 list_for_each_entry(kvm, &vm_list, vm_list) 2125 list_for_each_entry(kvm, &vm_list, vm_list)
2126 kvm_for_each_vcpu(i, vcpu, kvm) 2126 kvm_for_each_vcpu(i, vcpu, kvm)
2127 *val += *(u32 *)((void *)vcpu + offset); 2127 *val += *(u32 *)((void *)vcpu + offset);
2128 2128
2129 spin_unlock(&kvm_lock); 2129 spin_unlock(&kvm_lock);
2130 return 0; 2130 return 0;
2131 } 2131 }
2132 2132
2133 DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n"); 2133 DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n");
2134 2134
2135 static const struct file_operations *stat_fops[] = { 2135 static const struct file_operations *stat_fops[] = {
2136 [KVM_STAT_VCPU] = &vcpu_stat_fops, 2136 [KVM_STAT_VCPU] = &vcpu_stat_fops,
2137 [KVM_STAT_VM] = &vm_stat_fops, 2137 [KVM_STAT_VM] = &vm_stat_fops,
2138 }; 2138 };
2139 2139
2140 static void kvm_init_debug(void) 2140 static void kvm_init_debug(void)
2141 { 2141 {
2142 struct kvm_stats_debugfs_item *p; 2142 struct kvm_stats_debugfs_item *p;
2143 2143
2144 kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); 2144 kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
2145 for (p = debugfs_entries; p->name; ++p) 2145 for (p = debugfs_entries; p->name; ++p)
2146 p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, 2146 p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
2147 (void *)(long)p->offset, 2147 (void *)(long)p->offset,
2148 stat_fops[p->kind]); 2148 stat_fops[p->kind]);
2149 } 2149 }
2150 2150
2151 static void kvm_exit_debug(void) 2151 static void kvm_exit_debug(void)
2152 { 2152 {
2153 struct kvm_stats_debugfs_item *p; 2153 struct kvm_stats_debugfs_item *p;
2154 2154
2155 for (p = debugfs_entries; p->name; ++p) 2155 for (p = debugfs_entries; p->name; ++p)
2156 debugfs_remove(p->dentry); 2156 debugfs_remove(p->dentry);
2157 debugfs_remove(kvm_debugfs_dir); 2157 debugfs_remove(kvm_debugfs_dir);
2158 } 2158 }
2159 2159
2160 static int kvm_suspend(struct sys_device *dev, pm_message_t state) 2160 static int kvm_suspend(struct sys_device *dev, pm_message_t state)
2161 { 2161 {
2162 if (kvm_usage_count) 2162 if (kvm_usage_count)
2163 hardware_disable(NULL); 2163 hardware_disable(NULL);
2164 return 0; 2164 return 0;
2165 } 2165 }
2166 2166
2167 static int kvm_resume(struct sys_device *dev) 2167 static int kvm_resume(struct sys_device *dev)
2168 { 2168 {
2169 if (kvm_usage_count) 2169 if (kvm_usage_count)
2170 hardware_enable(NULL); 2170 hardware_enable(NULL);
2171 return 0; 2171 return 0;
2172 } 2172 }
2173 2173
2174 static struct sysdev_class kvm_sysdev_class = { 2174 static struct sysdev_class kvm_sysdev_class = {
2175 .name = "kvm", 2175 .name = "kvm",
2176 .suspend = kvm_suspend, 2176 .suspend = kvm_suspend,
2177 .resume = kvm_resume, 2177 .resume = kvm_resume,
2178 }; 2178 };
2179 2179
2180 static struct sys_device kvm_sysdev = { 2180 static struct sys_device kvm_sysdev = {
2181 .id = 0, 2181 .id = 0,
2182 .cls = &kvm_sysdev_class, 2182 .cls = &kvm_sysdev_class,
2183 }; 2183 };
2184 2184
2185 struct page *bad_page; 2185 struct page *bad_page;
2186 pfn_t bad_pfn; 2186 pfn_t bad_pfn;
2187 2187
2188 static inline 2188 static inline
2189 struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) 2189 struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
2190 { 2190 {
2191 return container_of(pn, struct kvm_vcpu, preempt_notifier); 2191 return container_of(pn, struct kvm_vcpu, preempt_notifier);
2192 } 2192 }
2193 2193
2194 static void kvm_sched_in(struct preempt_notifier *pn, int cpu) 2194 static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
2195 { 2195 {
2196 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); 2196 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
2197 2197
2198 kvm_arch_vcpu_load(vcpu, cpu); 2198 kvm_arch_vcpu_load(vcpu, cpu);
2199 } 2199 }
2200 2200
2201 static void kvm_sched_out(struct preempt_notifier *pn, 2201 static void kvm_sched_out(struct preempt_notifier *pn,
2202 struct task_struct *next) 2202 struct task_struct *next)
2203 { 2203 {
2204 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); 2204 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
2205 2205
2206 kvm_arch_vcpu_put(vcpu); 2206 kvm_arch_vcpu_put(vcpu);
2207 } 2207 }
2208 2208
int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
             struct module *module)
{
        int r;
        int cpu;

        r = kvm_arch_init(opaque);
        if (r)
                goto out_fail;

        bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO);

        if (bad_page == NULL) {
                r = -ENOMEM;
                goto out;
        }

        bad_pfn = page_to_pfn(bad_page);

        hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO);

        if (hwpoison_page == NULL) {
                r = -ENOMEM;
                goto out_free_0;
        }

        hwpoison_pfn = page_to_pfn(hwpoison_page);

        if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
                r = -ENOMEM;
                goto out_free_0;
        }

        r = kvm_arch_hardware_setup();
        if (r < 0)
                goto out_free_0a;

        for_each_online_cpu(cpu) {
                smp_call_function_single(cpu,
                                         kvm_arch_check_processor_compat,
                                         &r, 1);
                if (r < 0)
                        goto out_free_1;
        }

        r = register_cpu_notifier(&kvm_cpu_notifier);
        if (r)
                goto out_free_2;
        register_reboot_notifier(&kvm_reboot_notifier);

        r = sysdev_class_register(&kvm_sysdev_class);
        if (r)
                goto out_free_3;

        r = sysdev_register(&kvm_sysdev);
        if (r)
                goto out_free_4;

        /* A kmem cache lets us meet the alignment requirements of fx_save. */
        if (!vcpu_align)
                vcpu_align = __alignof__(struct kvm_vcpu);
        kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
                                           0, NULL);
        if (!kvm_vcpu_cache) {
                r = -ENOMEM;
                goto out_free_5;
        }

        kvm_chardev_ops.owner = module;
        kvm_vm_fops.owner = module;
        kvm_vcpu_fops.owner = module;

        r = misc_register(&kvm_dev);
        if (r) {
                printk(KERN_ERR "kvm: misc device register failed\n");
                goto out_free;
        }

        kvm_preempt_ops.sched_in = kvm_sched_in;
        kvm_preempt_ops.sched_out = kvm_sched_out;

        kvm_init_debug();

        return 0;

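        /*
         * Error unwinding: each label below releases what was set up
         * before the failing step, in reverse order of the calls above.
         */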
out_free:
        kmem_cache_destroy(kvm_vcpu_cache);
out_free_5:
        sysdev_unregister(&kvm_sysdev);
out_free_4:
        sysdev_class_unregister(&kvm_sysdev_class);
out_free_3:
        unregister_reboot_notifier(&kvm_reboot_notifier);
        unregister_cpu_notifier(&kvm_cpu_notifier);
out_free_2:
out_free_1:
        kvm_arch_hardware_unsetup();
out_free_0a:
        free_cpumask_var(cpus_hardware_enabled);
out_free_0:
        if (hwpoison_page)
                __free_page(hwpoison_page);
        __free_page(bad_page);
out:
        kvm_arch_exit();
out_fail:
        return r;
}
EXPORT_SYMBOL_GPL(kvm_init);

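/*
 * Illustrative caller, not part of this file: on x86 the vendor module
 * passes its kvm_x86_ops plus vcpu size/alignment from its module_init,
 * roughly:
 *
 *      r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
 *                   __alignof__(struct vcpu_vmx), THIS_MODULE);
 */

/*
 * kvm_exit undoes kvm_init in reverse order, and also turns off hardware
 * virtualization on every CPU where it is still enabled before tearing
 * down the arch state.
 */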
void kvm_exit(void)
{
        kvm_exit_debug();
        misc_deregister(&kvm_dev);
        kmem_cache_destroy(kvm_vcpu_cache);
        sysdev_unregister(&kvm_sysdev);
        sysdev_class_unregister(&kvm_sysdev_class);
        unregister_reboot_notifier(&kvm_reboot_notifier);
        unregister_cpu_notifier(&kvm_cpu_notifier);
        on_each_cpu(hardware_disable, NULL, 1);
        kvm_arch_hardware_unsetup();
        kvm_arch_exit();
        free_cpumask_var(cpus_hardware_enabled);
        __free_page(hwpoison_page);
        __free_page(bad_page);
}
EXPORT_SYMBOL_GPL(kvm_exit);
