Commit 02512b2bd63385d1f34f6956860dedbfc9ac20d7

Authored by Paolo Bonzini

Merge tag 'kvm-arm-fixes-3.19-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into kvm-master

Second round of fixes for KVM/ARM for 3.19.

Fixes memory corruption issues on APM platforms and swapping issues on
DMA-coherent systems.

Showing 14 changed files
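The central change on the 32-bit ARM side is that guest D-cache maintenance is now performed through a kernel mapping of the page frame rather than through the user-space address, which may have been swapped out on DMA-coherent systems. A minimal sketch of the pattern, assuming the pfn_t type and the kvm_flush_dcache_to_poc() helper already used in the headers changed below; the function name here is illustrative, not a kernel symbol:

#include <linux/highmem.h>

/* Map each page frame with kmap_atomic_pfn() and clean it to the point
 * of coherency, one page at a time, instead of operating on the
 * user-space virtual address. */
static void flush_guest_range_by_pfn(pfn_t pfn, unsigned long size)
{
	while (size) {
		void *va = kmap_atomic_pfn(pfn);	/* kernel mapping of this page */

		kvm_flush_dcache_to_poc(va, PAGE_SIZE);

		kunmap_atomic(va);
		pfn++;
		size -= PAGE_SIZE;
	}
}

This is the same shape as the new __coherent_cache_guest_page() and __kvm_flush_dcache_pmd() loops in arch/arm/include/asm/kvm_mmu.h below.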

arch/arm/include/asm/kvm_emulate.h
... ... @@ -38,6 +38,16 @@
38 38 vcpu->arch.hcr = HCR_GUEST_MASK;
39 39 }
40 40  
  41 +static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu)
  42 +{
  43 + return vcpu->arch.hcr;
  44 +}
  45 +
  46 +static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
  47 +{
  48 + vcpu->arch.hcr = hcr;
  49 +}
  50 +
41 51 static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
42 52 {
43 53 return 1;
arch/arm/include/asm/kvm_host.h
... ... @@ -125,9 +125,6 @@
125 125 * Anything that is not used directly from assembly code goes
126 126 * here.
127 127 */
128   - /* dcache set/way operation pending */
129   - int last_pcpu;
130   - cpumask_t require_dcache_flush;
131 128  
132 129 /* Don't run the guest on this vcpu */
133 130 bool pause;
arch/arm/include/asm/kvm_mmu.h
... ... @@ -44,6 +44,7 @@
44 44  
45 45 #ifndef __ASSEMBLY__
46 46  
  47 +#include <linux/highmem.h>
47 48 #include <asm/cacheflush.h>
48 49 #include <asm/pgalloc.h>
49 50  
50 51  
... ... @@ -161,13 +162,10 @@
161 162 return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;
162 163 }
163 164  
164   -static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
165   - unsigned long size,
166   - bool ipa_uncached)
  165 +static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
  166 + unsigned long size,
  167 + bool ipa_uncached)
167 168 {
168   - if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
169   - kvm_flush_dcache_to_poc((void *)hva, size);
170   -
171 169 /*
172 170 * If we are going to insert an instruction page and the icache is
173 171 * either VIPT or PIPT, there is a potential problem where the host
174 172  
175 173  
176 174  
... ... @@ -179,18 +177,77 @@
179 177 *
180 178 * VIVT caches are tagged using both the ASID and the VMID and doesn't
181 179 * need any kind of flushing (DDI 0406C.b - Page B3-1392).
  180 + *
  181 + * We need to do this through a kernel mapping (using the
  182 + * user-space mapping has proved to be the wrong
  183 + * solution). For that, we need to kmap one page at a time,
  184 + * and iterate over the range.
182 185 */
183   - if (icache_is_pipt()) {
184   - __cpuc_coherent_user_range(hva, hva + size);
185   - } else if (!icache_is_vivt_asid_tagged()) {
  186 +
  187 + bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached;
  188 +
  189 + VM_BUG_ON(size & ~PAGE_MASK);
  190 +
  191 + if (!need_flush && !icache_is_pipt())
  192 + goto vipt_cache;
  193 +
  194 + while (size) {
  195 + void *va = kmap_atomic_pfn(pfn);
  196 +
  197 + if (need_flush)
  198 + kvm_flush_dcache_to_poc(va, PAGE_SIZE);
  199 +
  200 + if (icache_is_pipt())
  201 + __cpuc_coherent_user_range((unsigned long)va,
  202 + (unsigned long)va + PAGE_SIZE);
  203 +
  204 + size -= PAGE_SIZE;
  205 + pfn++;
  206 +
  207 + kunmap_atomic(va);
  208 + }
  209 +
  210 +vipt_cache:
  211 + if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) {
186 212 /* any kind of VIPT cache */
187 213 __flush_icache_all();
188 214 }
189 215 }
190 216  
  217 +static inline void __kvm_flush_dcache_pte(pte_t pte)
  218 +{
  219 + void *va = kmap_atomic(pte_page(pte));
  220 +
  221 + kvm_flush_dcache_to_poc(va, PAGE_SIZE);
  222 +
  223 + kunmap_atomic(va);
  224 +}
  225 +
  226 +static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
  227 +{
  228 + unsigned long size = PMD_SIZE;
  229 + pfn_t pfn = pmd_pfn(pmd);
  230 +
  231 + while (size) {
  232 + void *va = kmap_atomic_pfn(pfn);
  233 +
  234 + kvm_flush_dcache_to_poc(va, PAGE_SIZE);
  235 +
  236 + pfn++;
  237 + size -= PAGE_SIZE;
  238 +
  239 + kunmap_atomic(va);
  240 + }
  241 +}
  242 +
  243 +static inline void __kvm_flush_dcache_pud(pud_t pud)
  244 +{
  245 +}
  246 +
191 247 #define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x))
192 248  
193   -void stage2_flush_vm(struct kvm *kvm);
  249 +void kvm_set_way_flush(struct kvm_vcpu *vcpu);
  250 +void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
194 251  
195 252 #endif /* !__ASSEMBLY__ */
196 253  
arch/arm/kvm/arm.c
... ... @@ -281,15 +281,6 @@
281 281 vcpu->cpu = cpu;
282 282 vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
283 283  
284   - /*
285   - * Check whether this vcpu requires the cache to be flushed on
286   - * this physical CPU. This is a consequence of doing dcache
287   - * operations by set/way on this vcpu. We do it here to be in
288   - * a non-preemptible section.
289   - */
290   - if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush))
291   - flush_cache_all(); /* We'd really want v7_flush_dcache_all() */
292   -
293 284 kvm_arm_set_running_vcpu(vcpu);
294 285 }
295 286  
... ... @@ -541,7 +532,6 @@
541 532 ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
542 533  
543 534 vcpu->mode = OUTSIDE_GUEST_MODE;
544   - vcpu->arch.last_pcpu = smp_processor_id();
545 535 kvm_guest_exit();
546 536 trace_kvm_exit(*vcpu_pc(vcpu));
547 537 /*
arch/arm/kvm/coproc.c
... ... @@ -189,82 +189,40 @@
189 189 return true;
190 190 }
191 191  
192   -/* See note at ARM ARM B1.14.4 */
  192 +/*
  193 + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
  194 + */
193 195 static bool access_dcsw(struct kvm_vcpu *vcpu,
194 196 const struct coproc_params *p,
195 197 const struct coproc_reg *r)
196 198 {
197   - unsigned long val;
198   - int cpu;
199   -
200 199 if (!p->is_write)
201 200 return read_from_write_only(vcpu, p);
202 201  
203   - cpu = get_cpu();
204   -
205   - cpumask_setall(&vcpu->arch.require_dcache_flush);
206   - cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush);
207   -
208   - /* If we were already preempted, take the long way around */
209   - if (cpu != vcpu->arch.last_pcpu) {
210   - flush_cache_all();
211   - goto done;
212   - }
213   -
214   - val = *vcpu_reg(vcpu, p->Rt1);
215   -
216   - switch (p->CRm) {
217   - case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */
218   - case 14: /* DCCISW */
219   - asm volatile("mcr p15, 0, %0, c7, c14, 2" : : "r" (val));
220   - break;
221   -
222   - case 10: /* DCCSW */
223   - asm volatile("mcr p15, 0, %0, c7, c10, 2" : : "r" (val));
224   - break;
225   - }
226   -
227   -done:
228   - put_cpu();
229   -
  202 + kvm_set_way_flush(vcpu);
230 203 return true;
231 204 }
232 205  
233 206 /*
234 207 * Generic accessor for VM registers. Only called as long as HCR_TVM
235   - * is set.
  208 + * is set. If the guest enables the MMU, we stop trapping the VM
  209 + * sys_regs and leave it in complete control of the caches.
  210 + *
  211 + * Used by the cpu-specific code.
236 212 */
237   -static bool access_vm_reg(struct kvm_vcpu *vcpu,
238   - const struct coproc_params *p,
239   - const struct coproc_reg *r)
  213 +bool access_vm_reg(struct kvm_vcpu *vcpu,
  214 + const struct coproc_params *p,
  215 + const struct coproc_reg *r)
240 216 {
  217 + bool was_enabled = vcpu_has_cache_enabled(vcpu);
  218 +
241 219 BUG_ON(!p->is_write);
242 220  
243 221 vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1);
244 222 if (p->is_64bit)
245 223 vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2);
246 224  
247   - return true;
248   -}
249   -
250   -/*
251   - * SCTLR accessor. Only called as long as HCR_TVM is set. If the
252   - * guest enables the MMU, we stop trapping the VM sys_regs and leave
253   - * it in complete control of the caches.
254   - *
255   - * Used by the cpu-specific code.
256   - */
257   -bool access_sctlr(struct kvm_vcpu *vcpu,
258   - const struct coproc_params *p,
259   - const struct coproc_reg *r)
260   -{
261   - access_vm_reg(vcpu, p, r);
262   -
263   - if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */
264   - vcpu->arch.hcr &= ~HCR_TVM;
265   - stage2_flush_vm(vcpu->kvm);
266   - }
267   -
  225 + kvm_toggle_cache(vcpu, was_enabled);
268 226 return true;
269 227 }
270 228  
arch/arm/kvm/coproc.h
... ... @@ -153,9 +153,9 @@
153 153 #define is64 .is_64 = true
154 154 #define is32 .is_64 = false
155 155  
156   -bool access_sctlr(struct kvm_vcpu *vcpu,
157   - const struct coproc_params *p,
158   - const struct coproc_reg *r);
  156 +bool access_vm_reg(struct kvm_vcpu *vcpu,
  157 + const struct coproc_params *p,
  158 + const struct coproc_reg *r);
159 159  
160 160 #endif /* __ARM_KVM_COPROC_LOCAL_H__ */
arch/arm/kvm/coproc_a15.c
... ... @@ -34,7 +34,7 @@
34 34 static const struct coproc_reg a15_regs[] = {
35 35 /* SCTLR: swapped by interrupt.S. */
36 36 { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
37   - access_sctlr, reset_val, c1_SCTLR, 0x00C50078 },
  37 + access_vm_reg, reset_val, c1_SCTLR, 0x00C50078 },
38 38 };
39 39  
40 40 static struct kvm_coproc_target_table a15_target_table = {
arch/arm/kvm/coproc_a7.c
... ... @@ -37,7 +37,7 @@
37 37 static const struct coproc_reg a7_regs[] = {
38 38 /* SCTLR: swapped by interrupt.S. */
39 39 { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
40   - access_sctlr, reset_val, c1_SCTLR, 0x00C50878 },
  40 + access_vm_reg, reset_val, c1_SCTLR, 0x00C50878 },
41 41 };
42 42  
43 43 static struct kvm_coproc_target_table a7_target_table = {
arch/arm/kvm/mmu.c
... ... @@ -58,6 +58,26 @@
58 58 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
59 59 }
60 60  
  61 +/*
  62 + * D-Cache management functions. They take the page table entries by
  63 + * value, as they are flushing the cache using the kernel mapping (or
  64 + * kmap on 32bit).
  65 + */
  66 +static void kvm_flush_dcache_pte(pte_t pte)
  67 +{
  68 + __kvm_flush_dcache_pte(pte);
  69 +}
  70 +
  71 +static void kvm_flush_dcache_pmd(pmd_t pmd)
  72 +{
  73 + __kvm_flush_dcache_pmd(pmd);
  74 +}
  75 +
  76 +static void kvm_flush_dcache_pud(pud_t pud)
  77 +{
  78 + __kvm_flush_dcache_pud(pud);
  79 +}
  80 +
61 81 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
62 82 int min, int max)
63 83 {
... ... @@ -119,6 +139,26 @@
119 139 put_page(virt_to_page(pmd));
120 140 }
121 141  
  142 +/*
  143 + * Unmapping vs dcache management:
  144 + *
  145 + * If a guest maps certain memory pages as uncached, all writes will
  146 + * bypass the data cache and go directly to RAM. However, the CPUs
  147 + * can still speculate reads (not writes) and fill cache lines with
  148 + * data.
  149 + *
  150 + * Those cache lines will be *clean* cache lines though, so a
  151 + * clean+invalidate operation is equivalent to an invalidate
  152 + * operation, because no cache lines are marked dirty.
  153 + *
  154 + * Those clean cache lines could be filled prior to an uncached write
  155 + * by the guest, and the cache coherent IO subsystem would therefore
  156 + * end up writing old data to disk.
  157 + *
  158 + * This is why right after unmapping a page/section and invalidating
  159 + * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
  160 + * the IO subsystem will never hit in the cache.
  161 + */
122 162 static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
123 163 phys_addr_t addr, phys_addr_t end)
124 164 {
125 165  
126 166  
... ... @@ -128,9 +168,16 @@
128 168 start_pte = pte = pte_offset_kernel(pmd, addr);
129 169 do {
130 170 if (!pte_none(*pte)) {
  171 + pte_t old_pte = *pte;
  172 +
131 173 kvm_set_pte(pte, __pte(0));
132   - put_page(virt_to_page(pte));
133 174 kvm_tlb_flush_vmid_ipa(kvm, addr);
  175 +
  176 + /* No need to invalidate the cache for device mappings */
  177 + if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
  178 + kvm_flush_dcache_pte(old_pte);
  179 +
  180 + put_page(virt_to_page(pte));
134 181 }
135 182 } while (pte++, addr += PAGE_SIZE, addr != end);
136 183  
137 184  
... ... @@ -149,8 +196,13 @@
149 196 next = kvm_pmd_addr_end(addr, end);
150 197 if (!pmd_none(*pmd)) {
151 198 if (kvm_pmd_huge(*pmd)) {
  199 + pmd_t old_pmd = *pmd;
  200 +
152 201 pmd_clear(pmd);
153 202 kvm_tlb_flush_vmid_ipa(kvm, addr);
  203 +
  204 + kvm_flush_dcache_pmd(old_pmd);
  205 +
154 206 put_page(virt_to_page(pmd));
155 207 } else {
156 208 unmap_ptes(kvm, pmd, addr, next);
157 209  
... ... @@ -173,8 +225,13 @@
173 225 next = kvm_pud_addr_end(addr, end);
174 226 if (!pud_none(*pud)) {
175 227 if (pud_huge(*pud)) {
  228 + pud_t old_pud = *pud;
  229 +
176 230 pud_clear(pud);
177 231 kvm_tlb_flush_vmid_ipa(kvm, addr);
  232 +
  233 + kvm_flush_dcache_pud(old_pud);
  234 +
178 235 put_page(virt_to_page(pud));
179 236 } else {
180 237 unmap_pmds(kvm, pud, addr, next);
... ... @@ -209,10 +266,9 @@
209 266  
210 267 pte = pte_offset_kernel(pmd, addr);
211 268 do {
212   - if (!pte_none(*pte)) {
213   - hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
214   - kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
215   - }
  269 + if (!pte_none(*pte) &&
  270 + (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
  271 + kvm_flush_dcache_pte(*pte);
216 272 } while (pte++, addr += PAGE_SIZE, addr != end);
217 273 }
218 274  
219 275  
... ... @@ -226,12 +282,10 @@
226 282 do {
227 283 next = kvm_pmd_addr_end(addr, end);
228 284 if (!pmd_none(*pmd)) {
229   - if (kvm_pmd_huge(*pmd)) {
230   - hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
231   - kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
232   - } else {
  285 + if (kvm_pmd_huge(*pmd))
  286 + kvm_flush_dcache_pmd(*pmd);
  287 + else
233 288 stage2_flush_ptes(kvm, pmd, addr, next);
234   - }
235 289 }
236 290 } while (pmd++, addr = next, addr != end);
237 291 }
238 292  
... ... @@ -246,12 +300,10 @@
246 300 do {
247 301 next = kvm_pud_addr_end(addr, end);
248 302 if (!pud_none(*pud)) {
249   - if (pud_huge(*pud)) {
250   - hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
251   - kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
252   - } else {
  303 + if (pud_huge(*pud))
  304 + kvm_flush_dcache_pud(*pud);
  305 + else
253 306 stage2_flush_pmds(kvm, pud, addr, next);
254   - }
255 307 }
256 308 } while (pud++, addr = next, addr != end);
257 309 }
... ... @@ -278,7 +330,7 @@
278 330 * Go through the stage 2 page tables and invalidate any cache lines
279 331 * backing memory already mapped to the VM.
280 332 */
281   -void stage2_flush_vm(struct kvm *kvm)
  333 +static void stage2_flush_vm(struct kvm *kvm)
282 334 {
283 335 struct kvm_memslots *slots;
284 336 struct kvm_memory_slot *memslot;
... ... @@ -905,6 +957,12 @@
905 957 return !pfn_valid(pfn);
906 958 }
907 959  
  960 +static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
  961 + unsigned long size, bool uncached)
  962 +{
  963 + __coherent_cache_guest_page(vcpu, pfn, size, uncached);
  964 +}
  965 +
908 966 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
909 967 struct kvm_memory_slot *memslot, unsigned long hva,
910 968 unsigned long fault_status)
... ... @@ -994,8 +1052,7 @@
994 1052 kvm_set_s2pmd_writable(&new_pmd);
995 1053 kvm_set_pfn_dirty(pfn);
996 1054 }
997   - coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE,
998   - fault_ipa_uncached);
  1055 + coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
999 1056 ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
1000 1057 } else {
1001 1058 pte_t new_pte = pfn_pte(pfn, mem_type);
... ... @@ -1003,8 +1060,7 @@
1003 1060 kvm_set_s2pte_writable(&new_pte);
1004 1061 kvm_set_pfn_dirty(pfn);
1005 1062 }
1006   - coherent_cache_guest_page(vcpu, hva, PAGE_SIZE,
1007   - fault_ipa_uncached);
  1063 + coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
1008 1064 ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
1009 1065 pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
1010 1066 }
... ... @@ -1410,5 +1466,73 @@
1410 1466 spin_lock(&kvm->mmu_lock);
1411 1467 unmap_stage2_range(kvm, gpa, size);
1412 1468 spin_unlock(&kvm->mmu_lock);
  1469 +}
  1470 +
  1471 +/*
  1472 + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
  1473 + *
  1474 + * Main problems:
  1475 + * - S/W ops are local to a CPU (not broadcast)
  1476 + * - We have line migration behind our back (speculation)
  1477 + * - System caches don't support S/W at all (damn!)
  1478 + *
  1479 + * In the face of the above, the best we can do is to try and convert
  1480 + * S/W ops to VA ops. Because the guest is not allowed to infer the
  1481 + * S/W to PA mapping, it can only use S/W to nuke the whole cache,
  1482 + * which is a rather good thing for us.
  1483 + *
  1484 + * Also, it is only used when turning caches on/off ("The expected
  1485 + * usage of the cache maintenance instructions that operate by set/way
  1486 + * is associated with the cache maintenance instructions associated
  1487 + * with the powerdown and powerup of caches, if this is required by
  1488 + * the implementation.").
  1489 + *
  1490 + * We use the following policy:
  1491 + *
  1492 + * - If we trap a S/W operation, we enable VM trapping to detect
  1493 + * caches being turned on/off, and do a full clean.
  1494 + *
  1495 + * - We flush the caches on both caches being turned on and off.
  1496 + *
  1497 + * - Once the caches are enabled, we stop trapping VM ops.
  1498 + */
  1499 +void kvm_set_way_flush(struct kvm_vcpu *vcpu)
  1500 +{
  1501 + unsigned long hcr = vcpu_get_hcr(vcpu);
  1502 +
  1503 + /*
  1504 + * If this is the first time we do a S/W operation
  1505 + * (i.e. HCR_TVM not set) flush the whole memory, and set the
  1506 + * VM trapping.
  1507 + *
  1508 + * Otherwise, rely on the VM trapping to wait for the MMU +
  1509 + * Caches to be turned off. At that point, we'll be able to
  1510 + * clean the caches again.
  1511 + */
  1512 + if (!(hcr & HCR_TVM)) {
  1513 + trace_kvm_set_way_flush(*vcpu_pc(vcpu),
  1514 + vcpu_has_cache_enabled(vcpu));
  1515 + stage2_flush_vm(vcpu->kvm);
  1516 + vcpu_set_hcr(vcpu, hcr | HCR_TVM);
  1517 + }
  1518 +}
  1519 +
  1520 +void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
  1521 +{
  1522 + bool now_enabled = vcpu_has_cache_enabled(vcpu);
  1523 +
  1524 + /*
  1525 + * If switching the MMU+caches on, need to invalidate the caches.
  1526 + * If switching it off, need to clean the caches.
  1527 + * Clean + invalidate does the trick always.
  1528 + */
  1529 + if (now_enabled != was_enabled)
  1530 + stage2_flush_vm(vcpu->kvm);
  1531 +
  1532 + /* Caches are now on, stop trapping VM ops (until a S/W op) */
  1533 + if (now_enabled)
  1534 + vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM);
  1535 +
  1536 + trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
1413 1537 }
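Condensed into a single illustrative sequence, the policy above works out as follows when a guest turns its caches off and back on (for instance around a reboot). This is a sketch built from the handler names added in this merge, not actual kernel code:

/* Walkthrough of the traps produced by a guest cache off/on cycle. */
static void example_cache_toggle_sequence(struct kvm_vcpu *vcpu)
{
	/* 1. The guest starts a DC CISW/CSW set/way loop. The first trap
	 *    cleans the whole stage-2 address space and sets HCR_TVM so
	 *    that subsequent VM-register writes are trapped as well. */
	kvm_set_way_flush(vcpu);

	/* 2. The guest clears SCTLR.C/M. The write is trapped, the cache
	 *    state changes (on -> off), so the VM is flushed again. */
	kvm_toggle_cache(vcpu, /* was_enabled = */ true);

	/* 3. The guest re-enables the MMU and caches: one more flush,
	 *    after which HCR_TVM is dropped and the guest runs without
	 *    VM-register traps until the next set/way operation. */
	kvm_toggle_cache(vcpu, /* was_enabled = */ false);
}

Steps 2 and 3 assume the SCTLR shadow value has already been updated by access_vm_reg(), which is the order the real handler uses.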
arch/arm/kvm/trace.h
... ... @@ -223,6 +223,45 @@
223 223 __entry->vcpu_pc, __entry->r0, __entry->imm)
224 224 );
225 225  
  226 +TRACE_EVENT(kvm_set_way_flush,
  227 + TP_PROTO(unsigned long vcpu_pc, bool cache),
  228 + TP_ARGS(vcpu_pc, cache),
  229 +
  230 + TP_STRUCT__entry(
  231 + __field( unsigned long, vcpu_pc )
  232 + __field( bool, cache )
  233 + ),
  234 +
  235 + TP_fast_assign(
  236 + __entry->vcpu_pc = vcpu_pc;
  237 + __entry->cache = cache;
  238 + ),
  239 +
  240 + TP_printk("S/W flush at 0x%016lx (cache %s)",
  241 + __entry->vcpu_pc, __entry->cache ? "on" : "off")
  242 +);
  243 +
  244 +TRACE_EVENT(kvm_toggle_cache,
  245 + TP_PROTO(unsigned long vcpu_pc, bool was, bool now),
  246 + TP_ARGS(vcpu_pc, was, now),
  247 +
  248 + TP_STRUCT__entry(
  249 + __field( unsigned long, vcpu_pc )
  250 + __field( bool, was )
  251 + __field( bool, now )
  252 + ),
  253 +
  254 + TP_fast_assign(
  255 + __entry->vcpu_pc = vcpu_pc;
  256 + __entry->was = was;
  257 + __entry->now = now;
  258 + ),
  259 +
  260 + TP_printk("VM op at 0x%016lx (cache was %s, now %s)",
  261 + __entry->vcpu_pc, __entry->was ? "on" : "off",
  262 + __entry->now ? "on" : "off")
  263 +);
  264 +
226 265 #endif /* _TRACE_KVM_H */
227 266  
228 267 #undef TRACE_INCLUDE_PATH
arch/arm64/include/asm/kvm_emulate.h
... ... @@ -45,6 +45,16 @@
45 45 vcpu->arch.hcr_el2 &= ~HCR_RW;
46 46 }
47 47  
  48 +static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu)
  49 +{
  50 + return vcpu->arch.hcr_el2;
  51 +}
  52 +
  53 +static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
  54 +{
  55 + vcpu->arch.hcr_el2 = hcr;
  56 +}
  57 +
48 58 static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
49 59 {
50 60 return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
arch/arm64/include/asm/kvm_host.h
... ... @@ -116,9 +116,6 @@
116 116 * Anything that is not used directly from assembly code goes
117 117 * here.
118 118 */
119   - /* dcache set/way operation pending */
120   - int last_pcpu;
121   - cpumask_t require_dcache_flush;
122 119  
123 120 /* Don't run the guest */
124 121 bool pause;
arch/arm64/include/asm/kvm_mmu.h
... ... @@ -243,24 +243,46 @@
243 243 return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
244 244 }
245 245  
246   -static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
247   - unsigned long size,
248   - bool ipa_uncached)
  246 +static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
  247 + unsigned long size,
  248 + bool ipa_uncached)
249 249 {
  250 + void *va = page_address(pfn_to_page(pfn));
  251 +
250 252 if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
251   - kvm_flush_dcache_to_poc((void *)hva, size);
  253 + kvm_flush_dcache_to_poc(va, size);
252 254  
253 255 if (!icache_is_aliasing()) { /* PIPT */
254   - flush_icache_range(hva, hva + size);
  256 + flush_icache_range((unsigned long)va,
  257 + (unsigned long)va + size);
255 258 } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */
256 259 /* any kind of VIPT cache */
257 260 __flush_icache_all();
258 261 }
259 262 }
260 263  
  264 +static inline void __kvm_flush_dcache_pte(pte_t pte)
  265 +{
  266 + struct page *page = pte_page(pte);
  267 + kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
  268 +}
  269 +
  270 +static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
  271 +{
  272 + struct page *page = pmd_page(pmd);
  273 + kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
  274 +}
  275 +
  276 +static inline void __kvm_flush_dcache_pud(pud_t pud)
  277 +{
  278 + struct page *page = pud_page(pud);
  279 + kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
  280 +}
  281 +
261 282 #define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x))
262 283  
263   -void stage2_flush_vm(struct kvm *kvm);
  284 +void kvm_set_way_flush(struct kvm_vcpu *vcpu);
  285 +void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
264 286  
265 287 #endif /* __ASSEMBLY__ */
266 288 #endif /* __ARM64_KVM_MMU_H__ */
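Worth noting against the 32-bit version earlier in this diff: arm64 has no highmem, so every guest page is reachable through the kernel linear mapping and page_address() can be used directly, while 32-bit ARM has to kmap each page individually. A small sketch of the two flavours, assuming the flush helper from the headers above (function names are illustrative):

#include <linux/highmem.h>

/* 32-bit ARM: the page may live in highmem, so create a temporary
 * atomic mapping and clean one page at a time. */
static void clean_guest_page_arm(pfn_t pfn)
{
	void *va = kmap_atomic_pfn(pfn);

	kvm_flush_dcache_to_poc(va, PAGE_SIZE);
	kunmap_atomic(va);
}

/* arm64: all of memory sits in the linear map, so a physically
 * contiguous range (page, PMD or PUD sized) can be cleaned in one go. */
static void clean_guest_range_arm64(pfn_t pfn, unsigned long size)
{
	kvm_flush_dcache_to_poc(page_address(pfn_to_page(pfn)), size);
}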
arch/arm64/kvm/sys_regs.c
... ... @@ -69,68 +69,31 @@
69 69 return ccsidr;
70 70 }
71 71  
72   -static void do_dc_cisw(u32 val)
73   -{
74   - asm volatile("dc cisw, %x0" : : "r" (val));
75   - dsb(ish);
76   -}
77   -
78   -static void do_dc_csw(u32 val)
79   -{
80   - asm volatile("dc csw, %x0" : : "r" (val));
81   - dsb(ish);
82   -}
83   -
84   -/* See note at ARM ARM B1.14.4 */
  72 +/*
  73 + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
  74 + */
85 75 static bool access_dcsw(struct kvm_vcpu *vcpu,
86 76 const struct sys_reg_params *p,
87 77 const struct sys_reg_desc *r)
88 78 {
89   - unsigned long val;
90   - int cpu;
91   -
92 79 if (!p->is_write)
93 80 return read_from_write_only(vcpu, p);
94 81  
95   - cpu = get_cpu();
96   -
97   - cpumask_setall(&vcpu->arch.require_dcache_flush);
98   - cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush);
99   -
100   - /* If we were already preempted, take the long way around */
101   - if (cpu != vcpu->arch.last_pcpu) {
102   - flush_cache_all();
103   - goto done;
104   - }
105   -
106   - val = *vcpu_reg(vcpu, p->Rt);
107   -
108   - switch (p->CRm) {
109   - case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */
110   - case 14: /* DCCISW */
111   - do_dc_cisw(val);
112   - break;
113   -
114   - case 10: /* DCCSW */
115   - do_dc_csw(val);
116   - break;
117   - }
118   -
119   -done:
120   - put_cpu();
121   -
  82 + kvm_set_way_flush(vcpu);
122 83 return true;
123 84 }
124 85  
125 86 /*
126 87 * Generic accessor for VM registers. Only called as long as HCR_TVM
127   - * is set.
  88 + * is set. If the guest enables the MMU, we stop trapping the VM
  89 + * sys_regs and leave it in complete control of the caches.
128 90 */
129 91 static bool access_vm_reg(struct kvm_vcpu *vcpu,
130 92 const struct sys_reg_params *p,
131 93 const struct sys_reg_desc *r)
132 94 {
133 95 unsigned long val;
  96 + bool was_enabled = vcpu_has_cache_enabled(vcpu);
134 97  
135 98 BUG_ON(!p->is_write);
136 99  
137 100  
... ... @@ -143,28 +106,10 @@
143 106 vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL;
144 107 }
145 108  
  109 + kvm_toggle_cache(vcpu, was_enabled);
146 110 return true;
147 111 }
148 112  
149   -/*
150   - * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set. If the
151   - * guest enables the MMU, we stop trapping the VM sys_regs and leave
152   - * it in complete control of the caches.
153   - */
154   -static bool access_sctlr(struct kvm_vcpu *vcpu,
155   - const struct sys_reg_params *p,
156   - const struct sys_reg_desc *r)
157   -{
158   - access_vm_reg(vcpu, p, r);
159   -
160   - if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */
161   - vcpu->arch.hcr_el2 &= ~HCR_TVM;
162   - stage2_flush_vm(vcpu->kvm);
163   - }
164   -
165   - return true;
166   -}
167   -
168 113 static bool trap_raz_wi(struct kvm_vcpu *vcpu,
169 114 const struct sys_reg_params *p,
170 115 const struct sys_reg_desc *r)
... ... @@ -377,7 +322,7 @@
377 322 NULL, reset_mpidr, MPIDR_EL1 },
378 323 /* SCTLR_EL1 */
379 324 { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
380   - access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 },
  325 + access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
381 326 /* CPACR_EL1 */
382 327 { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010),
383 328 NULL, reset_val, CPACR_EL1, 0 },
... ... @@ -657,7 +602,7 @@
657 602 * register).
658 603 */
659 604 static const struct sys_reg_desc cp15_regs[] = {
660   - { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR },
  605 + { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
661 606 { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
662 607 { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
663 608 { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },