Commit 02512b2bd63385d1f34f6956860dedbfc9ac20d7
Exists in ti-lsk-linux-4.1.y and in 10 other branches
Merge tag 'kvm-arm-fixes-3.19-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into kvm-master

Second round of fixes for KVM/ARM for 3.19. Fixes memory corruption issues on APM platforms and swapping issues on DMA-coherent systems.
Showing 14 changed files:
- arch/arm/include/asm/kvm_emulate.h
- arch/arm/include/asm/kvm_host.h
- arch/arm/include/asm/kvm_mmu.h
- arch/arm/kvm/arm.c
- arch/arm/kvm/coproc.c
- arch/arm/kvm/coproc.h
- arch/arm/kvm/coproc_a15.c
- arch/arm/kvm/coproc_a7.c
- arch/arm/kvm/mmu.c
- arch/arm/kvm/trace.h
- arch/arm64/include/asm/kvm_emulate.h
- arch/arm64/include/asm/kvm_host.h
- arch/arm64/include/asm/kvm_mmu.h
- arch/arm64/kvm/sys_regs.c
arch/arm/include/asm/kvm_emulate.h
... | ... | @@ -38,6 +38,16 @@ |
38 | 38 | vcpu->arch.hcr = HCR_GUEST_MASK; |
39 | 39 | } |
40 | 40 | |
41 | +static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu) | |
42 | +{ | |
43 | + return vcpu->arch.hcr; | |
44 | +} | |
45 | + | |
46 | +static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr) | |
47 | +{ | |
48 | + vcpu->arch.hcr = hcr; | |
49 | +} | |
50 | + | |
41 | 51 | static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) |
42 | 52 | { |
43 | 53 | return 1; |
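The two accessors added above let the architecture-independent MMU code read and modify HCR without knowing whether the backing field is arch.hcr (32-bit ARM) or arch.hcr_el2 (arm64). A minimal sketch of the intended usage, mirroring what kvm_set_way_flush() does later in this commit (illustrative only, not part of the diff):

	/* Illustrative sketch: enable trapping of VM register writes on any host. */
	static void example_trap_vm_regs(struct kvm_vcpu *vcpu)
	{
		unsigned long hcr = vcpu_get_hcr(vcpu);

		/* Set HCR_TVM so the next guest write to a VM register traps. */
		if (!(hcr & HCR_TVM))
			vcpu_set_hcr(vcpu, hcr | HCR_TVM);
	}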
arch/arm/include/asm/kvm_host.h
... | ... | @@ -125,9 +125,6 @@ |
125 | 125 | * Anything that is not used directly from assembly code goes |
126 | 126 | * here. |
127 | 127 | */ |
128 | - /* dcache set/way operation pending */ | |
129 | - int last_pcpu; | |
130 | - cpumask_t require_dcache_flush; | |
131 | 128 | |
132 | 129 | /* Don't run the guest on this vcpu */ |
133 | 130 | bool pause; |
arch/arm/include/asm/kvm_mmu.h
... | ... | @@ -44,6 +44,7 @@ |
44 | 44 | |
45 | 45 | #ifndef __ASSEMBLY__ |
46 | 46 | |
47 | +#include <linux/highmem.h> | |
47 | 48 | #include <asm/cacheflush.h> |
48 | 49 | #include <asm/pgalloc.h> |
49 | 50 | |
50 | 51 | |
... | ... | @@ -161,13 +162,10 @@ |
161 | 162 | return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101; |
162 | 163 | } |
163 | 164 | |
164 | -static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, | |
165 | - unsigned long size, | |
166 | - bool ipa_uncached) | |
165 | +static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn, | |
166 | + unsigned long size, | |
167 | + bool ipa_uncached) | |
167 | 168 | { |
168 | - if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) | |
169 | - kvm_flush_dcache_to_poc((void *)hva, size); | |
170 | - | |
171 | 169 | /* |
172 | 170 | * If we are going to insert an instruction page and the icache is |
173 | 171 | * either VIPT or PIPT, there is a potential problem where the host |
174 | 172 | |
175 | 173 | |
176 | 174 | |
... | ... | @@ -179,18 +177,77 @@ |
179 | 177 | * |
180 | 178 | * VIVT caches are tagged using both the ASID and the VMID and doesn't |
181 | 179 | * need any kind of flushing (DDI 0406C.b - Page B3-1392). |
180 | + * | |
181 | + * We need to do this through a kernel mapping (using the | |
182 | + * user-space mapping has proved to be the wrong | |
183 | + * solution). For that, we need to kmap one page at a time, | |
184 | + * and iterate over the range. | |
182 | 185 | */ |
183 | - if (icache_is_pipt()) { | |
184 | - __cpuc_coherent_user_range(hva, hva + size); | |
185 | - } else if (!icache_is_vivt_asid_tagged()) { | |
186 | + | |
187 | + bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached; | |
188 | + | |
189 | + VM_BUG_ON(size & ~PAGE_MASK); | |
190 | + | |
191 | + if (!need_flush && !icache_is_pipt()) | |
192 | + goto vipt_cache; | |
193 | + | |
194 | + while (size) { | |
195 | + void *va = kmap_atomic_pfn(pfn); | |
196 | + | |
197 | + if (need_flush) | |
198 | + kvm_flush_dcache_to_poc(va, PAGE_SIZE); | |
199 | + | |
200 | + if (icache_is_pipt()) | |
201 | + __cpuc_coherent_user_range((unsigned long)va, | |
202 | + (unsigned long)va + PAGE_SIZE); | |
203 | + | |
204 | + size -= PAGE_SIZE; | |
205 | + pfn++; | |
206 | + | |
207 | + kunmap_atomic(va); | |
208 | + } | |
209 | + | |
210 | +vipt_cache: | |
211 | + if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) { | |
186 | 212 | /* any kind of VIPT cache */ |
187 | 213 | __flush_icache_all(); |
188 | 214 | } |
189 | 215 | } |
190 | 216 | |
217 | +static inline void __kvm_flush_dcache_pte(pte_t pte) | |
218 | +{ | |
219 | + void *va = kmap_atomic(pte_page(pte)); | |
220 | + | |
221 | + kvm_flush_dcache_to_poc(va, PAGE_SIZE); | |
222 | + | |
223 | + kunmap_atomic(va); | |
224 | +} | |
225 | + | |
226 | +static inline void __kvm_flush_dcache_pmd(pmd_t pmd) | |
227 | +{ | |
228 | + unsigned long size = PMD_SIZE; | |
229 | + pfn_t pfn = pmd_pfn(pmd); | |
230 | + | |
231 | + while (size) { | |
232 | + void *va = kmap_atomic_pfn(pfn); | |
233 | + | |
234 | + kvm_flush_dcache_to_poc(va, PAGE_SIZE); | |
235 | + | |
236 | + pfn++; | |
237 | + size -= PAGE_SIZE; | |
238 | + | |
239 | + kunmap_atomic(va); | |
240 | + } | |
241 | +} | |
242 | + | |
243 | +static inline void __kvm_flush_dcache_pud(pud_t pud) | |
244 | +{ | |
245 | +} | |
246 | + | |
191 | 247 | #define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x)) |
192 | 248 | |
193 | -void stage2_flush_vm(struct kvm *kvm); | |
249 | +void kvm_set_way_flush(struct kvm_vcpu *vcpu); | |
250 | +void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); | |
194 | 251 | |
195 | 252 | #endif /* !__ASSEMBLY__ */ |
196 | 253 |
arch/arm/kvm/arm.c
... | ... | @@ -281,15 +281,6 @@ |
281 | 281 | vcpu->cpu = cpu; |
282 | 282 | vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); |
283 | 283 | |
284 | - /* | |
285 | - * Check whether this vcpu requires the cache to be flushed on | |
286 | - * this physical CPU. This is a consequence of doing dcache | |
287 | - * operations by set/way on this vcpu. We do it here to be in | |
288 | - * a non-preemptible section. | |
289 | - */ | |
290 | - if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush)) | |
291 | - flush_cache_all(); /* We'd really want v7_flush_dcache_all() */ | |
292 | - | |
293 | 284 | kvm_arm_set_running_vcpu(vcpu); |
294 | 285 | } |
295 | 286 | |
... | ... | @@ -541,7 +532,6 @@ |
541 | 532 | ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); |
542 | 533 | |
543 | 534 | vcpu->mode = OUTSIDE_GUEST_MODE; |
544 | - vcpu->arch.last_pcpu = smp_processor_id(); | |
545 | 535 | kvm_guest_exit(); |
546 | 536 | trace_kvm_exit(*vcpu_pc(vcpu)); |
547 | 537 | /* |
arch/arm/kvm/coproc.c
... | ... | @@ -189,82 +189,40 @@ |
189 | 189 | return true; |
190 | 190 | } |
191 | 191 | |
192 | -/* See note at ARM ARM B1.14.4 */ | |
192 | +/* | |
193 | + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). | |
194 | + */ | |
193 | 195 | static bool access_dcsw(struct kvm_vcpu *vcpu, |
194 | 196 | const struct coproc_params *p, |
195 | 197 | const struct coproc_reg *r) |
196 | 198 | { |
197 | - unsigned long val; | |
198 | - int cpu; | |
199 | - | |
200 | 199 | if (!p->is_write) |
201 | 200 | return read_from_write_only(vcpu, p); |
202 | 201 | |
203 | - cpu = get_cpu(); | |
204 | - | |
205 | - cpumask_setall(&vcpu->arch.require_dcache_flush); | |
206 | - cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush); | |
207 | - | |
208 | - /* If we were already preempted, take the long way around */ | |
209 | - if (cpu != vcpu->arch.last_pcpu) { | |
210 | - flush_cache_all(); | |
211 | - goto done; | |
212 | - } | |
213 | - | |
214 | - val = *vcpu_reg(vcpu, p->Rt1); | |
215 | - | |
216 | - switch (p->CRm) { | |
217 | - case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */ | |
218 | - case 14: /* DCCISW */ | |
219 | - asm volatile("mcr p15, 0, %0, c7, c14, 2" : : "r" (val)); | |
220 | - break; | |
221 | - | |
222 | - case 10: /* DCCSW */ | |
223 | - asm volatile("mcr p15, 0, %0, c7, c10, 2" : : "r" (val)); | |
224 | - break; | |
225 | - } | |
226 | - | |
227 | -done: | |
228 | - put_cpu(); | |
229 | - | |
202 | + kvm_set_way_flush(vcpu); | |
230 | 203 | return true; |
231 | 204 | } |
232 | 205 | |
233 | 206 | /* |
234 | 207 | * Generic accessor for VM registers. Only called as long as HCR_TVM |
235 | - * is set. | |
208 | + * is set. If the guest enables the MMU, we stop trapping the VM | |
209 | + * sys_regs and leave it in complete control of the caches. | |
210 | + * | |
211 | + * Used by the cpu-specific code. | |
236 | 212 | */ |
237 | -static bool access_vm_reg(struct kvm_vcpu *vcpu, | |
238 | - const struct coproc_params *p, | |
239 | - const struct coproc_reg *r) | |
213 | +bool access_vm_reg(struct kvm_vcpu *vcpu, | |
214 | + const struct coproc_params *p, | |
215 | + const struct coproc_reg *r) | |
240 | 216 | { |
217 | + bool was_enabled = vcpu_has_cache_enabled(vcpu); | |
218 | + | |
241 | 219 | BUG_ON(!p->is_write); |
242 | 220 | |
243 | 221 | vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1); |
244 | 222 | if (p->is_64bit) |
245 | 223 | vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2); |
246 | 224 | |
247 | - return true; | |
248 | -} | |
249 | - | |
250 | -/* | |
251 | - * SCTLR accessor. Only called as long as HCR_TVM is set. If the | |
252 | - * guest enables the MMU, we stop trapping the VM sys_regs and leave | |
253 | - * it in complete control of the caches. | |
254 | - * | |
255 | - * Used by the cpu-specific code. | |
256 | - */ | |
257 | -bool access_sctlr(struct kvm_vcpu *vcpu, | |
258 | - const struct coproc_params *p, | |
259 | - const struct coproc_reg *r) | |
260 | -{ | |
261 | - access_vm_reg(vcpu, p, r); | |
262 | - | |
263 | - if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */ | |
264 | - vcpu->arch.hcr &= ~HCR_TVM; | |
265 | - stage2_flush_vm(vcpu->kvm); | |
266 | - } | |
267 | - | |
225 | + kvm_toggle_cache(vcpu, was_enabled); | |
268 | 226 | return true; |
269 | 227 | } |
270 | 228 |
arch/arm/kvm/coproc.h
... | ... | @@ -153,9 +153,9 @@ |
153 | 153 | #define is64 .is_64 = true |
154 | 154 | #define is32 .is_64 = false |
155 | 155 | |
156 | -bool access_sctlr(struct kvm_vcpu *vcpu, | |
157 | - const struct coproc_params *p, | |
158 | - const struct coproc_reg *r); | |
156 | +bool access_vm_reg(struct kvm_vcpu *vcpu, | |
157 | + const struct coproc_params *p, | |
158 | + const struct coproc_reg *r); | |
159 | 159 | |
160 | 160 | #endif /* __ARM_KVM_COPROC_LOCAL_H__ */ |
arch/arm/kvm/coproc_a15.c
... | ... | @@ -34,7 +34,7 @@ |
34 | 34 | static const struct coproc_reg a15_regs[] = { |
35 | 35 | /* SCTLR: swapped by interrupt.S. */ |
36 | 36 | { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, |
37 | - access_sctlr, reset_val, c1_SCTLR, 0x00C50078 }, | |
37 | + access_vm_reg, reset_val, c1_SCTLR, 0x00C50078 }, | |
38 | 38 | }; |
39 | 39 | |
40 | 40 | static struct kvm_coproc_target_table a15_target_table = { |
arch/arm/kvm/coproc_a7.c
... | ... | @@ -37,7 +37,7 @@ |
37 | 37 | static const struct coproc_reg a7_regs[] = { |
38 | 38 | /* SCTLR: swapped by interrupt.S. */ |
39 | 39 | { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, |
40 | - access_sctlr, reset_val, c1_SCTLR, 0x00C50878 }, | |
40 | + access_vm_reg, reset_val, c1_SCTLR, 0x00C50878 }, | |
41 | 41 | }; |
42 | 42 | |
43 | 43 | static struct kvm_coproc_target_table a7_target_table = { |
arch/arm/kvm/mmu.c
... | ... | @@ -58,6 +58,26 @@ |
58 | 58 | kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); |
59 | 59 | } |
60 | 60 | |
61 | +/* | |
62 | + * D-Cache management functions. They take the page table entries by | |
63 | + * value, as they are flushing the cache using the kernel mapping (or | |
64 | + * kmap on 32bit). | |
65 | + */ | |
66 | +static void kvm_flush_dcache_pte(pte_t pte) | |
67 | +{ | |
68 | + __kvm_flush_dcache_pte(pte); | |
69 | +} | |
70 | + | |
71 | +static void kvm_flush_dcache_pmd(pmd_t pmd) | |
72 | +{ | |
73 | + __kvm_flush_dcache_pmd(pmd); | |
74 | +} | |
75 | + | |
76 | +static void kvm_flush_dcache_pud(pud_t pud) | |
77 | +{ | |
78 | + __kvm_flush_dcache_pud(pud); | |
79 | +} | |
80 | + | |
61 | 81 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, |
62 | 82 | int min, int max) |
63 | 83 | { |
... | ... | @@ -119,6 +139,26 @@ |
119 | 139 | put_page(virt_to_page(pmd)); |
120 | 140 | } |
121 | 141 | |
142 | +/* | |
143 | + * Unmapping vs dcache management: | |
144 | + * | |
145 | + * If a guest maps certain memory pages as uncached, all writes will | |
146 | + * bypass the data cache and go directly to RAM. However, the CPUs | |
147 | + * can still speculate reads (not writes) and fill cache lines with | |
148 | + * data. | |
149 | + * | |
150 | + * Those cache lines will be *clean* cache lines though, so a | |
151 | + * clean+invalidate operation is equivalent to an invalidate | |
152 | + * operation, because no cache lines are marked dirty. | |
153 | + * | |
154 | + * Those clean cache lines could be filled prior to an uncached write | |
155 | + * by the guest, and the cache coherent IO subsystem would therefore | |
156 | + * end up writing old data to disk. | |
157 | + * | |
158 | + * This is why right after unmapping a page/section and invalidating | |
159 | + * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure | |
160 | + * the IO subsystem will never hit in the cache. | |
161 | + */ | |
122 | 162 | static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, |
123 | 163 | phys_addr_t addr, phys_addr_t end) |
124 | 164 | { |
125 | 165 | |
126 | 166 | |
... | ... | @@ -128,9 +168,16 @@ |
128 | 168 | start_pte = pte = pte_offset_kernel(pmd, addr); |
129 | 169 | do { |
130 | 170 | if (!pte_none(*pte)) { |
171 | + pte_t old_pte = *pte; | |
172 | + | |
131 | 173 | kvm_set_pte(pte, __pte(0)); |
132 | - put_page(virt_to_page(pte)); | |
133 | 174 | kvm_tlb_flush_vmid_ipa(kvm, addr); |
175 | + | |
176 | + /* No need to invalidate the cache for device mappings */ | |
177 | + if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE) | |
178 | + kvm_flush_dcache_pte(old_pte); | |
179 | + | |
180 | + put_page(virt_to_page(pte)); | |
134 | 181 | } |
135 | 182 | } while (pte++, addr += PAGE_SIZE, addr != end); |
136 | 183 | |
137 | 184 | |
... | ... | @@ -149,8 +196,13 @@ |
149 | 196 | next = kvm_pmd_addr_end(addr, end); |
150 | 197 | if (!pmd_none(*pmd)) { |
151 | 198 | if (kvm_pmd_huge(*pmd)) { |
199 | + pmd_t old_pmd = *pmd; | |
200 | + | |
152 | 201 | pmd_clear(pmd); |
153 | 202 | kvm_tlb_flush_vmid_ipa(kvm, addr); |
203 | + | |
204 | + kvm_flush_dcache_pmd(old_pmd); | |
205 | + | |
154 | 206 | put_page(virt_to_page(pmd)); |
155 | 207 | } else { |
156 | 208 | unmap_ptes(kvm, pmd, addr, next); |
157 | 209 | |
... | ... | @@ -173,8 +225,13 @@ |
173 | 225 | next = kvm_pud_addr_end(addr, end); |
174 | 226 | if (!pud_none(*pud)) { |
175 | 227 | if (pud_huge(*pud)) { |
228 | + pud_t old_pud = *pud; | |
229 | + | |
176 | 230 | pud_clear(pud); |
177 | 231 | kvm_tlb_flush_vmid_ipa(kvm, addr); |
232 | + | |
233 | + kvm_flush_dcache_pud(old_pud); | |
234 | + | |
178 | 235 | put_page(virt_to_page(pud)); |
179 | 236 | } else { |
180 | 237 | unmap_pmds(kvm, pud, addr, next); |
... | ... | @@ -209,10 +266,9 @@ |
209 | 266 | |
210 | 267 | pte = pte_offset_kernel(pmd, addr); |
211 | 268 | do { |
212 | - if (!pte_none(*pte)) { | |
213 | - hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); | |
214 | - kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE); | |
215 | - } | |
269 | + if (!pte_none(*pte) && | |
270 | + (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE) | |
271 | + kvm_flush_dcache_pte(*pte); | |
216 | 272 | } while (pte++, addr += PAGE_SIZE, addr != end); |
217 | 273 | } |
218 | 274 | |
219 | 275 | |
... | ... | @@ -226,12 +282,10 @@ |
226 | 282 | do { |
227 | 283 | next = kvm_pmd_addr_end(addr, end); |
228 | 284 | if (!pmd_none(*pmd)) { |
229 | - if (kvm_pmd_huge(*pmd)) { | |
230 | - hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); | |
231 | - kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE); | |
232 | - } else { | |
285 | + if (kvm_pmd_huge(*pmd)) | |
286 | + kvm_flush_dcache_pmd(*pmd); | |
287 | + else | |
233 | 288 | stage2_flush_ptes(kvm, pmd, addr, next); |
234 | - } | |
235 | 289 | } |
236 | 290 | } while (pmd++, addr = next, addr != end); |
237 | 291 | } |
238 | 292 | |
... | ... | @@ -246,12 +300,10 @@ |
246 | 300 | do { |
247 | 301 | next = kvm_pud_addr_end(addr, end); |
248 | 302 | if (!pud_none(*pud)) { |
249 | - if (pud_huge(*pud)) { | |
250 | - hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); | |
251 | - kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE); | |
252 | - } else { | |
303 | + if (pud_huge(*pud)) | |
304 | + kvm_flush_dcache_pud(*pud); | |
305 | + else | |
253 | 306 | stage2_flush_pmds(kvm, pud, addr, next); |
254 | - } | |
255 | 307 | } |
256 | 308 | } while (pud++, addr = next, addr != end); |
257 | 309 | } |
... | ... | @@ -278,7 +330,7 @@ |
278 | 330 | * Go through the stage 2 page tables and invalidate any cache lines |
279 | 331 | * backing memory already mapped to the VM. |
280 | 332 | */ |
281 | -void stage2_flush_vm(struct kvm *kvm) | |
333 | +static void stage2_flush_vm(struct kvm *kvm) | |
282 | 334 | { |
283 | 335 | struct kvm_memslots *slots; |
284 | 336 | struct kvm_memory_slot *memslot; |
... | ... | @@ -905,6 +957,12 @@ |
905 | 957 | return !pfn_valid(pfn); |
906 | 958 | } |
907 | 959 | |
960 | +static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn, | |
961 | + unsigned long size, bool uncached) | |
962 | +{ | |
963 | + __coherent_cache_guest_page(vcpu, pfn, size, uncached); | |
964 | +} | |
965 | + | |
908 | 966 | static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, |
909 | 967 | struct kvm_memory_slot *memslot, unsigned long hva, |
910 | 968 | unsigned long fault_status) |
... | ... | @@ -994,8 +1052,7 @@ |
994 | 1052 | kvm_set_s2pmd_writable(&new_pmd); |
995 | 1053 | kvm_set_pfn_dirty(pfn); |
996 | 1054 | } |
997 | - coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE, | |
998 | - fault_ipa_uncached); | |
1055 | + coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached); | |
999 | 1056 | ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); |
1000 | 1057 | } else { |
1001 | 1058 | pte_t new_pte = pfn_pte(pfn, mem_type); |
... | ... | @@ -1003,8 +1060,7 @@ |
1003 | 1060 | kvm_set_s2pte_writable(&new_pte); |
1004 | 1061 | kvm_set_pfn_dirty(pfn); |
1005 | 1062 | } |
1006 | - coherent_cache_guest_page(vcpu, hva, PAGE_SIZE, | |
1007 | - fault_ipa_uncached); | |
1063 | + coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached); | |
1008 | 1064 | ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, |
1009 | 1065 | pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE)); |
1010 | 1066 | } |
... | ... | @@ -1410,5 +1466,73 @@ |
1410 | 1466 | spin_lock(&kvm->mmu_lock); |
1411 | 1467 | unmap_stage2_range(kvm, gpa, size); |
1412 | 1468 | spin_unlock(&kvm->mmu_lock); |
1469 | +} | |
1470 | + | |
1471 | +/* | |
1472 | + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). | |
1473 | + * | |
1474 | + * Main problems: | |
1475 | + * - S/W ops are local to a CPU (not broadcast) | |
1476 | + * - We have line migration behind our back (speculation) | |
1477 | + * - System caches don't support S/W at all (damn!) | |
1478 | + * | |
1479 | + * In the face of the above, the best we can do is to try and convert | |
1480 | + * S/W ops to VA ops. Because the guest is not allowed to infer the | |
1481 | + * S/W to PA mapping, it can only use S/W to nuke the whole cache, | |
1482 | + * which is a rather good thing for us. | |
1483 | + * | |
1484 | + * Also, it is only used when turning caches on/off ("The expected | |
1485 | + * usage of the cache maintenance instructions that operate by set/way | |
1486 | + * is associated with the cache maintenance instructions associated | |
1487 | + * with the powerdown and powerup of caches, if this is required by | |
1488 | + * the implementation."). | |
1489 | + * | |
1490 | + * We use the following policy: | |
1491 | + * | |
1492 | + * - If we trap a S/W operation, we enable VM trapping to detect | |
1493 | + * caches being turned on/off, and do a full clean. | |
1494 | + * | |
1495 | + * - We flush the caches on both caches being turned on and off. | |
1496 | + * | |
1497 | + * - Once the caches are enabled, we stop trapping VM ops. | |
1498 | + */ | |
1499 | +void kvm_set_way_flush(struct kvm_vcpu *vcpu) | |
1500 | +{ | |
1501 | + unsigned long hcr = vcpu_get_hcr(vcpu); | |
1502 | + | |
1503 | + /* | |
1504 | + * If this is the first time we do a S/W operation | |
1505 | + * (i.e. HCR_TVM not set) flush the whole memory, and set the | |
1506 | + * VM trapping. | |
1507 | + * | |
1508 | + * Otherwise, rely on the VM trapping to wait for the MMU + | |
1509 | + * Caches to be turned off. At that point, we'll be able to | |
1510 | + * clean the caches again. | |
1511 | + */ | |
1512 | + if (!(hcr & HCR_TVM)) { | |
1513 | + trace_kvm_set_way_flush(*vcpu_pc(vcpu), | |
1514 | + vcpu_has_cache_enabled(vcpu)); | |
1515 | + stage2_flush_vm(vcpu->kvm); | |
1516 | + vcpu_set_hcr(vcpu, hcr | HCR_TVM); | |
1517 | + } | |
1518 | +} | |
1519 | + | |
1520 | +void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled) | |
1521 | +{ | |
1522 | + bool now_enabled = vcpu_has_cache_enabled(vcpu); | |
1523 | + | |
1524 | + /* | |
1525 | + * If switching the MMU+caches on, need to invalidate the caches. | |
1526 | + * If switching it off, need to clean the caches. | |
1527 | + * Clean + invalidate does the trick always. | |
1528 | + */ | |
1529 | + if (now_enabled != was_enabled) | |
1530 | + stage2_flush_vm(vcpu->kvm); | |
1531 | + | |
1532 | + /* Caches are now on, stop trapping VM ops (until a S/W op) */ | |
1533 | + if (now_enabled) | |
1534 | + vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM); | |
1535 | + | |
1536 | + trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled); | |
1413 | 1537 | } |
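Taken together, the mmu.c additions above implement a trap-driven policy for set/way operations. A rough timeline of how a 32-bit guest would exercise it (illustrative only; the exact register values and ordering are an assumption, not part of the commit):

	/*
	 * 1. Guest issues a set/way op, e.g. "mcr p15, 0, rN, c7, c14, 2" (DCCISW):
	 *      trapped -> access_dcsw() -> kvm_set_way_flush()
	 *      -> stage2_flush_vm(), and HCR.TVM is set so VM register writes now trap.
	 * 2. Guest disables its MMU/caches by clearing SCTLR.M/C:
	 *      trapped -> access_vm_reg() -> kvm_toggle_cache()
	 *      -> cache state changed, so stage2_flush_vm() cleans everything to PoC.
	 * 3. Guest re-enables SCTLR.M/C:
	 *      trapped -> access_vm_reg() -> kvm_toggle_cache()
	 *      -> stage2_flush_vm() again, then HCR.TVM is cleared (stop trapping).
	 */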
arch/arm/kvm/trace.h
... | ... | @@ -223,6 +223,45 @@ |
223 | 223 | __entry->vcpu_pc, __entry->r0, __entry->imm) |
224 | 224 | ); |
225 | 225 | |
226 | +TRACE_EVENT(kvm_set_way_flush, | |
227 | + TP_PROTO(unsigned long vcpu_pc, bool cache), | |
228 | + TP_ARGS(vcpu_pc, cache), | |
229 | + | |
230 | + TP_STRUCT__entry( | |
231 | + __field( unsigned long, vcpu_pc ) | |
232 | + __field( bool, cache ) | |
233 | + ), | |
234 | + | |
235 | + TP_fast_assign( | |
236 | + __entry->vcpu_pc = vcpu_pc; | |
237 | + __entry->cache = cache; | |
238 | + ), | |
239 | + | |
240 | + TP_printk("S/W flush at 0x%016lx (cache %s)", | |
241 | + __entry->vcpu_pc, __entry->cache ? "on" : "off") | |
242 | +); | |
243 | + | |
244 | +TRACE_EVENT(kvm_toggle_cache, | |
245 | + TP_PROTO(unsigned long vcpu_pc, bool was, bool now), | |
246 | + TP_ARGS(vcpu_pc, was, now), | |
247 | + | |
248 | + TP_STRUCT__entry( | |
249 | + __field( unsigned long, vcpu_pc ) | |
250 | + __field( bool, was ) | |
251 | + __field( bool, now ) | |
252 | + ), | |
253 | + | |
254 | + TP_fast_assign( | |
255 | + __entry->vcpu_pc = vcpu_pc; | |
256 | + __entry->was = was; | |
257 | + __entry->now = now; | |
258 | + ), | |
259 | + | |
260 | + TP_printk("VM op at 0x%016lx (cache was %s, now %s)", | |
261 | + __entry->vcpu_pc, __entry->was ? "on" : "off", | |
262 | + __entry->now ? "on" : "off") | |
263 | +); | |
264 | + | |
226 | 265 | #endif /* _TRACE_KVM_H */ |
227 | 266 | |
228 | 267 | #undef TRACE_INCLUDE_PATH |
arch/arm64/include/asm/kvm_emulate.h
... | ... | @@ -45,6 +45,16 @@ |
45 | 45 | vcpu->arch.hcr_el2 &= ~HCR_RW; |
46 | 46 | } |
47 | 47 | |
48 | +static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu) | |
49 | +{ | |
50 | + return vcpu->arch.hcr_el2; | |
51 | +} | |
52 | + | |
53 | +static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr) | |
54 | +{ | |
55 | + vcpu->arch.hcr_el2 = hcr; | |
56 | +} | |
57 | + | |
48 | 58 | static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) |
49 | 59 | { |
50 | 60 | return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; |
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_mmu.h
... | ... | @@ -243,24 +243,46 @@ |
243 | 243 | return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; |
244 | 244 | } |
245 | 245 | |
246 | -static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, | |
247 | - unsigned long size, | |
248 | - bool ipa_uncached) | |
246 | +static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn, | |
247 | + unsigned long size, | |
248 | + bool ipa_uncached) | |
249 | 249 | { |
250 | + void *va = page_address(pfn_to_page(pfn)); | |
251 | + | |
250 | 252 | if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) |
251 | - kvm_flush_dcache_to_poc((void *)hva, size); | |
253 | + kvm_flush_dcache_to_poc(va, size); | |
252 | 254 | |
253 | 255 | if (!icache_is_aliasing()) { /* PIPT */ |
254 | - flush_icache_range(hva, hva + size); | |
256 | + flush_icache_range((unsigned long)va, | |
257 | + (unsigned long)va + size); | |
255 | 258 | } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ |
256 | 259 | /* any kind of VIPT cache */ |
257 | 260 | __flush_icache_all(); |
258 | 261 | } |
259 | 262 | } |
260 | 263 | |
264 | +static inline void __kvm_flush_dcache_pte(pte_t pte) | |
265 | +{ | |
266 | + struct page *page = pte_page(pte); | |
267 | + kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE); | |
268 | +} | |
269 | + | |
270 | +static inline void __kvm_flush_dcache_pmd(pmd_t pmd) | |
271 | +{ | |
272 | + struct page *page = pmd_page(pmd); | |
273 | + kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE); | |
274 | +} | |
275 | + | |
276 | +static inline void __kvm_flush_dcache_pud(pud_t pud) | |
277 | +{ | |
278 | + struct page *page = pud_page(pud); | |
279 | + kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE); | |
280 | +} | |
281 | + | |
261 | 282 | #define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x)) |
262 | 283 | |
263 | -void stage2_flush_vm(struct kvm *kvm); | |
284 | +void kvm_set_way_flush(struct kvm_vcpu *vcpu); | |
285 | +void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); | |
264 | 286 | |
265 | 287 | #endif /* __ASSEMBLY__ */ |
266 | 288 | #endif /* __ARM64_KVM_MMU_H__ */ |
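Unlike the 32-bit ARM version, the arm64 helpers above can flush through page_address() directly: arm64 has no highmem, so every page is permanently mapped in the kernel's linear mapping and no kmap_atomic_pfn() loop is needed. A minimal sketch of flushing a single guest page this way (illustrative only; the helper name is made up):

	/* Illustrative only: flush one guest page to PoC via the arm64 linear map. */
	static void example_flush_guest_pfn(pfn_t pfn)
	{
		/* page_address() is always valid here because arm64 has no highmem. */
		kvm_flush_dcache_to_poc(page_address(pfn_to_page(pfn)), PAGE_SIZE);
	}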
arch/arm64/kvm/sys_regs.c
... | ... | @@ -69,68 +69,31 @@ |
69 | 69 | return ccsidr; |
70 | 70 | } |
71 | 71 | |
72 | -static void do_dc_cisw(u32 val) | |
73 | -{ | |
74 | - asm volatile("dc cisw, %x0" : : "r" (val)); | |
75 | - dsb(ish); | |
76 | -} | |
77 | - | |
78 | -static void do_dc_csw(u32 val) | |
79 | -{ | |
80 | - asm volatile("dc csw, %x0" : : "r" (val)); | |
81 | - dsb(ish); | |
82 | -} | |
83 | - | |
84 | -/* See note at ARM ARM B1.14.4 */ | |
72 | +/* | |
73 | + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). | |
74 | + */ | |
85 | 75 | static bool access_dcsw(struct kvm_vcpu *vcpu, |
86 | 76 | const struct sys_reg_params *p, |
87 | 77 | const struct sys_reg_desc *r) |
88 | 78 | { |
89 | - unsigned long val; | |
90 | - int cpu; | |
91 | - | |
92 | 79 | if (!p->is_write) |
93 | 80 | return read_from_write_only(vcpu, p); |
94 | 81 | |
95 | - cpu = get_cpu(); | |
96 | - | |
97 | - cpumask_setall(&vcpu->arch.require_dcache_flush); | |
98 | - cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush); | |
99 | - | |
100 | - /* If we were already preempted, take the long way around */ | |
101 | - if (cpu != vcpu->arch.last_pcpu) { | |
102 | - flush_cache_all(); | |
103 | - goto done; | |
104 | - } | |
105 | - | |
106 | - val = *vcpu_reg(vcpu, p->Rt); | |
107 | - | |
108 | - switch (p->CRm) { | |
109 | - case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */ | |
110 | - case 14: /* DCCISW */ | |
111 | - do_dc_cisw(val); | |
112 | - break; | |
113 | - | |
114 | - case 10: /* DCCSW */ | |
115 | - do_dc_csw(val); | |
116 | - break; | |
117 | - } | |
118 | - | |
119 | -done: | |
120 | - put_cpu(); | |
121 | - | |
82 | + kvm_set_way_flush(vcpu); | |
122 | 83 | return true; |
123 | 84 | } |
124 | 85 | |
125 | 86 | /* |
126 | 87 | * Generic accessor for VM registers. Only called as long as HCR_TVM |
127 | - * is set. | |
88 | + * is set. If the guest enables the MMU, we stop trapping the VM | |
89 | + * sys_regs and leave it in complete control of the caches. | |
128 | 90 | */ |
129 | 91 | static bool access_vm_reg(struct kvm_vcpu *vcpu, |
130 | 92 | const struct sys_reg_params *p, |
131 | 93 | const struct sys_reg_desc *r) |
132 | 94 | { |
133 | 95 | unsigned long val; |
96 | + bool was_enabled = vcpu_has_cache_enabled(vcpu); | |
134 | 97 | |
135 | 98 | BUG_ON(!p->is_write); |
136 | 99 | |
137 | 100 | |
... | ... | @@ -143,28 +106,10 @@ |
143 | 106 | vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL; |
144 | 107 | } |
145 | 108 | |
109 | + kvm_toggle_cache(vcpu, was_enabled); | |
146 | 110 | return true; |
147 | 111 | } |
148 | 112 | |
149 | -/* | |
150 | - * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set. If the | |
151 | - * guest enables the MMU, we stop trapping the VM sys_regs and leave | |
152 | - * it in complete control of the caches. | |
153 | - */ | |
154 | -static bool access_sctlr(struct kvm_vcpu *vcpu, | |
155 | - const struct sys_reg_params *p, | |
156 | - const struct sys_reg_desc *r) | |
157 | -{ | |
158 | - access_vm_reg(vcpu, p, r); | |
159 | - | |
160 | - if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */ | |
161 | - vcpu->arch.hcr_el2 &= ~HCR_TVM; | |
162 | - stage2_flush_vm(vcpu->kvm); | |
163 | - } | |
164 | - | |
165 | - return true; | |
166 | -} | |
167 | - | |
168 | 113 | static bool trap_raz_wi(struct kvm_vcpu *vcpu, |
169 | 114 | const struct sys_reg_params *p, |
170 | 115 | const struct sys_reg_desc *r) |
... | ... | @@ -377,7 +322,7 @@ |
377 | 322 | NULL, reset_mpidr, MPIDR_EL1 }, |
378 | 323 | /* SCTLR_EL1 */ |
379 | 324 | { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), |
380 | - access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 }, | |
325 | + access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, | |
381 | 326 | /* CPACR_EL1 */ |
382 | 327 | { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010), |
383 | 328 | NULL, reset_val, CPACR_EL1, 0 }, |
... | ... | @@ -657,7 +602,7 @@ |
657 | 602 | * register). |
658 | 603 | */ |
659 | 604 | static const struct sys_reg_desc cp15_regs[] = { |
660 | - { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR }, | |
605 | + { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, | |
661 | 606 | { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, |
662 | 607 | { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, |
663 | 608 | { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, |