Commit dfd2bb8426e203a7a97cd9b2d494d43d8df2cf8a

Authored by Marcelo Tosatti

Merge branch 'kvm-arm-for-3.10' of git://github.com/columbia/linux-kvm-arm into queue

* 'kvm-arm-for-3.10' of git://github.com/columbia/linux-kvm-arm:
  ARM: KVM: iterate over all CPUs for CPU compatibility check
  KVM: ARM: Fix spelling in error message
  ARM: KVM: define KVM_ARM_MAX_VCPUS unconditionally
  KVM: ARM: Fix API documentation for ONE_REG encoding
  ARM: KVM: promote vfp_host pointer to generic host cpu context
  ARM: KVM: add architecture specific hook for capabilities
  ARM: KVM: perform HYP initilization for hotplugged CPUs
  ARM: KVM: switch to a dual-step HYP init code
  ARM: KVM: rework HYP page table freeing
  ARM: KVM: enforce maximum size for identity mapped code
  ARM: KVM: move to a KVM provided HYP idmap
  ARM: KVM: fix HYP mapping limitations around zero
  ARM: KVM: simplify HYP mapping population
  ARM: KVM: arch_timer: use symbolic constants
  ARM: KVM: add support for minimal host vs guest profiling

Showing 15 changed files

Documentation/virtual/kvm/api.txt
... ... @@ -1814,22 +1814,22 @@
1814 1814 is the register group type, or coprocessor number:
1815 1815  
1816 1816 ARM core registers have the following id bit patterns:
1817   - 0x4002 0000 0010 <index into the kvm_regs struct:16>
  1817 + 0x4020 0000 0010 <index into the kvm_regs struct:16>
1818 1818  
1819 1819 ARM 32-bit CP15 registers have the following id bit patterns:
1820   - 0x4002 0000 000F <zero:1> <crn:4> <crm:4> <opc1:4> <opc2:3>
  1820 + 0x4020 0000 000F <zero:1> <crn:4> <crm:4> <opc1:4> <opc2:3>
1821 1821  
1822 1822 ARM 64-bit CP15 registers have the following id bit patterns:
1823   - 0x4003 0000 000F <zero:1> <zero:4> <crm:4> <opc1:4> <zero:3>
  1823 + 0x4030 0000 000F <zero:1> <zero:4> <crm:4> <opc1:4> <zero:3>
1824 1824  
1825 1825 ARM CCSIDR registers are demultiplexed by CSSELR value:
1826   - 0x4002 0000 0011 00 <csselr:8>
  1826 + 0x4020 0000 0011 00 <csselr:8>
1827 1827  
1828 1828 ARM 32-bit VFP control registers have the following id bit patterns:
1829   - 0x4002 0000 0012 1 <regno:12>
  1829 + 0x4020 0000 0012 1 <regno:12>
1830 1830  
1831 1831 ARM 64-bit FP registers have the following id bit patterns:
1832   - 0x4002 0000 0012 0 <regno:12>
  1832 + 0x4030 0000 0012 0 <regno:12>
1833 1833  
1834 1834 4.69 KVM_GET_ONE_REG
1835 1835  
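For illustration, the corrected core-register pattern (0x4020 0000 0010 <index into the kvm_regs struct:16>) is simply KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | index. A minimal userspace sketch of fetching one core register via KVM_GET_ONE_REG (the helper name is made up for the example):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Read a 32-bit ARM core register from a vcpu fd.
 * id = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | index,
 * i.e. the 0x4020 0000 0010 <index:16> pattern documented above. */
static int get_core_reg(int vcpu_fd, uint16_t index, uint32_t *value)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_ARM | KVM_REG_SIZE_U32 |
			KVM_REG_ARM_CORE | index,
		.addr = (uintptr_t)value,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}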
arch/arm/include/asm/idmap.h
... ... @@ -8,7 +8,6 @@
8 8 #define __idmap __section(.idmap.text) noinline notrace
9 9  
10 10 extern pgd_t *idmap_pgd;
11   -extern pgd_t *hyp_pgd;
12 11  
13 12 void setup_mm_for_reboot(void);
14 13  
arch/arm/include/asm/kvm_host.h
... ... @@ -87,7 +87,7 @@
87 87 u32 hyp_pc; /* PC when exception was taken from Hyp mode */
88 88 };
89 89  
90   -typedef struct vfp_hard_struct kvm_kernel_vfp_t;
  90 +typedef struct vfp_hard_struct kvm_cpu_context_t;
91 91  
92 92 struct kvm_vcpu_arch {
93 93 struct kvm_regs regs;
... ... @@ -105,9 +105,11 @@
105 105 struct kvm_vcpu_fault_info fault;
106 106  
107 107 /* Floating point registers (VFP and Advanced SIMD/NEON) */
108   - kvm_kernel_vfp_t vfp_guest;
109   - kvm_kernel_vfp_t *vfp_host;
  108 + struct vfp_hard_struct vfp_guest;
110 109  
  110 + /* Host FP context */
  111 + kvm_cpu_context_t *host_cpu_context;
  112 +
111 113 /* VGIC state */
112 114 struct vgic_cpu vgic_cpu;
113 115 struct arch_timer_cpu timer_cpu;
... ... @@ -188,24 +190,39 @@
188 190 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
189 191 int exception_index);
190 192  
191   -static inline void __cpu_init_hyp_mode(unsigned long long pgd_ptr,
  193 +static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr,
  194 + unsigned long long pgd_ptr,
192 195 unsigned long hyp_stack_ptr,
193 196 unsigned long vector_ptr)
194 197 {
195   - unsigned long pgd_low, pgd_high;
196   -
197   - pgd_low = (pgd_ptr & ((1ULL << 32) - 1));
198   - pgd_high = (pgd_ptr >> 32ULL);
199   -
200 198 /*
201   - * Call initialization code, and switch to the full blown
202   - * HYP code. The init code doesn't need to preserve these registers as
203   - * r1-r3 and r12 are already callee save according to the AAPCS.
204   - * Note that we slightly misuse the prototype by casing the pgd_low to
205   - * a void *.
  199 + * Call initialization code, and switch to the full blown HYP
  200 + * code. The init code doesn't need to preserve these
  201 + * registers as r0-r3 are already callee saved according to
  202 + * the AAPCS.
  203 + * Note that we slightly misuse the prototype by casing the
  204 + * stack pointer to a void *.
  205 + *
  206 + * We don't have enough registers to perform the full init in
  207 + * one go. Install the boot PGD first, and then install the
  208 + * runtime PGD, stack pointer and vectors. The PGDs are always
  209 + * passed as the third argument, in order to be passed into
  210 + * r2-r3 to the init code (yes, this is compliant with the
  211 + * PCS!).
206 212 */
207   - kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr);
  213 +
  214 + kvm_call_hyp(NULL, 0, boot_pgd_ptr);
  215 +
  216 + kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
208 217 }
  218 +
  219 +static inline int kvm_arch_dev_ioctl_check_extension(long ext)
  220 +{
  221 + return 0;
  222 +}
  223 +
  224 +int kvm_perf_init(void);
  225 +int kvm_perf_teardown(void);
209 226  
210 227 #endif /* __ARM_KVM_HOST_H__ */
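A side note on the register mapping the comment alludes to (an annotated illustration, not part of the patch): kvm_call_hyp() is variadic, and under the AAPCS a 64-bit argument is assigned to an even/odd register pair, which is why the PGD pointer is always passed as the third argument:

/*
 * Phase 1: kvm_call_hyp(NULL, 0, boot_pgd_ptr);
 *   r0 = NULL            (no stack yet, so init takes the phase-1 path)
 *   r1 = 0
 *   r2,r3 = boot_pgd_ptr (64-bit value, aligned to the even pair)
 *
 * Phase 2: kvm_call_hyp((void *)hyp_stack_ptr, vector_ptr, pgd_ptr);
 *   r0 = hyp_stack_ptr   (non-zero, so init takes the phase-2 path)
 *   r1 = vector_ptr
 *   r2,r3 = pgd_ptr
 */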
arch/arm/include/asm/kvm_mmu.h
... ... @@ -19,21 +19,33 @@
19 19 #ifndef __ARM_KVM_MMU_H__
20 20 #define __ARM_KVM_MMU_H__
21 21  
22   -#include <asm/cacheflush.h>
23   -#include <asm/pgalloc.h>
24   -#include <asm/idmap.h>
  22 +#include <asm/memory.h>
  23 +#include <asm/page.h>
25 24  
26 25 /*
27 26 * We directly use the kernel VA for the HYP, as we can directly share
28 27 * the mapping (HTTBR "covers" TTBR1).
29 28 */
30   -#define HYP_PAGE_OFFSET_MASK (~0UL)
  29 +#define HYP_PAGE_OFFSET_MASK UL(~0)
31 30 #define HYP_PAGE_OFFSET PAGE_OFFSET
32 31 #define KERN_TO_HYP(kva) (kva)
33 32  
  33 +/*
  34 + * Our virtual mapping for the boot-time MMU-enable code. Must be
  35 + * shared across all the page-tables. Conveniently, we use the vectors
  36 + * page, where no kernel data will ever be shared with HYP.
  37 + */
  38 +#define TRAMPOLINE_VA UL(CONFIG_VECTORS_BASE)
  39 +
  40 +#ifndef __ASSEMBLY__
  41 +
  42 +#include <asm/cacheflush.h>
  43 +#include <asm/pgalloc.h>
  44 +
34 45 int create_hyp_mappings(void *from, void *to);
35 46 int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
36   -void free_hyp_pmds(void);
  47 +void free_boot_hyp_pgd(void);
  48 +void free_hyp_pgds(void);
37 49  
38 50 int kvm_alloc_stage2_pgd(struct kvm *kvm);
39 51 void kvm_free_stage2_pgd(struct kvm *kvm);
... ... @@ -45,6 +57,8 @@
45 57 void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
46 58  
47 59 phys_addr_t kvm_mmu_get_httbr(void);
  60 +phys_addr_t kvm_mmu_get_boot_httbr(void);
  61 +phys_addr_t kvm_get_idmap_vector(void);
48 62 int kvm_mmu_init(void);
49 63 void kvm_clear_hyp_idmap(void);
50 64  
... ... @@ -113,6 +127,10 @@
113 127 __flush_icache_all();
114 128 }
115 129 }
  130 +
  131 +#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l))
  132 +
  133 +#endif /* !__ASSEMBLY__ */
116 134  
117 135 #endif /* __ARM_KVM_MMU_H__ */
arch/arm/kernel/asm-offsets.c
... ... @@ -154,7 +154,7 @@
154 154 DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.midr));
155 155 DEFINE(VCPU_CP15, offsetof(struct kvm_vcpu, arch.cp15));
156 156 DEFINE(VCPU_VFP_GUEST, offsetof(struct kvm_vcpu, arch.vfp_guest));
157   - DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.vfp_host));
  157 + DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.host_cpu_context));
158 158 DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs));
159 159 DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs));
160 160 DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs));
arch/arm/kernel/vmlinux.lds.S
... ... @@ -20,7 +20,7 @@
20 20 VMLINUX_SYMBOL(__idmap_text_start) = .; \
21 21 *(.idmap.text) \
22 22 VMLINUX_SYMBOL(__idmap_text_end) = .; \
23   - ALIGN_FUNCTION(); \
  23 + . = ALIGN(32); \
24 24 VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \
25 25 *(.hyp.idmap.text) \
26 26 VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
... ... @@ -315,4 +315,9 @@
315 315 */
316 316 ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support")
317 317 ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")
  318 +/*
  319 + * The HYP init code can't be more than a page long.
  320 + * The above comment applies as well.
  321 + */
  322 +ASSERT(((__hyp_idmap_text_end - __hyp_idmap_text_start) <= PAGE_SIZE), "HYP init code too big")
arch/arm/kvm/Kconfig
... ... @@ -41,9 +41,9 @@
41 41 Provides host support for ARM processors.
42 42  
43 43 config KVM_ARM_MAX_VCPUS
44   - int "Number maximum supported virtual CPUs per VM"
45   - depends on KVM_ARM_HOST
46   - default 4
  44 + int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST
  45 + default 4 if KVM_ARM_HOST
  46 + default 0
47 47 help
48 48 Static number of max supported virtual CPUs per VM.
49 49  
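The new defaults pair with the "define KVM_ARM_MAX_VCPUS unconditionally" patch in this series: the option now always carries a value (0 when KVM_ARM_HOST is off), so the host header can fall back safely along these lines (a sketch of the intent; the corresponding kvm_host.h hunk is not shown here):

/* Sketch: KVM_MAX_VCPUS stays defined even in configurations where
 * KVM_ARM_HOST is disabled and the Kconfig default is 0. */
#ifdef CONFIG_KVM_ARM_MAX_VCPUS
#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
#else
#define KVM_MAX_VCPUS 0
#endif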
arch/arm/kvm/Makefile
... ... @@ -18,7 +18,7 @@
18 18  
19 19 obj-y += kvm-arm.o init.o interrupts.o
20 20 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
21   -obj-y += coproc.o coproc_a15.o mmio.o psci.o
  21 +obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
22 22 obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
23 23 obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o
arch/arm/kvm/arch_timer.c
... ... @@ -22,6 +22,7 @@
22 22 #include <linux/kvm_host.h>
23 23 #include <linux/interrupt.h>
24 24  
  25 +#include <clocksource/arm_arch_timer.h>
25 26 #include <asm/arch_timer.h>
26 27  
27 28 #include <asm/kvm_vgic.h>
... ... @@ -64,7 +65,7 @@
64 65 {
65 66 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
66 67  
67   - timer->cntv_ctl |= 1 << 1; /* Mask the interrupt in the guest */
  68 + timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
68 69 kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
69 70 vcpu->arch.timer_cpu.irq->irq,
70 71 vcpu->arch.timer_cpu.irq->level);
... ... @@ -133,8 +134,8 @@
133 134 cycle_t cval, now;
134 135 u64 ns;
135 136  
136   - /* Check if the timer is enabled and unmasked first */
137   - if ((timer->cntv_ctl & 3) != 1)
  137 + if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
  138 + !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
138 139 return;
139 140  
140 141 cval = timer->cntv_cval;
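With the symbolic constants from <clocksource/arm_arch_timer.h>, the condition being tested reads directly off the CNTV_CTL bits: the timer may only fire when it is enabled and its output is not masked. A minimal sketch of the same predicate (the helper is illustrative, not part of the patch):

#include <linux/types.h>
#include <clocksource/arm_arch_timer.h>

/* True when the virtual timer is allowed to raise its interrupt:
 * CNTV_CTL.ENABLE set and CNTV_CTL.IMASK clear. */
static inline bool timer_can_fire(u32 cntv_ctl)
{
	return (cntv_ctl & ARCH_TIMER_CTRL_ENABLE) &&
	       !(cntv_ctl & ARCH_TIMER_CTRL_IT_MASK);
}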
arch/arm/kvm/arm.c
... ... @@ -16,6 +16,7 @@
16 16 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 17 */
18 18  
  19 +#include <linux/cpu.h>
19 20 #include <linux/errno.h>
20 21 #include <linux/err.h>
21 22 #include <linux/kvm_host.h>
... ... @@ -48,7 +49,7 @@
48 49 #endif
49 50  
50 51 static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
51   -static kvm_kernel_vfp_t __percpu *kvm_host_vfp_state;
  52 +static kvm_cpu_context_t __percpu *kvm_host_cpu_state;
52 53 static unsigned long hyp_default_vectors;
53 54  
54 55 /* Per-CPU variable containing the currently running vcpu. */
... ... @@ -205,7 +206,7 @@
205 206 r = KVM_MAX_VCPUS;
206 207 break;
207 208 default:
208   - r = 0;
  209 + r = kvm_arch_dev_ioctl_check_extension(ext);
209 210 break;
210 211 }
211 212 return r;
... ... @@ -316,7 +317,7 @@
316 317 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
317 318 {
318 319 vcpu->cpu = cpu;
319   - vcpu->arch.vfp_host = this_cpu_ptr(kvm_host_vfp_state);
  320 + vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
320 321  
321 322 /*
322 323 * Check whether this vcpu requires the cache to be flushed on
... ... @@ -785,30 +786,48 @@
785 786 }
786 787 }
787 788  
788   -static void cpu_init_hyp_mode(void *vector)
  789 +static void cpu_init_hyp_mode(void *dummy)
789 790 {
  791 + unsigned long long boot_pgd_ptr;
790 792 unsigned long long pgd_ptr;
791 793 unsigned long hyp_stack_ptr;
792 794 unsigned long stack_page;
793 795 unsigned long vector_ptr;
794 796  
795 797 /* Switch from the HYP stub to our own HYP init vector */
796   - __hyp_set_vectors((unsigned long)vector);
  798 + __hyp_set_vectors(kvm_get_idmap_vector());
797 799  
  800 + boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr();
798 801 pgd_ptr = (unsigned long long)kvm_mmu_get_httbr();
799 802 stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
800 803 hyp_stack_ptr = stack_page + PAGE_SIZE;
801 804 vector_ptr = (unsigned long)__kvm_hyp_vector;
802 805  
803   - __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
  806 + __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
804 807 }
805 808  
  809 +static int hyp_init_cpu_notify(struct notifier_block *self,
  810 + unsigned long action, void *cpu)
  811 +{
  812 + switch (action) {
  813 + case CPU_STARTING:
  814 + case CPU_STARTING_FROZEN:
  815 + cpu_init_hyp_mode(NULL);
  816 + break;
  817 + }
  818 +
  819 + return NOTIFY_OK;
  820 +}
  821 +
  822 +static struct notifier_block hyp_init_cpu_nb = {
  823 + .notifier_call = hyp_init_cpu_notify,
  824 +};
  825 +
806 826 /**
807 827 * Inits Hyp-mode on all online CPUs
808 828 */
809 829 static int init_hyp_mode(void)
810 830 {
811   - phys_addr_t init_phys_addr;
812 831 int cpu;
813 832 int err = 0;
814 833  
... ... @@ -841,24 +860,6 @@
841 860 }
842 861  
843 862 /*
844   - * Execute the init code on each CPU.
845   - *
846   - * Note: The stack is not mapped yet, so don't do anything else than
847   - * initializing the hypervisor mode on each CPU using a local stack
848   - * space for temporary storage.
849   - */
850   - init_phys_addr = virt_to_phys(__kvm_hyp_init);
851   - for_each_online_cpu(cpu) {
852   - smp_call_function_single(cpu, cpu_init_hyp_mode,
853   - (void *)(long)init_phys_addr, 1);
854   - }
855   -
856   - /*
857   - * Unmap the identity mapping
858   - */
859   - kvm_clear_hyp_idmap();
860   -
861   - /*
862 863 * Map the Hyp-code called directly from the host
863 864 */
864 865 err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end);
... ... @@ -881,33 +882,38 @@
881 882 }
882 883  
883 884 /*
884   - * Map the host VFP structures
  885 + * Map the host CPU structures
885 886 */
886   - kvm_host_vfp_state = alloc_percpu(kvm_kernel_vfp_t);
887   - if (!kvm_host_vfp_state) {
  887 + kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
  888 + if (!kvm_host_cpu_state) {
888 889 err = -ENOMEM;
889   - kvm_err("Cannot allocate host VFP state\n");
  890 + kvm_err("Cannot allocate host CPU state\n");
890 891 goto out_free_mappings;
891 892 }
892 893  
893 894 for_each_possible_cpu(cpu) {
894   - kvm_kernel_vfp_t *vfp;
  895 + kvm_cpu_context_t *cpu_ctxt;
895 896  
896   - vfp = per_cpu_ptr(kvm_host_vfp_state, cpu);
897   - err = create_hyp_mappings(vfp, vfp + 1);
  897 + cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu);
  898 + err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1);
898 899  
899 900 if (err) {
900   - kvm_err("Cannot map host VFP state: %d\n", err);
901   - goto out_free_vfp;
  901 + kvm_err("Cannot map host CPU state: %d\n", err);
  902 + goto out_free_context;
902 903 }
903 904 }
904 905  
905 906 /*
  907 + * Execute the init code on each CPU.
  908 + */
  909 + on_each_cpu(cpu_init_hyp_mode, NULL, 1);
  910 +
  911 + /*
906 912 * Init HYP view of VGIC
907 913 */
908 914 err = kvm_vgic_hyp_init();
909 915 if (err)
910   - goto out_free_vfp;
  916 + goto out_free_context;
911 917  
912 918 #ifdef CONFIG_KVM_ARM_VGIC
913 919 vgic_present = true;
... ... @@ -920,12 +926,19 @@
920 926 if (err)
921 927 goto out_free_mappings;
922 928  
  929 +#ifndef CONFIG_HOTPLUG_CPU
  930 + free_boot_hyp_pgd();
  931 +#endif
  932 +
  933 + kvm_perf_init();
  934 +
923 935 kvm_info("Hyp mode initialized successfully\n");
  936 +
924 937 return 0;
925   -out_free_vfp:
926   - free_percpu(kvm_host_vfp_state);
  938 +out_free_context:
  939 + free_percpu(kvm_host_cpu_state);
927 940 out_free_mappings:
928   - free_hyp_pmds();
  941 + free_hyp_pgds();
929 942 out_free_stack_pages:
930 943 for_each_possible_cpu(cpu)
931 944 free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
... ... @@ -934,27 +947,42 @@
934 947 return err;
935 948 }
936 949  
  950 +static void check_kvm_target_cpu(void *ret)
  951 +{
  952 + *(int *)ret = kvm_target_cpu();
  953 +}
  954 +
937 955 /**
938 956 * Initialize Hyp-mode and memory mappings on all CPUs.
939 957 */
940 958 int kvm_arch_init(void *opaque)
941 959 {
942 960 int err;
  961 + int ret, cpu;
943 962  
944 963 if (!is_hyp_mode_available()) {
945 964 kvm_err("HYP mode not available\n");
946 965 return -ENODEV;
947 966 }
948 967  
949   - if (kvm_target_cpu() < 0) {
950   - kvm_err("Target CPU not supported!\n");
951   - return -ENODEV;
  968 + for_each_online_cpu(cpu) {
  969 + smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
  970 + if (ret < 0) {
  971 + kvm_err("Error, CPU %d not supported!\n", cpu);
  972 + return -ENODEV;
  973 + }
952 974 }
953 975  
954 976 err = init_hyp_mode();
955 977 if (err)
956 978 goto out_err;
957 979  
  980 + err = register_cpu_notifier(&hyp_init_cpu_nb);
  981 + if (err) {
  982 + kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
  983 + goto out_err;
  984 + }
  985 +
958 986 kvm_coproc_table_init();
959 987 return 0;
960 988 out_err:
... ... @@ -964,6 +992,7 @@
964 992 /* NOP: Compiling as a module not supported */
965 993 void kvm_arch_exit(void)
966 994 {
  995 + kvm_perf_teardown();
967 996 }
968 997  
969 998 static int arm_init(void)
arch/arm/kvm/handle_exit.c
... ... @@ -115,7 +115,7 @@
115 115  
116 116 if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
117 117 !arm_exit_handlers[hsr_ec]) {
118   - kvm_err("Unkown exception class: hsr: %#08x\n",
  118 + kvm_err("Unknown exception class: hsr: %#08x\n",
119 119 (unsigned int)kvm_vcpu_get_hsr(vcpu));
120 120 BUG();
121 121 }
arch/arm/kvm/init.S
... ... @@ -21,13 +21,33 @@
21 21 #include <asm/asm-offsets.h>
22 22 #include <asm/kvm_asm.h>
23 23 #include <asm/kvm_arm.h>
  24 +#include <asm/kvm_mmu.h>
24 25  
25 26 /********************************************************************
26 27 * Hypervisor initialization
27 28 * - should be called with:
28   - * r0,r1 = Hypervisor pgd pointer
29   - * r2 = top of Hyp stack (kernel VA)
30   - * r3 = pointer to hyp vectors
  29 + * r0 = top of Hyp stack (kernel VA)
  30 + * r1 = pointer to hyp vectors
  31 + * r2,r3 = Hypervisor pgd pointer
  32 + *
  33 + * The init scenario is:
  34 + * - We jump in HYP with four parameters: boot HYP pgd, runtime HYP pgd,
  35 + * runtime stack, runtime vectors
  36 + * - Enable the MMU with the boot pgd
  37 + * - Jump to a target into the trampoline page (remember, this is the same
  38 + * physical page!)
  39 + * - Now switch to the runtime pgd (same VA, and still the same physical
  40 + * page!)
  41 + * - Invalidate TLBs
  42 + * - Set stack and vectors
  43 + * - Profit! (or eret, if you only care about the code).
  44 + *
  45 + * As we only have four registers available to pass parameters (and we
  46 + * need six), we split the init in two phases:
  47 + * - Phase 1: r0 = 0, r1 = 0, r2,r3 contain the boot PGD.
  48 + * Provides the basic HYP init, and enable the MMU.
  49 + * - Phase 2: r0 = ToS, r1 = vectors, r2,r3 contain the runtime PGD.
  50 + * Switches to the runtime PGD, set stack and vectors.
31 51 */
32 52  
33 53 .text
... ... @@ -47,22 +67,25 @@
47 67 W(b) .
48 68  
49 69 __do_hyp_init:
  70 + cmp r0, #0 @ We have a SP?
  71 + bne phase2 @ Yes, second stage init
  72 +
50 73 @ Set the HTTBR to point to the hypervisor PGD pointer passed
51   - mcrr p15, 4, r0, r1, c2
  74 + mcrr p15, 4, r2, r3, c2
52 75  
53 76 @ Set the HTCR and VTCR to the same shareability and cacheability
54 77 @ settings as the non-secure TTBCR and with T0SZ == 0.
55 78 mrc p15, 4, r0, c2, c0, 2 @ HTCR
56   - ldr r12, =HTCR_MASK
57   - bic r0, r0, r12
  79 + ldr r2, =HTCR_MASK
  80 + bic r0, r0, r2
58 81 mrc p15, 0, r1, c2, c0, 2 @ TTBCR
59 82 and r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ)
60 83 orr r0, r0, r1
61 84 mcr p15, 4, r0, c2, c0, 2 @ HTCR
62 85  
63 86 mrc p15, 4, r1, c2, c1, 2 @ VTCR
64   - ldr r12, =VTCR_MASK
65   - bic r1, r1, r12
  87 + ldr r2, =VTCR_MASK
  88 + bic r1, r1, r2
66 89 bic r0, r0, #(~VTCR_HTCR_SH) @ clear non-reusable HTCR bits
67 90 orr r1, r0, r1
68 91 orr r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S)
... ... @@ -85,24 +108,41 @@
85 108 @ - Memory alignment checks: enabled
86 109 @ - MMU: enabled (this code must be run from an identity mapping)
87 110 mrc p15, 4, r0, c1, c0, 0 @ HSCR
88   - ldr r12, =HSCTLR_MASK
89   - bic r0, r0, r12
  111 + ldr r2, =HSCTLR_MASK
  112 + bic r0, r0, r2
90 113 mrc p15, 0, r1, c1, c0, 0 @ SCTLR
91   - ldr r12, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
92   - and r1, r1, r12
93   - ARM( ldr r12, =(HSCTLR_M | HSCTLR_A) )
94   - THUMB( ldr r12, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) )
95   - orr r1, r1, r12
  114 + ldr r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
  115 + and r1, r1, r2
  116 + ARM( ldr r2, =(HSCTLR_M | HSCTLR_A) )
  117 + THUMB( ldr r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) )
  118 + orr r1, r1, r2
96 119 orr r0, r0, r1
97 120 isb
98 121 mcr p15, 4, r0, c1, c0, 0 @ HSCR
99   - isb
100 122  
101   - @ Set stack pointer and return to the kernel
102   - mov sp, r2
  123 + @ End of init phase-1
  124 + eret
103 125  
  126 +phase2:
  127 + @ Set stack pointer
  128 + mov sp, r0
  129 +
104 130 @ Set HVBAR to point to the HYP vectors
105   - mcr p15, 4, r3, c12, c0, 0 @ HVBAR
  131 + mcr p15, 4, r1, c12, c0, 0 @ HVBAR
  132 +
  133 + @ Jump to the trampoline page
  134 + ldr r0, =TRAMPOLINE_VA
  135 + adr r1, target
  136 + bfi r0, r1, #0, #PAGE_SHIFT
  137 + mov pc, r0
  138 +
  139 +target: @ We're now in the trampoline code, switch page tables
  140 + mcrr p15, 4, r2, r3, c2
  141 + isb
  142 +
  143 + @ Invalidate the old TLBs
  144 + mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH
  145 + dsb
106 146  
107 147 eret
108 148  
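The "jump to the trampoline page" sequence relies on the init code and the trampoline mapping sharing the same offset within their page: bfi grafts the low PAGE_SHIFT bits of the current (identity-mapped) address onto TRAMPOLINE_VA. Roughly, in C (illustration only; the function and argument names are hypothetical):

/* Equivalent of: ldr r0, =TRAMPOLINE_VA
 *                adr r1, target
 *                bfi r0, r1, #0, #PAGE_SHIFT
 *                mov pc, r0 */
static unsigned long trampoline_alias(unsigned long idmap_addr)
{
	return (TRAMPOLINE_VA & PAGE_MASK) | (idmap_addr & ~PAGE_MASK);
}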
arch/arm/kvm/mmu.c
... ... @@ -32,8 +32,15 @@
32 32  
33 33 extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
34 34  
  35 +static pgd_t *boot_hyp_pgd;
  36 +static pgd_t *hyp_pgd;
35 37 static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
36 38  
  39 +static void *init_bounce_page;
  40 +static unsigned long hyp_idmap_start;
  41 +static unsigned long hyp_idmap_end;
  42 +static phys_addr_t hyp_idmap_vector;
  43 +
37 44 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
38 45 {
39 46 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
... ... @@ -71,172 +78,224 @@
71 78 return p;
72 79 }
73 80  
74   -static void free_ptes(pmd_t *pmd, unsigned long addr)
  81 +static void clear_pud_entry(pud_t *pud)
75 82 {
76   - pte_t *pte;
77   - unsigned int i;
  83 + pmd_t *pmd_table = pmd_offset(pud, 0);
  84 + pud_clear(pud);
  85 + pmd_free(NULL, pmd_table);
  86 + put_page(virt_to_page(pud));
  87 +}
78 88  
79   - for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
80   - if (!pmd_none(*pmd) && pmd_table(*pmd)) {
81   - pte = pte_offset_kernel(pmd, addr);
82   - pte_free_kernel(NULL, pte);
83   - }
84   - pmd++;
  89 +static void clear_pmd_entry(pmd_t *pmd)
  90 +{
  91 + pte_t *pte_table = pte_offset_kernel(pmd, 0);
  92 + pmd_clear(pmd);
  93 + pte_free_kernel(NULL, pte_table);
  94 + put_page(virt_to_page(pmd));
  95 +}
  96 +
  97 +static bool pmd_empty(pmd_t *pmd)
  98 +{
  99 + struct page *pmd_page = virt_to_page(pmd);
  100 + return page_count(pmd_page) == 1;
  101 +}
  102 +
  103 +static void clear_pte_entry(pte_t *pte)
  104 +{
  105 + if (pte_present(*pte)) {
  106 + kvm_set_pte(pte, __pte(0));
  107 + put_page(virt_to_page(pte));
85 108 }
86 109 }
87 110  
88   -static void free_hyp_pgd_entry(unsigned long addr)
  111 +static bool pte_empty(pte_t *pte)
89 112 {
  113 + struct page *pte_page = virt_to_page(pte);
  114 + return page_count(pte_page) == 1;
  115 +}
  116 +
  117 +static void unmap_range(pgd_t *pgdp, unsigned long long start, u64 size)
  118 +{
90 119 pgd_t *pgd;
91 120 pud_t *pud;
92 121 pmd_t *pmd;
93   - unsigned long hyp_addr = KERN_TO_HYP(addr);
  122 + pte_t *pte;
  123 + unsigned long long addr = start, end = start + size;
  124 + u64 range;
94 125  
95   - pgd = hyp_pgd + pgd_index(hyp_addr);
96   - pud = pud_offset(pgd, hyp_addr);
  126 + while (addr < end) {
  127 + pgd = pgdp + pgd_index(addr);
  128 + pud = pud_offset(pgd, addr);
  129 + if (pud_none(*pud)) {
  130 + addr += PUD_SIZE;
  131 + continue;
  132 + }
97 133  
98   - if (pud_none(*pud))
99   - return;
100   - BUG_ON(pud_bad(*pud));
  134 + pmd = pmd_offset(pud, addr);
  135 + if (pmd_none(*pmd)) {
  136 + addr += PMD_SIZE;
  137 + continue;
  138 + }
101 139  
102   - pmd = pmd_offset(pud, hyp_addr);
103   - free_ptes(pmd, addr);
104   - pmd_free(NULL, pmd);
105   - pud_clear(pud);
  140 + pte = pte_offset_kernel(pmd, addr);
  141 + clear_pte_entry(pte);
  142 + range = PAGE_SIZE;
  143 +
  144 + /* If we emptied the pte, walk back up the ladder */
  145 + if (pte_empty(pte)) {
  146 + clear_pmd_entry(pmd);
  147 + range = PMD_SIZE;
  148 + if (pmd_empty(pmd)) {
  149 + clear_pud_entry(pud);
  150 + range = PUD_SIZE;
  151 + }
  152 + }
  153 +
  154 + addr += range;
  155 + }
106 156 }
107 157  
108 158 /**
109   - * free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables
  159 + * free_boot_hyp_pgd - free HYP boot page tables
110 160 *
111   - * Assumes this is a page table used strictly in Hyp-mode and therefore contains
112   - * either mappings in the kernel memory area (above PAGE_OFFSET), or
113   - * device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END).
  161 + * Free the HYP boot page tables. The bounce page is also freed.
114 162 */
115   -void free_hyp_pmds(void)
  163 +void free_boot_hyp_pgd(void)
116 164 {
117   - unsigned long addr;
118   -
119 165 mutex_lock(&kvm_hyp_pgd_mutex);
120   - for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
121   - free_hyp_pgd_entry(addr);
122   - for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
123   - free_hyp_pgd_entry(addr);
  166 +
  167 + if (boot_hyp_pgd) {
  168 + unmap_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
  169 + unmap_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
  170 + kfree(boot_hyp_pgd);
  171 + boot_hyp_pgd = NULL;
  172 + }
  173 +
  174 + if (hyp_pgd)
  175 + unmap_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
  176 +
  177 + kfree(init_bounce_page);
  178 + init_bounce_page = NULL;
  179 +
124 180 mutex_unlock(&kvm_hyp_pgd_mutex);
125 181 }
126 182  
127   -static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
128   - unsigned long end)
  183 +/**
  184 + * free_hyp_pgds - free Hyp-mode page tables
  185 + *
  186 + * Assumes hyp_pgd is a page table used strictly in Hyp-mode and
  187 + * therefore contains either mappings in the kernel memory area (above
  188 + * PAGE_OFFSET), or device mappings in the vmalloc range (from
  189 + * VMALLOC_START to VMALLOC_END).
  190 + *
  191 + * boot_hyp_pgd should only map two pages for the init code.
  192 + */
  193 +void free_hyp_pgds(void)
129 194 {
130   - pte_t *pte;
131 195 unsigned long addr;
132   - struct page *page;
133 196  
134   - for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
135   - unsigned long hyp_addr = KERN_TO_HYP(addr);
  197 + free_boot_hyp_pgd();
136 198  
137   - pte = pte_offset_kernel(pmd, hyp_addr);
138   - BUG_ON(!virt_addr_valid(addr));
139   - page = virt_to_page(addr);
140   - kvm_set_pte(pte, mk_pte(page, PAGE_HYP));
  199 + mutex_lock(&kvm_hyp_pgd_mutex);
  200 +
  201 + if (hyp_pgd) {
  202 + for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
  203 + unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
  204 + for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
  205 + unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
  206 + kfree(hyp_pgd);
  207 + hyp_pgd = NULL;
141 208 }
  209 +
  210 + mutex_unlock(&kvm_hyp_pgd_mutex);
142 211 }
143 212  
144   -static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start,
145   - unsigned long end,
146   - unsigned long *pfn_base)
  213 +static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
  214 + unsigned long end, unsigned long pfn,
  215 + pgprot_t prot)
147 216 {
148 217 pte_t *pte;
149 218 unsigned long addr;
150 219  
151   - for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
152   - unsigned long hyp_addr = KERN_TO_HYP(addr);
153   -
154   - pte = pte_offset_kernel(pmd, hyp_addr);
155   - BUG_ON(pfn_valid(*pfn_base));
156   - kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE));
157   - (*pfn_base)++;
158   - }
  220 + addr = start;
  221 + do {
  222 + pte = pte_offset_kernel(pmd, addr);
  223 + kvm_set_pte(pte, pfn_pte(pfn, prot));
  224 + get_page(virt_to_page(pte));
  225 + kvm_flush_dcache_to_poc(pte, sizeof(*pte));
  226 + pfn++;
  227 + } while (addr += PAGE_SIZE, addr != end);
159 228 }
160 229  
161 230 static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
162   - unsigned long end, unsigned long *pfn_base)
  231 + unsigned long end, unsigned long pfn,
  232 + pgprot_t prot)
163 233 {
164 234 pmd_t *pmd;
165 235 pte_t *pte;
166 236 unsigned long addr, next;
167 237  
168   - for (addr = start; addr < end; addr = next) {
169   - unsigned long hyp_addr = KERN_TO_HYP(addr);
170   - pmd = pmd_offset(pud, hyp_addr);
  238 + addr = start;
  239 + do {
  240 + pmd = pmd_offset(pud, addr);
171 241  
172 242 BUG_ON(pmd_sect(*pmd));
173 243  
174 244 if (pmd_none(*pmd)) {
175   - pte = pte_alloc_one_kernel(NULL, hyp_addr);
  245 + pte = pte_alloc_one_kernel(NULL, addr);
176 246 if (!pte) {
177 247 kvm_err("Cannot allocate Hyp pte\n");
178 248 return -ENOMEM;
179 249 }
180 250 pmd_populate_kernel(NULL, pmd, pte);
  251 + get_page(virt_to_page(pmd));
  252 + kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
181 253 }
182 254  
183 255 next = pmd_addr_end(addr, end);
184 256  
185   - /*
186   - * If pfn_base is NULL, we map kernel pages into HYP with the
187   - * virtual address. Otherwise, this is considered an I/O
188   - * mapping and we map the physical region starting at
189   - * *pfn_base to [start, end[.
190   - */
191   - if (!pfn_base)
192   - create_hyp_pte_mappings(pmd, addr, next);
193   - else
194   - create_hyp_io_pte_mappings(pmd, addr, next, pfn_base);
195   - }
  257 + create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
  258 + pfn += (next - addr) >> PAGE_SHIFT;
  259 + } while (addr = next, addr != end);
196 260  
197 261 return 0;
198 262 }
199 263  
200   -static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base)
  264 +static int __create_hyp_mappings(pgd_t *pgdp,
  265 + unsigned long start, unsigned long end,
  266 + unsigned long pfn, pgprot_t prot)
201 267 {
202   - unsigned long start = (unsigned long)from;
203   - unsigned long end = (unsigned long)to;
204 268 pgd_t *pgd;
205 269 pud_t *pud;
206 270 pmd_t *pmd;
207 271 unsigned long addr, next;
208 272 int err = 0;
209 273  
210   - if (start >= end)
211   - return -EINVAL;
212   - /* Check for a valid kernel memory mapping */
213   - if (!pfn_base && (!virt_addr_valid(from) || !virt_addr_valid(to - 1)))
214   - return -EINVAL;
215   - /* Check for a valid kernel IO mapping */
216   - if (pfn_base && (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)))
217   - return -EINVAL;
218   -
219 274 mutex_lock(&kvm_hyp_pgd_mutex);
220   - for (addr = start; addr < end; addr = next) {
221   - unsigned long hyp_addr = KERN_TO_HYP(addr);
222   - pgd = hyp_pgd + pgd_index(hyp_addr);
223   - pud = pud_offset(pgd, hyp_addr);
  275 + addr = start & PAGE_MASK;
  276 + end = PAGE_ALIGN(end);
  277 + do {
  278 + pgd = pgdp + pgd_index(addr);
  279 + pud = pud_offset(pgd, addr);
224 280  
225 281 if (pud_none_or_clear_bad(pud)) {
226   - pmd = pmd_alloc_one(NULL, hyp_addr);
  282 + pmd = pmd_alloc_one(NULL, addr);
227 283 if (!pmd) {
228 284 kvm_err("Cannot allocate Hyp pmd\n");
229 285 err = -ENOMEM;
230 286 goto out;
231 287 }
232 288 pud_populate(NULL, pud, pmd);
  289 + get_page(virt_to_page(pud));
  290 + kvm_flush_dcache_to_poc(pud, sizeof(*pud));
233 291 }
234 292  
235 293 next = pgd_addr_end(addr, end);
236   - err = create_hyp_pmd_mappings(pud, addr, next, pfn_base);
  294 + err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
237 295 if (err)
238 296 goto out;
239   - }
  297 + pfn += (next - addr) >> PAGE_SHIFT;
  298 + } while (addr = next, addr != end);
240 299 out:
241 300 mutex_unlock(&kvm_hyp_pgd_mutex);
242 301 return err;
... ... @@ -250,27 +309,41 @@
250 309 * The same virtual address as the kernel virtual address is also used
251 310 * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
252 311 * physical pages.
253   - *
254   - * Note: Wrapping around zero in the "to" address is not supported.
255 312 */
256 313 int create_hyp_mappings(void *from, void *to)
257 314 {
258   - return __create_hyp_mappings(from, to, NULL);
  315 + unsigned long phys_addr = virt_to_phys(from);
  316 + unsigned long start = KERN_TO_HYP((unsigned long)from);
  317 + unsigned long end = KERN_TO_HYP((unsigned long)to);
  318 +
  319 + /* Check for a valid kernel memory mapping */
  320 + if (!virt_addr_valid(from) || !virt_addr_valid(to - 1))
  321 + return -EINVAL;
  322 +
  323 + return __create_hyp_mappings(hyp_pgd, start, end,
  324 + __phys_to_pfn(phys_addr), PAGE_HYP);
259 325 }
260 326  
261 327 /**
262 328 * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
263 329 * @from: The kernel start VA of the range
264 330 * @to: The kernel end VA of the range (exclusive)
265   - * @addr: The physical start address which gets mapped
  331 + * @phys_addr: The physical start address which gets mapped
266 332 *
267 333 * The resulting HYP VA is the same as the kernel VA, modulo
268 334 * HYP_PAGE_OFFSET.
269 335 */
270   -int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr)
  336 +int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
271 337 {
272   - unsigned long pfn = __phys_to_pfn(addr);
273   - return __create_hyp_mappings(from, to, &pfn);
  338 + unsigned long start = KERN_TO_HYP((unsigned long)from);
  339 + unsigned long end = KERN_TO_HYP((unsigned long)to);
  340 +
  341 + /* Check for a valid kernel IO mapping */
  342 + if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
  343 + return -EINVAL;
  344 +
  345 + return __create_hyp_mappings(hyp_pgd, start, end,
  346 + __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
274 347 }
275 348  
276 349 /**
... ... @@ -307,42 +380,6 @@
307 380 return 0;
308 381 }
309 382  
310   -static void clear_pud_entry(pud_t *pud)
311   -{
312   - pmd_t *pmd_table = pmd_offset(pud, 0);
313   - pud_clear(pud);
314   - pmd_free(NULL, pmd_table);
315   - put_page(virt_to_page(pud));
316   -}
317   -
318   -static void clear_pmd_entry(pmd_t *pmd)
319   -{
320   - pte_t *pte_table = pte_offset_kernel(pmd, 0);
321   - pmd_clear(pmd);
322   - pte_free_kernel(NULL, pte_table);
323   - put_page(virt_to_page(pmd));
324   -}
325   -
326   -static bool pmd_empty(pmd_t *pmd)
327   -{
328   - struct page *pmd_page = virt_to_page(pmd);
329   - return page_count(pmd_page) == 1;
330   -}
331   -
332   -static void clear_pte_entry(pte_t *pte)
333   -{
334   - if (pte_present(*pte)) {
335   - kvm_set_pte(pte, __pte(0));
336   - put_page(virt_to_page(pte));
337   - }
338   -}
339   -
340   -static bool pte_empty(pte_t *pte)
341   -{
342   - struct page *pte_page = virt_to_page(pte);
343   - return page_count(pte_page) == 1;
344   -}
345   -
346 383 /**
347 384 * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
348 385 * @kvm: The VM pointer
... ... @@ -356,43 +393,7 @@
356 393 */
357 394 static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
358 395 {
359   - pgd_t *pgd;
360   - pud_t *pud;
361   - pmd_t *pmd;
362   - pte_t *pte;
363   - phys_addr_t addr = start, end = start + size;
364   - u64 range;
365   -
366   - while (addr < end) {
367   - pgd = kvm->arch.pgd + pgd_index(addr);
368   - pud = pud_offset(pgd, addr);
369   - if (pud_none(*pud)) {
370   - addr += PUD_SIZE;
371   - continue;
372   - }
373   -
374   - pmd = pmd_offset(pud, addr);
375   - if (pmd_none(*pmd)) {
376   - addr += PMD_SIZE;
377   - continue;
378   - }
379   -
380   - pte = pte_offset_kernel(pmd, addr);
381   - clear_pte_entry(pte);
382   - range = PAGE_SIZE;
383   -
384   - /* If we emptied the pte, walk back up the ladder */
385   - if (pte_empty(pte)) {
386   - clear_pmd_entry(pmd);
387   - range = PMD_SIZE;
388   - if (pmd_empty(pmd)) {
389   - clear_pud_entry(pud);
390   - range = PUD_SIZE;
391   - }
392   - }
393   -
394   - addr += range;
395   - }
  396 + unmap_range(kvm->arch.pgd, start, size);
396 397 }
397 398  
398 399 /**
... ... @@ -728,48 +729,106 @@
728 729  
729 730 phys_addr_t kvm_mmu_get_httbr(void)
730 731 {
731   - VM_BUG_ON(!virt_addr_valid(hyp_pgd));
732 732 return virt_to_phys(hyp_pgd);
733 733 }
734 734  
  735 +phys_addr_t kvm_mmu_get_boot_httbr(void)
  736 +{
  737 + return virt_to_phys(boot_hyp_pgd);
  738 +}
  739 +
  740 +phys_addr_t kvm_get_idmap_vector(void)
  741 +{
  742 + return hyp_idmap_vector;
  743 +}
  744 +
735 745 int kvm_mmu_init(void)
736 746 {
737   - if (!hyp_pgd) {
  747 + int err;
  748 +
  749 + hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start);
  750 + hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end);
  751 + hyp_idmap_vector = virt_to_phys(__kvm_hyp_init);
  752 +
  753 + if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) {
  754 + /*
  755 + * Our init code is crossing a page boundary. Allocate
  756 + * a bounce page, copy the code over and use that.
  757 + */
  758 + size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start;
  759 + phys_addr_t phys_base;
  760 +
  761 + init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL);
  762 + if (!init_bounce_page) {
  763 + kvm_err("Couldn't allocate HYP init bounce page\n");
  764 + err = -ENOMEM;
  765 + goto out;
  766 + }
  767 +
  768 + memcpy(init_bounce_page, __hyp_idmap_text_start, len);
  769 + /*
  770 + * Warning: the code we just copied to the bounce page
  771 + * must be flushed to the point of coherency.
  772 + * Otherwise, the data may be sitting in L2, and HYP
  773 + * mode won't be able to observe it as it runs with
  774 + * caches off at that point.
  775 + */
  776 + kvm_flush_dcache_to_poc(init_bounce_page, len);
  777 +
  778 + phys_base = virt_to_phys(init_bounce_page);
  779 + hyp_idmap_vector += phys_base - hyp_idmap_start;
  780 + hyp_idmap_start = phys_base;
  781 + hyp_idmap_end = phys_base + len;
  782 +
  783 + kvm_info("Using HYP init bounce page @%lx\n",
  784 + (unsigned long)phys_base);
  785 + }
  786 +
  787 + hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
  788 + boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
  789 + if (!hyp_pgd || !boot_hyp_pgd) {
738 790 kvm_err("Hyp mode PGD not allocated\n");
739   - return -ENOMEM;
  791 + err = -ENOMEM;
  792 + goto out;
740 793 }
741 794  
742   - return 0;
743   -}
  795 + /* Create the idmap in the boot page tables */
  796 + err = __create_hyp_mappings(boot_hyp_pgd,
  797 + hyp_idmap_start, hyp_idmap_end,
  798 + __phys_to_pfn(hyp_idmap_start),
  799 + PAGE_HYP);
744 800  
745   -/**
746   - * kvm_clear_idmap - remove all idmaps from the hyp pgd
747   - *
748   - * Free the underlying pmds for all pgds in range and clear the pgds (but
749   - * don't free them) afterwards.
750   - */
751   -void kvm_clear_hyp_idmap(void)
752   -{
753   - unsigned long addr, end;
754   - unsigned long next;
755   - pgd_t *pgd = hyp_pgd;
756   - pud_t *pud;
757   - pmd_t *pmd;
  801 + if (err) {
  802 + kvm_err("Failed to idmap %lx-%lx\n",
  803 + hyp_idmap_start, hyp_idmap_end);
  804 + goto out;
  805 + }
758 806  
759   - addr = virt_to_phys(__hyp_idmap_text_start);
760   - end = virt_to_phys(__hyp_idmap_text_end);
  807 + /* Map the very same page at the trampoline VA */
  808 + err = __create_hyp_mappings(boot_hyp_pgd,
  809 + TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
  810 + __phys_to_pfn(hyp_idmap_start),
  811 + PAGE_HYP);
  812 + if (err) {
  813 + kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n",
  814 + TRAMPOLINE_VA);
  815 + goto out;
  816 + }
761 817  
762   - pgd += pgd_index(addr);
763   - do {
764   - next = pgd_addr_end(addr, end);
765   - if (pgd_none_or_clear_bad(pgd))
766   - continue;
767   - pud = pud_offset(pgd, addr);
768   - pmd = pmd_offset(pud, addr);
  818 + /* Map the same page again into the runtime page tables */
  819 + err = __create_hyp_mappings(hyp_pgd,
  820 + TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
  821 + __phys_to_pfn(hyp_idmap_start),
  822 + PAGE_HYP);
  823 + if (err) {
  824 + kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n",
  825 + TRAMPOLINE_VA);
  826 + goto out;
  827 + }
769 828  
770   - pud_clear(pud);
771   - kvm_clean_pmd_entry(pmd);
772   - pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
773   - } while (pgd++, addr = next, addr < end);
  829 + return 0;
  830 +out:
  831 + free_hyp_pgds();
  832 + return err;
774 833 }
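One detail worth calling out in the new kvm_mmu_init(): the bounce page is only needed when "(hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK" is non-zero, i.e. when the two addresses fall in different pages and the init code therefore straddles a page boundary. The same test in isolation (a kernel-style sketch, not part of the patch):

#include <linux/types.h>
#include <asm/page.h>

/* True when start and end fall in different pages: XOR keeps only the
 * differing bits, and masking off the page offset leaves something
 * non-zero only if the page frame numbers differ. */
static bool in_different_pages(unsigned long start, unsigned long end)
{
	return ((start ^ end) & PAGE_MASK) != 0;
}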
arch/arm/kvm/perf.c
  1 +/*
  2 + * Based on the x86 implementation.
  3 + *
  4 + * Copyright (C) 2012 ARM Ltd.
  5 + * Author: Marc Zyngier <marc.zyngier@arm.com>
  6 + *
  7 + * This program is free software; you can redistribute it and/or modify
  8 + * it under the terms of the GNU General Public License version 2 as
  9 + * published by the Free Software Foundation.
  10 + *
  11 + * This program is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 + * GNU General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU General Public License
  17 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18 + */
  19 +
  20 +#include <linux/perf_event.h>
  21 +#include <linux/kvm_host.h>
  22 +
  23 +#include <asm/kvm_emulate.h>
  24 +
  25 +static int kvm_is_in_guest(void)
  26 +{
  27 + return kvm_arm_get_running_vcpu() != NULL;
  28 +}
  29 +
  30 +static int kvm_is_user_mode(void)
  31 +{
  32 + struct kvm_vcpu *vcpu;
  33 +
  34 + vcpu = kvm_arm_get_running_vcpu();
  35 +
  36 + if (vcpu)
  37 + return !vcpu_mode_priv(vcpu);
  38 +
  39 + return 0;
  40 +}
  41 +
  42 +static unsigned long kvm_get_guest_ip(void)
  43 +{
  44 + struct kvm_vcpu *vcpu;
  45 +
  46 + vcpu = kvm_arm_get_running_vcpu();
  47 +
  48 + if (vcpu)
  49 + return *vcpu_pc(vcpu);
  50 +
  51 + return 0;
  52 +}
  53 +
  54 +static struct perf_guest_info_callbacks kvm_guest_cbs = {
  55 + .is_in_guest = kvm_is_in_guest,
  56 + .is_user_mode = kvm_is_user_mode,
  57 + .get_guest_ip = kvm_get_guest_ip,
  58 +};
  59 +
  60 +int kvm_perf_init(void)
  61 +{
  62 + return perf_register_guest_info_callbacks(&kvm_guest_cbs);
  63 +}
  64 +
  65 +int kvm_perf_teardown(void)
  66 +{
  67 + return perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
  68 +}
arch/arm/mm/idmap.c
... ... @@ -8,7 +8,6 @@
8 8 #include <asm/pgtable.h>
9 9 #include <asm/sections.h>
10 10 #include <asm/system_info.h>
11   -#include <asm/virt.h>
12 11  
13 12 pgd_t *idmap_pgd;
14 13  
15 14  
... ... @@ -83,37 +82,10 @@
83 82 } while (pgd++, addr = next, addr != end);
84 83 }
85 84  
86   -#if defined(CONFIG_ARM_VIRT_EXT) && defined(CONFIG_ARM_LPAE)
87   -pgd_t *hyp_pgd;
88   -
89   -extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
90   -
91   -static int __init init_static_idmap_hyp(void)
92   -{
93   - hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
94   - if (!hyp_pgd)
95   - return -ENOMEM;
96   -
97   - pr_info("Setting up static HYP identity map for 0x%p - 0x%p\n",
98   - __hyp_idmap_text_start, __hyp_idmap_text_end);
99   - identity_mapping_add(hyp_pgd, __hyp_idmap_text_start,
100   - __hyp_idmap_text_end, PMD_SECT_AP1);
101   -
102   - return 0;
103   -}
104   -#else
105   -static int __init init_static_idmap_hyp(void)
106   -{
107   - return 0;
108   -}
109   -#endif
110   -
111 85 extern char __idmap_text_start[], __idmap_text_end[];
112 86  
113 87 static int __init init_static_idmap(void)
114 88 {
115   - int ret;
116   -
117 89 idmap_pgd = pgd_alloc(&init_mm);
118 90 if (!idmap_pgd)
119 91 return -ENOMEM;
... ... @@ -123,12 +95,10 @@
123 95 identity_mapping_add(idmap_pgd, __idmap_text_start,
124 96 __idmap_text_end, 0);
125 97  
126   - ret = init_static_idmap_hyp();
127   -
128 98 /* Flush L1 for the hardware to see this page table content */
129 99 flush_cache_louis();
130 100  
131   - return ret;
  101 + return 0;
132 102 }
133 103 early_initcall(init_static_idmap);
134 104