Commit 7f20fd23377ac3356657ce35fcaf19ee2fea8345

Authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "Bugfixes (arm and x86) and cleanups"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  selftests: kvm: Adding config fragments
  KVM: selftests: Update gitignore file for latest changes
  kvm: remove unnecessary PageReserved check
  KVM: arm/arm64: vgic: Reevaluate level sensitive interrupts on enable
  KVM: arm: Don't write junk to CP15 registers on reset
  KVM: arm64: Don't write junk to sysregs on reset
  KVM: arm/arm64: Sync ICH_VMCR_EL2 back when about to block
  x86: kvm: remove useless calls to kvm_para_available
  KVM: no need to check return value of debugfs_create functions
  KVM: remove kvm_arch_has_vcpu_debugfs()
  KVM: Fix leak vCPU's VMCS value into other pCPU
  KVM: Check preempted_in_kernel for involuntary preemption
  KVM: LAPIC: Don't need to wakeup vCPU twice after timer fire
  arm64: KVM: hyp: debug-sr: Mark expected switch fall-through
  KVM: arm64: Update kvm_arm_exception_class and esr_class_str for new EC
  KVM: arm: vgic-v3: Mark expected switch fall-through
  arm64: KVM: regmap: Fix unexpected switch fall-through
  KVM: arm/arm64: Introduce kvm_pmu_vcpu_init() to setup PMU counter index

Showing 30 changed files

arch/arm/kvm/coproc.c
... ... @@ -651,13 +651,22 @@
651 651 }
652 652  
653 653 static void reset_coproc_regs(struct kvm_vcpu *vcpu,
654   - const struct coproc_reg *table, size_t num)
  654 + const struct coproc_reg *table, size_t num,
  655 + unsigned long *bmap)
655 656 {
656 657 unsigned long i;
657 658  
658 659 for (i = 0; i < num; i++)
659   - if (table[i].reset)
  660 + if (table[i].reset) {
  661 + int reg = table[i].reg;
  662 +
660 663 table[i].reset(vcpu, &table[i]);
  664 + if (reg > 0 && reg < NR_CP15_REGS) {
  665 + set_bit(reg, bmap);
  666 + if (table[i].is_64bit)
  667 + set_bit(reg + 1, bmap);
  668 + }
  669 + }
661 670 }
662 671  
663 672 static struct coproc_params decode_32bit_hsr(struct kvm_vcpu *vcpu)
... ... @@ -1432,18 +1441,16 @@
1432 1441 {
1433 1442 size_t num;
1434 1443 const struct coproc_reg *table;
  1444 + DECLARE_BITMAP(bmap, NR_CP15_REGS) = { 0, };
1435 1445  
1436   - /* Catch someone adding a register without putting in reset entry. */
1437   - memset(vcpu->arch.ctxt.cp15, 0x42, sizeof(vcpu->arch.ctxt.cp15));
1438   -
1439 1446 /* Generic chip reset first (so target could override). */
1440   - reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs));
  1447 + reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs), bmap);
1441 1448  
1442 1449 table = get_target_table(vcpu->arch.target, &num);
1443   - reset_coproc_regs(vcpu, table, num);
  1450 + reset_coproc_regs(vcpu, table, num, bmap);
1444 1451  
1445 1452 for (num = 1; num < NR_CP15_REGS; num++)
1446   - WARN(vcpu_cp15(vcpu, num) == 0x42424242,
  1453 + WARN(!test_bit(num, bmap),
1447 1454 "Didn't reset vcpu_cp15(vcpu, %zi)", num);
1448 1455 }
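
The old reset path poisoned every CP15 register with 0x42 and then looked for leftovers, which briefly writes junk into registers whose reset is owned elsewhere; the replacement records, in a bitmap, which table entries actually ran a reset hook and warns about the rest. Below is a minimal user-space sketch of that bookkeeping only; the types and the set_bit()/test_bit() helpers are local stand-ins, not the kernel implementations.

    #include <stdio.h>

    /* Local stand-ins for the kernel's set_bit()/test_bit(); one long is enough here. */
    static void set_bit(int nr, unsigned long *map)        { *map |= 1UL << nr; }
    static int  test_bit(int nr, const unsigned long *map) { return (*map >> nr) & 1; }

    #define NR_REGS 4

    struct reg_desc {
            int reg;                        /* register index this entry resets */
            void (*reset)(int reg);         /* reset hook, may be NULL */
    };

    static void reset_to_zero(int reg) { (void)reg; /* a real hook would write the register */ }

    int main(void)
    {
            /* Hypothetical table: the entry for reg 3 deliberately has no reset hook. */
            struct reg_desc table[] = { { 1, reset_to_zero }, { 2, reset_to_zero }, { 3, NULL } };
            unsigned long bmap = 0;
            size_t i;
            int reg;

            for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
                    if (table[i].reset) {
                            table[i].reset(table[i].reg);
                            set_bit(table[i].reg, &bmap);   /* remember that this reg was reset */
                    }

            for (reg = 1; reg < NR_REGS; reg++)
                    if (!test_bit(reg, &bmap))
                            printf("Didn't reset reg %d\n", reg);   /* fires for reg 3 */
            return 0;
    }
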
arch/arm64/include/asm/kvm_arm.h
... ... @@ -316,9 +316,10 @@
316 316  
317 317 #define kvm_arm_exception_class \
318 318 ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \
319   - ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(CP14_64), ECN(SVC64), \
320   - ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(IMP_DEF), ECN(IABT_LOW), \
321   - ECN(IABT_CUR), ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \
  319 + ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(PAC), ECN(CP14_64), \
  320 + ECN(SVC64), ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(SVE), \
  321 + ECN(IMP_DEF), ECN(IABT_LOW), ECN(IABT_CUR), \
  322 + ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \
322 323 ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \
323 324 ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \
324 325 ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
arch/arm64/kernel/traps.c
... ... @@ -733,6 +733,7 @@
733 733 [ESR_ELx_EC_CP14_LS] = "CP14 LDC/STC",
734 734 [ESR_ELx_EC_FP_ASIMD] = "ASIMD",
735 735 [ESR_ELx_EC_CP10_ID] = "CP10 MRC/VMRS",
  736 + [ESR_ELx_EC_PAC] = "PAC",
736 737 [ESR_ELx_EC_CP14_64] = "CP14 MCRR/MRRC",
737 738 [ESR_ELx_EC_ILL] = "PSTATE.IL",
738 739 [ESR_ELx_EC_SVC32] = "SVC (AArch32)",
arch/arm64/kvm/hyp/debug-sr.c
... ... @@ -18,40 +18,70 @@
18 18 #define save_debug(ptr,reg,nr) \
19 19 switch (nr) { \
20 20 case 15: ptr[15] = read_debug(reg, 15); \
  21 + /* Fall through */ \
21 22 case 14: ptr[14] = read_debug(reg, 14); \
  23 + /* Fall through */ \
22 24 case 13: ptr[13] = read_debug(reg, 13); \
  25 + /* Fall through */ \
23 26 case 12: ptr[12] = read_debug(reg, 12); \
  27 + /* Fall through */ \
24 28 case 11: ptr[11] = read_debug(reg, 11); \
  29 + /* Fall through */ \
25 30 case 10: ptr[10] = read_debug(reg, 10); \
  31 + /* Fall through */ \
26 32 case 9: ptr[9] = read_debug(reg, 9); \
  33 + /* Fall through */ \
27 34 case 8: ptr[8] = read_debug(reg, 8); \
  35 + /* Fall through */ \
28 36 case 7: ptr[7] = read_debug(reg, 7); \
  37 + /* Fall through */ \
29 38 case 6: ptr[6] = read_debug(reg, 6); \
  39 + /* Fall through */ \
30 40 case 5: ptr[5] = read_debug(reg, 5); \
  41 + /* Fall through */ \
31 42 case 4: ptr[4] = read_debug(reg, 4); \
  43 + /* Fall through */ \
32 44 case 3: ptr[3] = read_debug(reg, 3); \
  45 + /* Fall through */ \
33 46 case 2: ptr[2] = read_debug(reg, 2); \
  47 + /* Fall through */ \
34 48 case 1: ptr[1] = read_debug(reg, 1); \
  49 + /* Fall through */ \
35 50 default: ptr[0] = read_debug(reg, 0); \
36 51 }
37 52  
38 53 #define restore_debug(ptr,reg,nr) \
39 54 switch (nr) { \
40 55 case 15: write_debug(ptr[15], reg, 15); \
  56 + /* Fall through */ \
41 57 case 14: write_debug(ptr[14], reg, 14); \
  58 + /* Fall through */ \
42 59 case 13: write_debug(ptr[13], reg, 13); \
  60 + /* Fall through */ \
43 61 case 12: write_debug(ptr[12], reg, 12); \
  62 + /* Fall through */ \
44 63 case 11: write_debug(ptr[11], reg, 11); \
  64 + /* Fall through */ \
45 65 case 10: write_debug(ptr[10], reg, 10); \
  66 + /* Fall through */ \
46 67 case 9: write_debug(ptr[9], reg, 9); \
  68 + /* Fall through */ \
47 69 case 8: write_debug(ptr[8], reg, 8); \
  70 + /* Fall through */ \
48 71 case 7: write_debug(ptr[7], reg, 7); \
  72 + /* Fall through */ \
49 73 case 6: write_debug(ptr[6], reg, 6); \
  74 + /* Fall through */ \
50 75 case 5: write_debug(ptr[5], reg, 5); \
  76 + /* Fall through */ \
51 77 case 4: write_debug(ptr[4], reg, 4); \
  78 + /* Fall through */ \
52 79 case 3: write_debug(ptr[3], reg, 3); \
  80 + /* Fall through */ \
53 81 case 2: write_debug(ptr[2], reg, 2); \
  82 + /* Fall through */ \
54 83 case 1: write_debug(ptr[1], reg, 1); \
  84 + /* Fall through */ \
55 85 default: write_debug(ptr[0], reg, 0); \
56 86 }
57 87  
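These save/restore macros rely on deliberate fall-through: saving debug register N implies saving N-1 down to 0, so each case is meant to continue into the next. With -Wimplicit-fallthrough enabled for the kernel, every such cascade needs an annotation; at this point the convention was a /* Fall through */ comment (later replaced by the fallthrough keyword). A stand-alone sketch of the same shape, with made-up names:

    #include <stdio.h>

    /* Saving counter N implies saving N-1 down to 0, so the cases cascade on purpose;
     * each cascade point carries a fall-through annotation for -Wimplicit-fallthrough. */
    static void save_counters(int *dst, const int *src, int nr)
    {
            switch (nr) {
            case 3: dst[3] = src[3];
                    /* Fall through */
            case 2: dst[2] = src[2];
                    /* Fall through */
            case 1: dst[1] = src[1];
                    /* Fall through */
            default: dst[0] = src[0];
            }
    }

    int main(void)
    {
            int src[4] = { 10, 11, 12, 13 }, dst[4] = { 0 };

            save_counters(dst, src, 2);     /* copies indices 2, 1 and 0 only */
            printf("%d %d %d %d\n", dst[0], dst[1], dst[2], dst[3]);   /* 10 11 12 0 */
            return 0;
    }
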
arch/arm64/kvm/regmap.c
... ... @@ -178,14 +178,19 @@
178 178 switch (spsr_idx) {
179 179 case KVM_SPSR_SVC:
180 180 write_sysreg_el1(v, SYS_SPSR);
  181 + break;
181 182 case KVM_SPSR_ABT:
182 183 write_sysreg(v, spsr_abt);
  184 + break;
183 185 case KVM_SPSR_UND:
184 186 write_sysreg(v, spsr_und);
  187 + break;
185 188 case KVM_SPSR_IRQ:
186 189 write_sysreg(v, spsr_irq);
  190 + break;
187 191 case KVM_SPSR_FIQ:
188 192 write_sysreg(v, spsr_fiq);
  193 + break;
189 194 }
190 195 }
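
Unlike debug-sr.c above, the fall-through here was a genuine bug: writing one banked SPSR also clobbered every SPSR below it because the cases lacked break statements. A small stand-alone illustration of that failure mode, using hypothetical names:

    #include <stdio.h>

    enum bank { BANK_A, BANK_B, BANK_C };

    static int a, b, c;

    /* Buggy shape: no break, so writing BANK_A also falls into B and C. */
    static void set_bank_buggy(enum bank which, int v)
    {
            switch (which) {
            case BANK_A: a = v;
            case BANK_B: b = v;     /* reached unintentionally from BANK_A */
            case BANK_C: c = v;     /* reached unintentionally from A and B */
            }
    }

    /* Fixed shape: the cases are independent, so each one must break. */
    static void set_bank(enum bank which, int v)
    {
            switch (which) {
            case BANK_A: a = v; break;
            case BANK_B: b = v; break;
            case BANK_C: c = v; break;
            }
    }

    int main(void)
    {
            a = b = c = 0;
            set_bank_buggy(BANK_A, 42);
            printf("buggy: %d %d %d\n", a, b, c);   /* 42 42 42 */

            a = b = c = 0;
            set_bank(BANK_A, 42);
            printf("fixed: %d %d %d\n", a, b, c);   /* 42 0 0 */
            return 0;
    }
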
arch/arm64/kvm/sys_regs.c
... ... @@ -632,7 +632,7 @@
632 632 */
633 633 val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
634 634 | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
635   - __vcpu_sys_reg(vcpu, PMCR_EL0) = val;
  635 + __vcpu_sys_reg(vcpu, r->reg) = val;
636 636 }
637 637  
638 638 static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags)
... ... @@ -981,13 +981,13 @@
981 981 /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
982 982 #define DBG_BCR_BVR_WCR_WVR_EL1(n) \
983 983 { SYS_DESC(SYS_DBGBVRn_EL1(n)), \
984   - trap_bvr, reset_bvr, n, 0, get_bvr, set_bvr }, \
  984 + trap_bvr, reset_bvr, 0, 0, get_bvr, set_bvr }, \
985 985 { SYS_DESC(SYS_DBGBCRn_EL1(n)), \
986   - trap_bcr, reset_bcr, n, 0, get_bcr, set_bcr }, \
  986 + trap_bcr, reset_bcr, 0, 0, get_bcr, set_bcr }, \
987 987 { SYS_DESC(SYS_DBGWVRn_EL1(n)), \
988   - trap_wvr, reset_wvr, n, 0, get_wvr, set_wvr }, \
  988 + trap_wvr, reset_wvr, 0, 0, get_wvr, set_wvr }, \
989 989 { SYS_DESC(SYS_DBGWCRn_EL1(n)), \
990   - trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr }
  990 + trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr }
991 991  
992 992 /* Macro to expand the PMEVCNTRn_EL0 register */
993 993 #define PMU_PMEVCNTR_EL0(n) \
... ... @@ -1540,7 +1540,7 @@
1540 1540 { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
1541 1541 { SYS_DESC(SYS_CTR_EL0), access_ctr },
1542 1542  
1543   - { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, },
  1543 + { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, PMCR_EL0 },
1544 1544 { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 },
1545 1545 { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, NULL, PMCNTENSET_EL0 },
1546 1546 { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, NULL, PMOVSSET_EL0 },
... ... @@ -2254,13 +2254,19 @@
2254 2254 }
2255 2255  
2256 2256 static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
2257   - const struct sys_reg_desc *table, size_t num)
  2257 + const struct sys_reg_desc *table, size_t num,
  2258 + unsigned long *bmap)
2258 2259 {
2259 2260 unsigned long i;
2260 2261  
2261 2262 for (i = 0; i < num; i++)
2262   - if (table[i].reset)
  2263 + if (table[i].reset) {
  2264 + int reg = table[i].reg;
  2265 +
2263 2266 table[i].reset(vcpu, &table[i]);
  2267 + if (reg > 0 && reg < NR_SYS_REGS)
  2268 + set_bit(reg, bmap);
  2269 + }
2264 2270 }
2265 2271  
2266 2272 /**
... ... @@ -2774,18 +2780,16 @@
2774 2780 {
2775 2781 size_t num;
2776 2782 const struct sys_reg_desc *table;
  2783 + DECLARE_BITMAP(bmap, NR_SYS_REGS) = { 0, };
2777 2784  
2778   - /* Catch someone adding a register without putting in reset entry. */
2779   - memset(&vcpu->arch.ctxt.sys_regs, 0x42, sizeof(vcpu->arch.ctxt.sys_regs));
2780   -
2781 2785 /* Generic chip reset first (so target could override). */
2782   - reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
  2786 + reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs), bmap);
2783 2787  
2784 2788 table = get_target_table(vcpu->arch.target, true, &num);
2785   - reset_sys_reg_descs(vcpu, table, num);
  2789 + reset_sys_reg_descs(vcpu, table, num, bmap);
2786 2790  
2787 2791 for (num = 1; num < NR_SYS_REGS; num++) {
2788   - if (WARN(__vcpu_sys_reg(vcpu, num) == 0x4242424242424242,
  2792 + if (WARN(!test_bit(num, bmap),
2789 2793 "Didn't reset __vcpu_sys_reg(%zi)\n", num))
2790 2794 break;
2791 2795 }
arch/mips/kvm/mips.c
... ... @@ -150,16 +150,6 @@
150 150 return 0;
151 151 }
152 152  
153   -bool kvm_arch_has_vcpu_debugfs(void)
154   -{
155   - return false;
156   -}
157   -
158   -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
159   -{
160   - return 0;
161   -}
162   -
163 153 void kvm_mips_free_vcpus(struct kvm *kvm)
164 154 {
165 155 unsigned int i;
arch/powerpc/kvm/powerpc.c
... ... @@ -50,6 +50,11 @@
50 50 return !!(v->arch.pending_exceptions) || kvm_request_pending(v);
51 51 }
52 52  
  53 +bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
  54 +{
  55 + return kvm_arch_vcpu_runnable(vcpu);
  56 +}
  57 +
53 58 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
54 59 {
55 60 return false;
... ... @@ -450,16 +455,6 @@
450 455 return kvmppc_core_init_vm(kvm);
451 456 err_out:
452 457 return -EINVAL;
453   -}
454   -
455   -bool kvm_arch_has_vcpu_debugfs(void)
456   -{
457   - return false;
458   -}
459   -
460   -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
461   -{
462   - return 0;
463 458 }
464 459  
465 460 void kvm_arch_destroy_vm(struct kvm *kvm)
arch/s390/kvm/kvm-s390.c
... ... @@ -2516,16 +2516,6 @@
2516 2516 return rc;
2517 2517 }
2518 2518  
2519   -bool kvm_arch_has_vcpu_debugfs(void)
2520   -{
2521   - return false;
2522   -}
2523   -
2524   -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2525   -{
2526   - return 0;
2527   -}
2528   -
2529 2519 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2530 2520 {
2531 2521 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
arch/x86/include/asm/kvm_host.h
... ... @@ -35,6 +35,8 @@
35 35 #include <asm/kvm_vcpu_regs.h>
36 36 #include <asm/hyperv-tlfs.h>
37 37  
  38 +#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
  39 +
38 40 #define KVM_MAX_VCPUS 288
39 41 #define KVM_SOFT_MAX_VCPUS 240
40 42 #define KVM_MAX_VCPU_ID 1023
... ... @@ -1175,6 +1177,7 @@
1175 1177 int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
1176 1178 uint32_t guest_irq, bool set);
1177 1179 void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
  1180 + bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
1178 1181  
1179 1182 int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
1180 1183 bool *expired);
arch/x86/kernel/kvm.c
... ... @@ -308,9 +308,6 @@
308 308  
309 309 static void kvm_guest_cpu_init(void)
310 310 {
311   - if (!kvm_para_available())
312   - return;
313   -
314 311 if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
315 312 u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
316 313  
... ... @@ -625,9 +622,6 @@
625 622 {
626 623 int i;
627 624  
628   - if (!kvm_para_available())
629   - return;
630   -
631 625 paravirt_ops_setup();
632 626 register_reboot_notifier(&kvm_pv_reboot_nb);
633 627 for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
... ... @@ -848,8 +842,6 @@
848 842 */
849 843 void __init kvm_spinlock_init(void)
850 844 {
851   - if (!kvm_para_available())
852   - return;
853 845 /* Does host kernel support KVM_FEATURE_PV_UNHALT? */
854 846 if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
855 847 return;
arch/x86/kvm/debugfs.c
... ... @@ -8,11 +8,6 @@
8 8 #include <linux/debugfs.h>
9 9 #include "lapic.h"
10 10  
11   -bool kvm_arch_has_vcpu_debugfs(void)
12   -{
13   - return true;
14   -}
15   -
16 11 static int vcpu_get_timer_advance_ns(void *data, u64 *val)
17 12 {
18 13 struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
... ... @@ -48,38 +43,23 @@
48 43  
49 44 DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_frac_fops, vcpu_get_tsc_scaling_frac_bits, NULL, "%llu\n");
50 45  
51   -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
  46 +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
52 47 {
53   - struct dentry *ret;
  48 + debugfs_create_file("tsc-offset", 0444, vcpu->debugfs_dentry, vcpu,
  49 + &vcpu_tsc_offset_fops);
54 50  
55   - ret = debugfs_create_file("tsc-offset", 0444,
56   - vcpu->debugfs_dentry,
57   - vcpu, &vcpu_tsc_offset_fops);
58   - if (!ret)
59   - return -ENOMEM;
  51 + if (lapic_in_kernel(vcpu))
  52 + debugfs_create_file("lapic_timer_advance_ns", 0444,
  53 + vcpu->debugfs_dentry, vcpu,
  54 + &vcpu_timer_advance_ns_fops);
60 55  
61   - if (lapic_in_kernel(vcpu)) {
62   - ret = debugfs_create_file("lapic_timer_advance_ns", 0444,
63   - vcpu->debugfs_dentry,
64   - vcpu, &vcpu_timer_advance_ns_fops);
65   - if (!ret)
66   - return -ENOMEM;
67   - }
68   -
69 56 if (kvm_has_tsc_control) {
70   - ret = debugfs_create_file("tsc-scaling-ratio", 0444,
71   - vcpu->debugfs_dentry,
72   - vcpu, &vcpu_tsc_scaling_fops);
73   - if (!ret)
74   - return -ENOMEM;
75   - ret = debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444,
76   - vcpu->debugfs_dentry,
77   - vcpu, &vcpu_tsc_scaling_frac_fops);
78   - if (!ret)
79   - return -ENOMEM;
80   -
  57 + debugfs_create_file("tsc-scaling-ratio", 0444,
  58 + vcpu->debugfs_dentry, vcpu,
  59 + &vcpu_tsc_scaling_fops);
  60 + debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444,
  61 + vcpu->debugfs_dentry, vcpu,
  62 + &vcpu_tsc_scaling_frac_fops);
81 63 }
82   -
83   - return 0;
84 64 }
arch/x86/kvm/lapic.c
... ... @@ -1548,7 +1548,6 @@
1548 1548 static void apic_timer_expired(struct kvm_lapic *apic)
1549 1549 {
1550 1550 struct kvm_vcpu *vcpu = apic->vcpu;
1551   - struct swait_queue_head *q = &vcpu->wq;
1552 1551 struct kvm_timer *ktimer = &apic->lapic_timer;
1553 1552  
1554 1553 if (atomic_read(&apic->lapic_timer.pending))
... ... @@ -1566,13 +1565,6 @@
1566 1565  
1567 1566 atomic_inc(&apic->lapic_timer.pending);
1568 1567 kvm_set_pending_timer(vcpu);
1569   -
1570   - /*
1571   - * For x86, the atomic_inc() is serialized, thus
1572   - * using swait_active() is safe.
1573   - */
1574   - if (swait_active(q))
1575   - swake_up_one(q);
1576 1568 }
1577 1569  
1578 1570 static void start_sw_tscdeadline(struct kvm_lapic *apic)
arch/x86/kvm/svm.c
... ... @@ -5190,6 +5190,11 @@
5190 5190 kvm_vcpu_wake_up(vcpu);
5191 5191 }
5192 5192  
  5193 +static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
  5194 +{
  5195 + return false;
  5196 +}
  5197 +
5193 5198 static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
5194 5199 {
5195 5200 unsigned long flags;
... ... @@ -7314,6 +7319,7 @@
7314 7319  
7315 7320 .pmu_ops = &amd_pmu_ops,
7316 7321 .deliver_posted_interrupt = svm_deliver_avic_intr,
  7322 + .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
7317 7323 .update_pi_irte = svm_update_pi_irte,
7318 7324 .setup_mce = svm_setup_mce,
7319 7325  
arch/x86/kvm/vmx/vmx.c
... ... @@ -6117,6 +6117,11 @@
6117 6117 return max_irr;
6118 6118 }
6119 6119  
  6120 +static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
  6121 +{
  6122 + return pi_test_on(vcpu_to_pi_desc(vcpu));
  6123 +}
  6124 +
6120 6125 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
6121 6126 {
6122 6127 if (!kvm_vcpu_apicv_active(vcpu))
... ... @@ -7726,6 +7731,7 @@
7726 7731 .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
7727 7732 .sync_pir_to_irr = vmx_sync_pir_to_irr,
7728 7733 .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
  7734 + .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt,
7729 7735  
7730 7736 .set_tss_addr = vmx_set_tss_addr,
7731 7737 .set_identity_map_addr = vmx_set_identity_map_addr,
arch/x86/kvm/x86.c
... ... @@ -9698,6 +9698,22 @@
9698 9698 return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
9699 9699 }
9700 9700  
  9701 +bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
  9702 +{
  9703 + if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
  9704 + return true;
  9705 +
  9706 + if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
  9707 + kvm_test_request(KVM_REQ_SMI, vcpu) ||
  9708 + kvm_test_request(KVM_REQ_EVENT, vcpu))
  9709 + return true;
  9710 +
  9711 + if (vcpu->arch.apicv_active && kvm_x86_ops->dy_apicv_has_pending_interrupt(vcpu))
  9712 + return true;
  9713 +
  9714 + return false;
  9715 +}
  9716 +
9701 9717 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
9702 9718 {
9703 9719 return vcpu->arch.preempted_in_kernel;
include/kvm/arm_pmu.h
... ... @@ -34,6 +34,7 @@
34 34 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx);
35 35 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
36 36 u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu);
  37 +void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu);
37 38 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu);
38 39 void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu);
39 40 void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val);
... ... @@ -71,6 +72,7 @@
71 72 {
72 73 return 0;
73 74 }
  75 +static inline void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) {}
74 76 static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {}
75 77 static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {}
76 78 static inline void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) {}
include/kvm/arm_vgic.h
... ... @@ -350,6 +350,7 @@
350 350  
351 351 void kvm_vgic_load(struct kvm_vcpu *vcpu);
352 352 void kvm_vgic_put(struct kvm_vcpu *vcpu);
  353 +void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu);
353 354  
354 355 #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
355 356 #define vgic_initialized(k) ((k)->arch.vgic.initialized)
include/linux/kvm_host.h
... ... @@ -861,8 +861,9 @@
861 861 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
862 862 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
863 863  
864   -bool kvm_arch_has_vcpu_debugfs(void);
865   -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu);
  864 +#ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
  865 +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu);
  866 +#endif
866 867  
867 868 int kvm_arch_hardware_enable(void);
868 869 void kvm_arch_hardware_disable(void);
... ... @@ -872,6 +873,7 @@
872 873 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
873 874 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
874 875 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
  876 +bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
875 877  
876 878 #ifndef __KVM_HAVE_ARCH_VM_ALLOC
877 879 /*
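Instead of a has_vcpu_debugfs() query plus per-arch stubs, the hook is now declared only when an architecture defines __KVM_HAVE_ARCH_VCPU_DEBUGFS (x86 is the only user). A tiny stand-alone sketch of this compile-time opt-in pattern, with hypothetical names:

    #include <stdio.h>

    /* An architecture that implements the hook defines the opt-in macro
     * (the kernel equivalent is __KVM_HAVE_ARCH_VCPU_DEBUGFS in its kvm_host.h). */
    #define HAVE_ARCH_VCPU_DEBUGFS

    #ifdef HAVE_ARCH_VCPU_DEBUGFS
    static void arch_create_vcpu_debugfs(int vcpu_id)
    {
            printf("arch-specific debugfs files for vcpu%d\n", vcpu_id);
    }
    #endif

    /* Common code: no weak stub and no has_...() query; the call simply
     * disappears on architectures that did not define the macro. */
    static void create_vcpu_debugfs(int vcpu_id)
    {
    #ifdef HAVE_ARCH_VCPU_DEBUGFS
            arch_create_vcpu_debugfs(vcpu_id);
    #endif
    }

    int main(void)
    {
            create_vcpu_debugfs(0);
            return 0;
    }
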
tools/testing/selftests/kvm/.gitignore
  1 +/s390x/sync_regs_test
1 2 /x86_64/cr4_cpuid_sync_test
2 3 /x86_64/evmcs_test
3 4 /x86_64/hyperv_cpuid
4   -/x86_64/kvm_create_max_vcpus
5 5 /x86_64/mmio_warning_test
6 6 /x86_64/platform_info_test
7 7 /x86_64/set_sregs_test
... ... @@ -13,4 +13,5 @@
13 13 /x86_64/vmx_tsc_adjust_test
14 14 /clear_dirty_log_test
15 15 /dirty_log_test
  16 +/kvm_create_max_vcpus
tools/testing/selftests/kvm/config
  1 +CONFIG_KVM=y
  2 +CONFIG_KVM_INTEL=y
  3 +CONFIG_KVM_AMD=y
virt/kvm/arm/arm.c
... ... @@ -144,11 +144,6 @@
144 144 return ret;
145 145 }
146 146  
147   -bool kvm_arch_has_vcpu_debugfs(void)
148   -{
149   - return false;
150   -}
151   -
152 147 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
153 148 {
154 149 return 0;
... ... @@ -323,6 +318,17 @@
323 318  
324 319 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
325 320 {
  321 + /*
  322 + * If we're about to block (most likely because we've just hit a
  323 + * WFI), we need to sync back the state of the GIC CPU interface
  324 + * so that we have the latest PMR and group enables. This ensures
  325 + * that kvm_arch_vcpu_runnable has up-to-date data to decide
  326 + * whether we have pending interrupts.
  327 + */
  328 + preempt_disable();
  329 + kvm_vgic_vmcr_sync(vcpu);
  330 + preempt_enable();
  331 +
326 332 kvm_vgic_v4_enable_doorbell(vcpu);
327 333 }
328 334  
... ... @@ -339,6 +345,8 @@
339 345  
340 346 /* Set up the timer */
341 347 kvm_timer_vcpu_init(vcpu);
  348 +
  349 + kvm_pmu_vcpu_init(vcpu);
342 350  
343 351 kvm_arm_reset_debug_ptr(vcpu);
344 352  
virt/kvm/arm/hyp/vgic-v3-sr.c
... ... @@ -349,8 +349,10 @@
349 349 case 7:
350 350 cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3);
351 351 cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2);
  352 + /* Fall through */
352 353 case 6:
353 354 cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1);
  355 + /* Fall through */
354 356 default:
355 357 cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0);
356 358 }
357 359  
... ... @@ -359,8 +361,10 @@
359 361 case 7:
360 362 cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3);
361 363 cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2);
  364 + /* Fall through */
362 365 case 6:
363 366 cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1);
  367 + /* Fall through */
364 368 default:
365 369 cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0);
366 370 }
367 371  
... ... @@ -382,8 +386,10 @@
382 386 case 7:
383 387 __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3);
384 388 __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2);
  389 + /* Fall through */
385 390 case 6:
386 391 __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1);
  392 + /* Fall through */
387 393 default:
388 394 __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0);
389 395 }
390 396  
... ... @@ -392,8 +398,10 @@
392 398 case 7:
393 399 __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3);
394 400 __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2);
  401 + /* Fall through */
395 402 case 6:
396 403 __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1);
  404 + /* Fall through */
397 405 default:
398 406 __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0);
399 407 }
virt/kvm/arm/pmu.c
... ... @@ -215,6 +215,20 @@
215 215 }
216 216  
217 217 /**
  218 + * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
  219 + * @vcpu: The vcpu pointer
  220 + *
  221 + */
  222 +void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
  223 +{
  224 + int i;
  225 + struct kvm_pmu *pmu = &vcpu->arch.pmu;
  226 +
  227 + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
  228 + pmu->pmc[i].idx = i;
  229 +}
  230 +
  231 +/**
218 232 * kvm_pmu_vcpu_reset - reset pmu state for cpu
219 233 * @vcpu: The vcpu pointer
220 234 *
221 235  
... ... @@ -224,10 +238,8 @@
224 238 int i;
225 239 struct kvm_pmu *pmu = &vcpu->arch.pmu;
226 240  
227   - for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
  241 + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
228 242 kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
229   - pmu->pmc[i].idx = i;
230   - }
231 243  
232 244 bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
233 245 }
virt/kvm/arm/vgic/vgic-mmio.c
... ... @@ -113,6 +113,22 @@
113 113 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
114 114  
115 115 raw_spin_lock_irqsave(&irq->irq_lock, flags);
  116 + if (vgic_irq_is_mapped_level(irq)) {
  117 + bool was_high = irq->line_level;
  118 +
  119 + /*
  120 + * We need to update the state of the interrupt because
  121 + * the guest might have changed the state of the device
  122 + * while the interrupt was disabled at the VGIC level.
  123 + */
  124 + irq->line_level = vgic_get_phys_line_level(irq);
  125 + /*
  126 + * Deactivate the physical interrupt so the GIC will let
  127 + * us know when it is asserted again.
  128 + */
  129 + if (!irq->active && was_high && !irq->line_level)
  130 + vgic_irq_set_phys_active(irq, false);
  131 + }
116 132 irq->enabled = true;
117 133 vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
118 134  
virt/kvm/arm/vgic/vgic-v2.c
... ... @@ -484,11 +484,18 @@
484 484 kvm_vgic_global_state.vctrl_base + GICH_APR);
485 485 }
486 486  
487   -void vgic_v2_put(struct kvm_vcpu *vcpu)
  487 +void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu)
488 488 {
489 489 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
490 490  
491 491 cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
  492 +}
  493 +
  494 +void vgic_v2_put(struct kvm_vcpu *vcpu)
  495 +{
  496 + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
  497 +
  498 + vgic_v2_vmcr_sync(vcpu);
492 499 cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR);
493 500 }
virt/kvm/arm/vgic/vgic-v3.c
... ... @@ -662,12 +662,17 @@
662 662 __vgic_v3_activate_traps(vcpu);
663 663 }
664 664  
665   -void vgic_v3_put(struct kvm_vcpu *vcpu)
  665 +void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
666 666 {
667 667 struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
668 668  
669 669 if (likely(cpu_if->vgic_sre))
670 670 cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr);
  671 +}
  672 +
  673 +void vgic_v3_put(struct kvm_vcpu *vcpu)
  674 +{
  675 + vgic_v3_vmcr_sync(vcpu);
671 676  
672 677 kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
673 678  
virt/kvm/arm/vgic/vgic.c
... ... @@ -919,6 +919,17 @@
919 919 vgic_v3_put(vcpu);
920 920 }
921 921  
  922 +void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu)
  923 +{
  924 + if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
  925 + return;
  926 +
  927 + if (kvm_vgic_global_state.type == VGIC_V2)
  928 + vgic_v2_vmcr_sync(vcpu);
  929 + else
  930 + vgic_v3_vmcr_sync(vcpu);
  931 +}
  932 +
922 933 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
923 934 {
924 935 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
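The refactor across vgic-v2.c, vgic-v3.c and vgic.c splits the "save VMCR" step out of the put() path so the blocking path can call it on its own via kvm_vgic_vmcr_sync(). A compressed user-space sketch of that shape, with simplified names and printfs standing in for the register accesses:

    #include <stdio.h>

    enum vgic_type { VGIC_V2, VGIC_V3 };
    static enum vgic_type vgic_type = VGIC_V3;

    static void vgic_v2_vmcr_sync(void) { puts("sync v2 VMCR"); }
    static void vgic_v3_vmcr_sync(void) { puts("sync v3 VMCR"); }

    /* put() keeps doing the full save, but the VMCR part is now a helper ... */
    static void vgic_v3_put(void)
    {
            vgic_v3_vmcr_sync();
            puts("save v3 APRs");
    }

    /* ... so the blocking path can sync just the VMCR, cheaply. */
    static void kvm_vgic_vmcr_sync(void)
    {
            if (vgic_type == VGIC_V2)
                    vgic_v2_vmcr_sync();
            else
                    vgic_v3_vmcr_sync();
    }

    int main(void)
    {
            kvm_vgic_vmcr_sync();   /* on WFI / about to block */
            vgic_v3_put();          /* on vcpu_put */
            return 0;
    }
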
virt/kvm/arm/vgic/vgic.h
... ... @@ -193,6 +193,7 @@
193 193 void vgic_v2_init_lrs(void);
194 194 void vgic_v2_load(struct kvm_vcpu *vcpu);
195 195 void vgic_v2_put(struct kvm_vcpu *vcpu);
  196 +void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu);
196 197  
197 198 void vgic_v2_save_state(struct kvm_vcpu *vcpu);
198 199 void vgic_v2_restore_state(struct kvm_vcpu *vcpu);
... ... @@ -223,6 +224,7 @@
223 224  
224 225 void vgic_v3_load(struct kvm_vcpu *vcpu);
225 226 void vgic_v3_put(struct kvm_vcpu *vcpu);
  227 +void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu);
226 228  
227 229 bool vgic_has_its(struct kvm *kvm);
228 230 int kvm_vgic_register_its_device(void);
virt/kvm/kvm_main.c
... ... @@ -1855,8 +1855,7 @@
1855 1855 if (!kvm_is_reserved_pfn(pfn)) {
1856 1856 struct page *page = pfn_to_page(pfn);
1857 1857  
1858   - if (!PageReserved(page))
1859   - SetPageDirty(page);
  1858 + SetPageDirty(page);
1860 1859 }
1861 1860 }
1862 1861 EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
... ... @@ -2477,6 +2476,29 @@
2477 2476 #endif
2478 2477 }
2479 2478  
  2479 +/*
  2480 + * Unlike kvm_arch_vcpu_runnable, this function is called outside
  2481 + * a vcpu_load/vcpu_put pair. However, for most architectures
  2482 + * kvm_arch_vcpu_runnable does not require vcpu_load.
  2483 + */
  2484 +bool __weak kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
  2485 +{
  2486 + return kvm_arch_vcpu_runnable(vcpu);
  2487 +}
  2488 +
  2489 +static bool vcpu_dy_runnable(struct kvm_vcpu *vcpu)
  2490 +{
  2491 + if (kvm_arch_dy_runnable(vcpu))
  2492 + return true;
  2493 +
  2494 +#ifdef CONFIG_KVM_ASYNC_PF
  2495 + if (!list_empty_careful(&vcpu->async_pf.done))
  2496 + return true;
  2497 +#endif
  2498 +
  2499 + return false;
  2500 +}
  2501 +
2480 2502 void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
2481 2503 {
2482 2504 struct kvm *kvm = me->kvm;
2483 2505  
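kvm_arch_dy_runnable() is introduced here as a __weak default so that only architectures with a cheaper or smarter answer (x86 and powerpc in this series) have to override it. A two-file user-space sketch of the weak-symbol override mechanism; the file names and demo function are hypothetical:

    /* common.c: generic code supplies a weak default. */
    #include <stdio.h>

    __attribute__((weak)) int arch_dy_runnable(void)
    {
            return 0;               /* conservative default */
    }

    int main(void)
    {
            printf("runnable: %d\n", arch_dy_runnable());
            return 0;
    }

    /* arch.c: an architecture overrides it with a strong definition. */
    int arch_dy_runnable(void)
    {
            return 1;
    }

    /* cc common.c arch.c   -> prints "runnable: 1" (strong definition wins)
     * cc common.c          -> prints "runnable: 0" (weak default is used)   */
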
... ... @@ -2506,9 +2528,10 @@
2506 2528 continue;
2507 2529 if (vcpu == me)
2508 2530 continue;
2509   - if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
  2531 + if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu))
2510 2532 continue;
2511   - if (yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu))
  2533 + if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
  2534 + !kvm_arch_vcpu_in_kernel(vcpu))
2512 2535 continue;
2513 2536 if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
2514 2537 continue;
... ... @@ -2591,30 +2614,20 @@
2591 2614 return anon_inode_getfd(name, &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC);
2592 2615 }
2593 2616  
2594   -static int kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
  2617 +static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2595 2618 {
  2619 +#ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
2596 2620 char dir_name[ITOA_MAX_LEN * 2];
2597   - int ret;
2598 2621  
2599   - if (!kvm_arch_has_vcpu_debugfs())
2600   - return 0;
2601   -
2602 2622 if (!debugfs_initialized())
2603   - return 0;
  2623 + return;
2604 2624  
2605 2625 snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id);
2606 2626 vcpu->debugfs_dentry = debugfs_create_dir(dir_name,
2607   - vcpu->kvm->debugfs_dentry);
2608   - if (!vcpu->debugfs_dentry)
2609   - return -ENOMEM;
  2627 + vcpu->kvm->debugfs_dentry);
2610 2628  
2611   - ret = kvm_arch_create_vcpu_debugfs(vcpu);
2612   - if (ret < 0) {
2613   - debugfs_remove_recursive(vcpu->debugfs_dentry);
2614   - return ret;
2615   - }
2616   -
2617   - return 0;
  2629 + kvm_arch_create_vcpu_debugfs(vcpu);
  2630 +#endif
2618 2631 }
2619 2632  
2620 2633 /*
... ... @@ -2649,9 +2662,7 @@
2649 2662 if (r)
2650 2663 goto vcpu_destroy;
2651 2664  
2652   - r = kvm_create_vcpu_debugfs(vcpu);
2653   - if (r)
2654   - goto vcpu_destroy;
  2665 + kvm_create_vcpu_debugfs(vcpu);
2655 2666  
2656 2667 mutex_lock(&kvm->lock);
2657 2668 if (kvm_get_vcpu_by_id(kvm, id)) {
... ... @@ -4205,7 +4216,7 @@
4205 4216 {
4206 4217 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
4207 4218  
4208   - vcpu->preempted = false;
  4219 + WRITE_ONCE(vcpu->preempted, false);
4209 4220 WRITE_ONCE(vcpu->ready, false);
4210 4221  
4211 4222 kvm_arch_sched_in(vcpu, cpu);
... ... @@ -4219,7 +4230,7 @@
4219 4230 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
4220 4231  
4221 4232 if (current->state == TASK_RUNNING) {
4222   - vcpu->preempted = true;
  4233 + WRITE_ONCE(vcpu->preempted, true);
4223 4234 WRITE_ONCE(vcpu->ready, true);
4224 4235 }
4225 4236 kvm_arch_vcpu_put(vcpu);
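
The final hunks wrap vcpu->preempted in WRITE_ONCE() because kvm_vcpu_on_spin() now reads the flag from another CPU without holding a lock, so the accesses are paired with READ_ONCE()/WRITE_ONCE() to keep the compiler from tearing or caching them. A small user-space sketch of the idiom; the macros below are simplified stand-ins for the kernel's:

    #include <stdio.h>
    #include <pthread.h>

    /* Simplified stand-ins for the kernel macros: a volatile access forces the
     * compiler to emit exactly one load/store and not cache the value. */
    #define WRITE_ONCE(x, val)      (*(volatile __typeof__(x) *)&(x) = (val))
    #define READ_ONCE(x)            (*(volatile __typeof__(x) *)&(x))

    static int preempted;

    static void *watcher(void *arg)
    {
            (void)arg;
            /* Without READ_ONCE the compiler may hoist the load and spin forever. */
            while (!READ_ONCE(preempted))
                    ;
            puts("observed preempted = 1");
            return NULL;
    }

    int main(void)                  /* build with: cc -pthread demo.c */
    {
            pthread_t t;

            pthread_create(&t, NULL, watcher, NULL);
            WRITE_ONCE(preempted, 1);
            pthread_join(t, NULL);
            return 0;
    }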