Commit c9aaa8957f203bd6df83b002fb40b98390bed078

Authored by Glauber Costa
Committed by Avi Kivity
1 parent 9ddabbe72e

KVM: Steal time implementation

To implement steal time, we need the hypervisor to pass the guest
information about how much time was spent running other processes
outside the VM, while the vcpu had meaningful work to do - halt
time does not count.

This information is acquired through the run_delay field of
delayacct/schedstats infrastructure, that counts time spent in a
runqueue but not running.

Steal time is per-cpu information, so the traditional MSR-based
infrastructure is used. A new MSR, MSR_KVM_STEAL_TIME, holds the
memory area address containing information about steal time.

This patch contains the hypervisor part of the steal time infrastructure,
and can be backported independently of the guest portion.

[avi, yongjie: export delayacct_on, to avoid build failures in some configs]

Signed-off-by: Glauber Costa <glommer@redhat.com>
Tested-by: Eric B Munson <emunson@mgebm.net>
CC: Rik van Riel <riel@redhat.com>
CC: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Yongjie Ren <yongjie.ren@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

Showing 6 changed files with 89 additions and 2 deletions Side-by-side Diff

arch/x86/include/asm/kvm_host.h
... ... @@ -389,6 +389,15 @@
389 389 unsigned int hw_tsc_khz;
390 390 unsigned int time_offset;
391 391 struct page *time_page;
  392 +
  393 + struct {
  394 + u64 msr_val;
  395 + u64 last_steal;
  396 + u64 accum_steal;
  397 + struct gfn_to_hva_cache stime;
  398 + struct kvm_steal_time steal;
  399 + } st;
  400 +
392 401 u64 last_guest_tsc;
393 402 u64 last_kernel_ns;
394 403 u64 last_tsc_nsec;
arch/x86/include/asm/kvm_para.h
... ... @@ -45,6 +45,10 @@
45 45 __u32 pad[12];
46 46 };
47 47  
  48 +#define KVM_STEAL_ALIGNMENT_BITS 5
  49 +#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1)))
  50 +#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
  51 +
48 52 #define KVM_MAX_MMU_OP_BATCH 32
49 53  
50 54 #define KVM_ASYNC_PF_ENABLED (1 << 0)
arch/x86/kvm/Kconfig
... ... @@ -31,6 +31,7 @@
31 31 select KVM_ASYNC_PF
32 32 select USER_RETURN_NOTIFIER
33 33 select KVM_MMIO
  34 + select TASK_DELAY_ACCT
34 35 ---help---
35 36 Support hosting fully virtualized guest machines using hardware
36 37 virtualization extensions. You will need a fairly recent
... ... @@ -808,12 +808,12 @@
808 808 * kvm-specific. Those are put in the beginning of the list.
809 809 */
810 810  
811   -#define KVM_SAVE_MSRS_BEGIN 8
  811 +#define KVM_SAVE_MSRS_BEGIN 9
812 812 static u32 msrs_to_save[] = {
813 813 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
814 814 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
815 815 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
816   - HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN,
  816 + HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
817 817 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
818 818 MSR_STAR,
819 819 #ifdef CONFIG_X86_64
... ... @@ -1488,6 +1488,35 @@
1488 1488 }
1489 1489 }
1490 1490  
  1491 +static void accumulate_steal_time(struct kvm_vcpu *vcpu)
  1492 +{
  1493 + u64 delta;
  1494 +
  1495 + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
  1496 + return;
  1497 +
  1498 + delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
  1499 + vcpu->arch.st.last_steal = current->sched_info.run_delay;
  1500 + vcpu->arch.st.accum_steal = delta;
  1501 +}
  1502 +
  1503 +static void record_steal_time(struct kvm_vcpu *vcpu)
  1504 +{
  1505 + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
  1506 + return;
  1507 +
  1508 + if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
  1509 + &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
  1510 + return;
  1511 +
  1512 + vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
  1513 + vcpu->arch.st.steal.version += 2;
  1514 + vcpu->arch.st.accum_steal = 0;
  1515 +
  1516 + kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
  1517 + &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
  1518 +}
  1519 +
1491 1520 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1492 1521 {
1493 1522 switch (msr) {
... ... @@ -1570,6 +1599,33 @@
1570 1599 if (kvm_pv_enable_async_pf(vcpu, data))
1571 1600 return 1;
1572 1601 break;
  1602 + case MSR_KVM_STEAL_TIME:
  1603 +
  1604 + if (unlikely(!sched_info_on()))
  1605 + return 1;
  1606 +
  1607 + if (data & KVM_STEAL_RESERVED_MASK)
  1608 + return 1;
  1609 +
  1610 + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
  1611 + data & KVM_STEAL_VALID_BITS))
  1612 + return 1;
  1613 +
  1614 + vcpu->arch.st.msr_val = data;
  1615 +
  1616 + if (!(data & KVM_MSR_ENABLED))
  1617 + break;
  1618 +
  1619 + vcpu->arch.st.last_steal = current->sched_info.run_delay;
  1620 +
  1621 + preempt_disable();
  1622 + accumulate_steal_time(vcpu);
  1623 + preempt_enable();
  1624 +
  1625 + kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
  1626 +
  1627 + break;
  1628 +
1573 1629 case MSR_IA32_MCG_CTL:
1574 1630 case MSR_IA32_MCG_STATUS:
1575 1631 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
... ... @@ -1855,6 +1911,9 @@
1855 1911 case MSR_KVM_ASYNC_PF_EN:
1856 1912 data = vcpu->arch.apf.msr_val;
1857 1913 break;
  1914 + case MSR_KVM_STEAL_TIME:
  1915 + data = vcpu->arch.st.msr_val;
  1916 + break;
1858 1917 case MSR_IA32_P5_MC_ADDR:
1859 1918 case MSR_IA32_P5_MC_TYPE:
1860 1919 case MSR_IA32_MCG_CAP:
... ... @@ -2166,6 +2225,9 @@
2166 2225 kvm_migrate_timers(vcpu);
2167 2226 vcpu->cpu = cpu;
2168 2227 }
  2228 +
  2229 + accumulate_steal_time(vcpu);
  2230 + kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
2169 2231 }
2170 2232  
2171 2233 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
... ... @@ -2487,6 +2549,10 @@
2487 2549 (1 << KVM_FEATURE_CLOCKSOURCE2) |
2488 2550 (1 << KVM_FEATURE_ASYNC_PF) |
2489 2551 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
  2552 +
  2553 + if (sched_info_on())
  2554 + entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
  2555 +
2490 2556 entry->ebx = 0;
2491 2557 entry->ecx = 0;
2492 2558 entry->edx = 0;
... ... @@ -5470,6 +5536,9 @@
5470 5536 r = 1;
5471 5537 goto out;
5472 5538 }
  5539 + if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
  5540 + record_steal_time(vcpu);
  5541 +
5473 5542 }
5474 5543  
5475 5544 r = kvm_mmu_reload(vcpu);
... ... @@ -6206,6 +6275,7 @@
6206 6275  
6207 6276 kvm_make_request(KVM_REQ_EVENT, vcpu);
6208 6277 vcpu->arch.apf.msr_val = 0;
  6278 + vcpu->arch.st.msr_val = 0;
6209 6279  
6210 6280 kvmclock_reset(vcpu);
6211 6281  
include/linux/kvm_host.h
... ... @@ -47,6 +47,7 @@
47 47 #define KVM_REQ_DEACTIVATE_FPU 10
48 48 #define KVM_REQ_EVENT 11
49 49 #define KVM_REQ_APF_HALT 12
  50 +#define KVM_REQ_STEAL_UPDATE 13
50 51  
51 52 #define KVM_USERSPACE_IRQ_SOURCE_ID 0
52 53  
... ... @@ -19,8 +19,10 @@
19 19 #include <linux/time.h>
20 20 #include <linux/sysctl.h>
21 21 #include <linux/delayacct.h>
  22 +#include <linux/module.h>
22 23  
23 24 int delayacct_on __read_mostly = 1; /* Delay accounting turned on/off */
  25 +EXPORT_SYMBOL_GPL(delayacct_on);
24 26 struct kmem_cache *delayacct_cache;
25 27  
26 28 static int __init delayacct_setup_disable(char *str)