Commit 790c73f6289a204f858ffdcbe4a2b38e91657ec6
Committed by
Avi Kivity
1 parent
18068523d3
Exists in
master
and in
20 other branches
x86: KVM guest: paravirtualized clocksource
This is the guest part of kvm clock implementation. It does not do tsc-only timing, as tsc can have deltas between cpus, and it did not seem worthy to me to keep adjusting them. We do use it, however, for fine-grained adjustment. Other than that, time comes from the host. [randy dunlap: add missing include] [randy dunlap: disallow on Voyager or Visual WS] Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com> Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
Showing 5 changed files with 182 additions and 0 deletions Side-by-side Diff
arch/x86/Kconfig
... | ... | @@ -373,6 +373,17 @@ |
373 | 373 | at the moment), by linking the kernel to a GPL-ed ROM module |
374 | 374 | provided by the hypervisor. |
375 | 375 | |
376 | +config KVM_CLOCK | |
377 | + bool "KVM paravirtualized clock" | |
378 | + select PARAVIRT | |
379 | + depends on !(X86_VISWS || X86_VOYAGER) | |
380 | + help | |
381 | + Turning on this option will allow you to run a paravirtualized clock | |
382 | + when running over the KVM hypervisor. Instead of relying on a PIT | |
383 | + (or probably other) emulation by the underlying device model, the host | |
384 | + provides the guest with timing infrastructure such as time of day, and | |
385 | + system time | |
386 | + | |
376 | 387 | source "arch/x86/lguest/Kconfig" |
377 | 388 | |
378 | 389 | config PARAVIRT |
arch/x86/kernel/Makefile
arch/x86/kernel/kvmclock.c
1 | +/* KVM paravirtual clock driver. A clocksource implementation | |
2 | + Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc. | |
3 | + | |
4 | + This program is free software; you can redistribute it and/or modify | |
5 | + it under the terms of the GNU General Public License as published by | |
6 | + the Free Software Foundation; either version 2 of the License, or | |
7 | + (at your option) any later version. | |
8 | + | |
9 | + This program is distributed in the hope that it will be useful, | |
10 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | + GNU General Public License for more details. | |
13 | + | |
14 | + You should have received a copy of the GNU General Public License | |
15 | + along with this program; if not, write to the Free Software | |
16 | + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
17 | +*/ | |
18 | + | |
19 | +#include <linux/clocksource.h> | |
20 | +#include <linux/kvm_para.h> | |
21 | +#include <asm/arch_hooks.h> | |
22 | +#include <asm/msr.h> | |
23 | +#include <asm/apic.h> | |
24 | +#include <linux/percpu.h> | |
25 | + | |
26 | +#define KVM_SCALE 22 | |
27 | + | |
28 | +static int kvmclock = 1; | |
29 | + | |
30 | +static int parse_no_kvmclock(char *arg) | |
31 | +{ | |
32 | + kvmclock = 0; | |
33 | + return 0; | |
34 | +} | |
35 | +early_param("no-kvmclock", parse_no_kvmclock); | |
36 | + | |
37 | +/* The hypervisor will put information about time periodically here */ | |
38 | +static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock); | |
39 | +#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field | |
40 | + | |
41 | +static inline u64 kvm_get_delta(u64 last_tsc) | |
42 | +{ | |
43 | + int cpu = smp_processor_id(); | |
44 | + u64 delta = native_read_tsc() - last_tsc; | |
45 | + return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE; | |
46 | +} | |
47 | + | |
48 | +static struct kvm_wall_clock wall_clock; | |
49 | +static cycle_t kvm_clock_read(void); | |
50 | +/* | |
51 | + * The wallclock is the time of day when we booted. Since then, some time may | |
52 | + * have elapsed since the hypervisor wrote the data. So we try to account for | |
53 | + * that with system time | |
54 | + */ | |
55 | +unsigned long kvm_get_wallclock(void) | |
56 | +{ | |
57 | + u32 wc_sec, wc_nsec; | |
58 | + u64 delta; | |
59 | + struct timespec ts; | |
60 | + int version, nsec; | |
61 | + int low, high; | |
62 | + | |
63 | + low = (int)__pa(&wall_clock); | |
64 | + high = ((u64)__pa(&wall_clock) >> 32); | |
65 | + | |
66 | + delta = kvm_clock_read(); | |
67 | + | |
68 | + native_write_msr(MSR_KVM_WALL_CLOCK, low, high); | |
69 | + do { | |
70 | + version = wall_clock.wc_version; | |
71 | + rmb(); | |
72 | + wc_sec = wall_clock.wc_sec; | |
73 | + wc_nsec = wall_clock.wc_nsec; | |
74 | + rmb(); | |
75 | + } while ((wall_clock.wc_version != version) || (version & 1)); | |
76 | + | |
77 | + delta = kvm_clock_read() - delta; | |
78 | + delta += wc_nsec; | |
79 | + nsec = do_div(delta, NSEC_PER_SEC); | |
80 | + set_normalized_timespec(&ts, wc_sec + delta, nsec); | |
81 | + /* | |
82 | + * Of all mechanisms of time adjustment I've tested, this one | |
83 | + * was the champion! | |
84 | + */ | |
85 | + return ts.tv_sec + 1; | |
86 | +} | |
87 | + | |
88 | +int kvm_set_wallclock(unsigned long now) | |
89 | +{ | |
90 | + return 0; | |
91 | +} | |
92 | + | |
93 | +/* | |
94 | + * This is our read_clock function. The host puts an tsc timestamp each time | |
95 | + * it updates a new time. Without the tsc adjustment, we can have a situation | |
96 | + * in which a vcpu starts to run earlier (smaller system_time), but probes | |
97 | + * time later (compared to another vcpu), leading to backwards time | |
98 | + */ | |
99 | +static cycle_t kvm_clock_read(void) | |
100 | +{ | |
101 | + u64 last_tsc, now; | |
102 | + int cpu; | |
103 | + | |
104 | + preempt_disable(); | |
105 | + cpu = smp_processor_id(); | |
106 | + | |
107 | + last_tsc = get_clock(cpu, tsc_timestamp); | |
108 | + now = get_clock(cpu, system_time); | |
109 | + | |
110 | + now += kvm_get_delta(last_tsc); | |
111 | + preempt_enable(); | |
112 | + | |
113 | + return now; | |
114 | +} | |
115 | +static struct clocksource kvm_clock = { | |
116 | + .name = "kvm-clock", | |
117 | + .read = kvm_clock_read, | |
118 | + .rating = 400, | |
119 | + .mask = CLOCKSOURCE_MASK(64), | |
120 | + .mult = 1 << KVM_SCALE, | |
121 | + .shift = KVM_SCALE, | |
122 | + .flags = CLOCK_SOURCE_IS_CONTINUOUS, | |
123 | +}; | |
124 | + | |
125 | +static int kvm_register_clock(void) | |
126 | +{ | |
127 | + int cpu = smp_processor_id(); | |
128 | + int low, high; | |
129 | + low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; | |
130 | + high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); | |
131 | + | |
132 | + return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); | |
133 | +} | |
134 | + | |
135 | +static void kvm_setup_secondary_clock(void) | |
136 | +{ | |
137 | + /* | |
138 | + * Now that the first cpu already had this clocksource initialized, | |
139 | + * we shouldn't fail. | |
140 | + */ | |
141 | + WARN_ON(kvm_register_clock()); | |
142 | + /* ok, done with our trickery, call native */ | |
143 | + setup_secondary_APIC_clock(); | |
144 | +} | |
145 | + | |
146 | +void __init kvmclock_init(void) | |
147 | +{ | |
148 | + if (!kvm_para_available()) | |
149 | + return; | |
150 | + | |
151 | + if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { | |
152 | + if (kvm_register_clock()) | |
153 | + return; | |
154 | + pv_time_ops.get_wallclock = kvm_get_wallclock; | |
155 | + pv_time_ops.set_wallclock = kvm_set_wallclock; | |
156 | + pv_time_ops.sched_clock = kvm_clock_read; | |
157 | + pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; | |
158 | + clocksource_register(&kvm_clock); | |
159 | + } | |
160 | +} |
arch/x86/kernel/setup_32.c
... | ... | @@ -47,6 +47,7 @@ |
47 | 47 | #include <linux/pfn.h> |
48 | 48 | #include <linux/pci.h> |
49 | 49 | #include <linux/init_ohci1394_dma.h> |
50 | +#include <linux/kvm_para.h> | |
50 | 51 | |
51 | 52 | #include <video/edid.h> |
52 | 53 | |
... | ... | @@ -819,6 +820,10 @@ |
819 | 820 | propagate_e820_map(); |
820 | 821 | |
821 | 822 | max_low_pfn = setup_memory(); |
823 | + | |
824 | +#ifdef CONFIG_KVM_CLOCK | |
825 | + kvmclock_init(); | |
826 | +#endif | |
822 | 827 | |
823 | 828 | #ifdef CONFIG_VMI |
824 | 829 | /* |
arch/x86/kernel/setup_64.c
... | ... | @@ -42,6 +42,7 @@ |
42 | 42 | #include <linux/ctype.h> |
43 | 43 | #include <linux/uaccess.h> |
44 | 44 | #include <linux/init_ohci1394_dma.h> |
45 | +#include <linux/kvm_para.h> | |
45 | 46 | |
46 | 47 | #include <asm/mtrr.h> |
47 | 48 | #include <asm/uaccess.h> |
... | ... | @@ -383,6 +384,10 @@ |
383 | 384 | dmi_scan_machine(); |
384 | 385 | |
385 | 386 | io_delay_init(); |
387 | + | |
388 | +#ifdef CONFIG_KVM_CLOCK | |
389 | + kvmclock_init(); | |
390 | +#endif | |
386 | 391 | |
387 | 392 | #ifdef CONFIG_SMP |
388 | 393 | /* setup to use the early static init tables during kernel startup */ |