Commit ecbf29cdb3990c83d90d0c4187c89fb2ce423367
Committed by: Ingo Molnar
Parent: f63c2f2489
Exists in: master and in 39 other branches
xen: clean up asm/xen/hypervisor.h
Impact: cleanup

hypervisor.h had accumulated a lot of crud, including lots of spurious #includes. Clean it all up, and go around fixing up everything else accordingly.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Showing 8 changed files with 31 additions and 33 deletions
arch/x86/include/asm/xen/hypercall.h
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | * hypercall.h | 2 | * hypercall.h |
3 | * | 3 | * |
4 | * Linux-specific hypervisor handling. | 4 | * Linux-specific hypervisor handling. |
5 | * | 5 | * |
6 | * Copyright (c) 2002-2004, K A Fraser | 6 | * Copyright (c) 2002-2004, K A Fraser |
7 | * | 7 | * |
8 | * This program is free software; you can redistribute it and/or | 8 | * This program is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU General Public License version 2 | 9 | * modify it under the terms of the GNU General Public License version 2 |
10 | * as published by the Free Software Foundation; or, when distributed | 10 | * as published by the Free Software Foundation; or, when distributed |
11 | * separately from the Linux kernel or incorporated into other | 11 | * separately from the Linux kernel or incorporated into other |
12 | * software packages, subject to the following license: | 12 | * software packages, subject to the following license: |
13 | * | 13 | * |
14 | * Permission is hereby granted, free of charge, to any person obtaining a copy | 14 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
15 | * of this source file (the "Software"), to deal in the Software without | 15 | * of this source file (the "Software"), to deal in the Software without |
16 | * restriction, including without limitation the rights to use, copy, modify, | 16 | * restriction, including without limitation the rights to use, copy, modify, |
17 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | 17 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, |
18 | * and to permit persons to whom the Software is furnished to do so, subject to | 18 | * and to permit persons to whom the Software is furnished to do so, subject to |
19 | * the following conditions: | 19 | * the following conditions: |
20 | * | 20 | * |
21 | * The above copyright notice and this permission notice shall be included in | 21 | * The above copyright notice and this permission notice shall be included in |
22 | * all copies or substantial portions of the Software. | 22 | * all copies or substantial portions of the Software. |
23 | * | 23 | * |
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
25 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 25 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
26 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 26 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
27 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 27 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
28 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | 28 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
29 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | 29 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
30 | * IN THE SOFTWARE. | 30 | * IN THE SOFTWARE. |
31 | */ | 31 | */ |
32 | 32 | ||
33 | #ifndef _ASM_X86_XEN_HYPERCALL_H | 33 | #ifndef _ASM_X86_XEN_HYPERCALL_H |
34 | #define _ASM_X86_XEN_HYPERCALL_H | 34 | #define _ASM_X86_XEN_HYPERCALL_H |
35 | 35 | ||
36 | #include <linux/kernel.h> | ||
37 | #include <linux/spinlock.h> | ||
36 | #include <linux/errno.h> | 38 | #include <linux/errno.h> |
37 | #include <linux/string.h> | 39 | #include <linux/string.h> |
40 | #include <linux/types.h> | ||
41 | |||
42 | #include <asm/page.h> | ||
43 | #include <asm/pgtable.h> | ||
38 | 44 | ||
39 | #include <xen/interface/xen.h> | 45 | #include <xen/interface/xen.h> |
40 | #include <xen/interface/sched.h> | 46 | #include <xen/interface/sched.h> |
41 | #include <xen/interface/physdev.h> | 47 | #include <xen/interface/physdev.h> |
42 | 48 | ||
43 | /* | 49 | /* |
44 | * The hypercall asms have to meet several constraints: | 50 | * The hypercall asms have to meet several constraints: |
45 | * - Work on 32- and 64-bit. | 51 | * - Work on 32- and 64-bit. |
46 | * The two architectures put their arguments in different sets of | 52 | * The two architectures put their arguments in different sets of |
47 | * registers. | 53 | * registers. |
48 | * | 54 | * |
49 | * - Work around asm syntax quirks | 55 | * - Work around asm syntax quirks |
50 | * It isn't possible to specify one of the rNN registers in a | 56 | * It isn't possible to specify one of the rNN registers in a |
51 | * constraint, so we use explicit register variables to get the | 57 | * constraint, so we use explicit register variables to get the |
52 | * args into the right place. | 58 | * args into the right place. |
53 | * | 59 | * |
54 | * - Mark all registers as potentially clobbered | 60 | * - Mark all registers as potentially clobbered |
55 | * Even unused parameters can be clobbered by the hypervisor, so we | 61 | * Even unused parameters can be clobbered by the hypervisor, so we |
56 | * need to make sure gcc knows it. | 62 | * need to make sure gcc knows it. |
57 | * | 63 | * |
58 | * - Avoid compiler bugs. | 64 | * - Avoid compiler bugs. |
59 | * This is the tricky part. Because x86_32 has such a constrained | 65 | * This is the tricky part. Because x86_32 has such a constrained |
60 | * register set, gcc versions below 4.3 have trouble generating | 66 | * register set, gcc versions below 4.3 have trouble generating |
61 | * code when all the arg registers and memory are trashed by the | 67 | * code when all the arg registers and memory are trashed by the |
62 | * asm. There are syntactically simpler ways of achieving the | 68 | * asm. There are syntactically simpler ways of achieving the |
63 | * semantics below, but they cause the compiler to crash. | 69 | * semantics below, but they cause the compiler to crash. |
64 | * | 70 | * |
65 | * The only combination I found which works is: | 71 | * The only combination I found which works is: |
66 | * - assign the __argX variables first | 72 | * - assign the __argX variables first |
67 | * - list all actually used parameters as "+r" (__argX) | 73 | * - list all actually used parameters as "+r" (__argX) |
68 | * - clobber the rest | 74 | * - clobber the rest |
69 | * | 75 | * |
70 | * The result certainly isn't pretty, and it really shows up cpp's | 76 | * The result certainly isn't pretty, and it really shows up cpp's |
71 | * weakness as a macro language. Sorry. (But let's just give thanks | 77 | * weakness as a macro language. Sorry. (But let's just give thanks |
72 | * there aren't more than 5 arguments...) | 78 | * there aren't more than 5 arguments...) |
73 | */ | 79 | */ |
74 | 80 | ||
75 | extern struct { char _entry[32]; } hypercall_page[]; | 81 | extern struct { char _entry[32]; } hypercall_page[]; |
76 | 82 | ||
77 | #define __HYPERCALL "call hypercall_page+%c[offset]" | 83 | #define __HYPERCALL "call hypercall_page+%c[offset]" |
78 | #define __HYPERCALL_ENTRY(x) \ | 84 | #define __HYPERCALL_ENTRY(x) \ |
79 | [offset] "i" (__HYPERVISOR_##x * sizeof(hypercall_page[0])) | 85 | [offset] "i" (__HYPERVISOR_##x * sizeof(hypercall_page[0])) |
80 | 86 | ||
81 | #ifdef CONFIG_X86_32 | 87 | #ifdef CONFIG_X86_32 |
82 | #define __HYPERCALL_RETREG "eax" | 88 | #define __HYPERCALL_RETREG "eax" |
83 | #define __HYPERCALL_ARG1REG "ebx" | 89 | #define __HYPERCALL_ARG1REG "ebx" |
84 | #define __HYPERCALL_ARG2REG "ecx" | 90 | #define __HYPERCALL_ARG2REG "ecx" |
85 | #define __HYPERCALL_ARG3REG "edx" | 91 | #define __HYPERCALL_ARG3REG "edx" |
86 | #define __HYPERCALL_ARG4REG "esi" | 92 | #define __HYPERCALL_ARG4REG "esi" |
87 | #define __HYPERCALL_ARG5REG "edi" | 93 | #define __HYPERCALL_ARG5REG "edi" |
88 | #else | 94 | #else |
89 | #define __HYPERCALL_RETREG "rax" | 95 | #define __HYPERCALL_RETREG "rax" |
90 | #define __HYPERCALL_ARG1REG "rdi" | 96 | #define __HYPERCALL_ARG1REG "rdi" |
91 | #define __HYPERCALL_ARG2REG "rsi" | 97 | #define __HYPERCALL_ARG2REG "rsi" |
92 | #define __HYPERCALL_ARG3REG "rdx" | 98 | #define __HYPERCALL_ARG3REG "rdx" |
93 | #define __HYPERCALL_ARG4REG "r10" | 99 | #define __HYPERCALL_ARG4REG "r10" |
94 | #define __HYPERCALL_ARG5REG "r8" | 100 | #define __HYPERCALL_ARG5REG "r8" |
95 | #endif | 101 | #endif |
96 | 102 | ||
97 | #define __HYPERCALL_DECLS \ | 103 | #define __HYPERCALL_DECLS \ |
98 | register unsigned long __res asm(__HYPERCALL_RETREG); \ | 104 | register unsigned long __res asm(__HYPERCALL_RETREG); \ |
99 | register unsigned long __arg1 asm(__HYPERCALL_ARG1REG) = __arg1; \ | 105 | register unsigned long __arg1 asm(__HYPERCALL_ARG1REG) = __arg1; \ |
100 | register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \ | 106 | register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \ |
101 | register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \ | 107 | register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \ |
102 | register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \ | 108 | register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \ |
103 | register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; | 109 | register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; |
104 | 110 | ||
105 | #define __HYPERCALL_0PARAM "=r" (__res) | 111 | #define __HYPERCALL_0PARAM "=r" (__res) |
106 | #define __HYPERCALL_1PARAM __HYPERCALL_0PARAM, "+r" (__arg1) | 112 | #define __HYPERCALL_1PARAM __HYPERCALL_0PARAM, "+r" (__arg1) |
107 | #define __HYPERCALL_2PARAM __HYPERCALL_1PARAM, "+r" (__arg2) | 113 | #define __HYPERCALL_2PARAM __HYPERCALL_1PARAM, "+r" (__arg2) |
108 | #define __HYPERCALL_3PARAM __HYPERCALL_2PARAM, "+r" (__arg3) | 114 | #define __HYPERCALL_3PARAM __HYPERCALL_2PARAM, "+r" (__arg3) |
109 | #define __HYPERCALL_4PARAM __HYPERCALL_3PARAM, "+r" (__arg4) | 115 | #define __HYPERCALL_4PARAM __HYPERCALL_3PARAM, "+r" (__arg4) |
110 | #define __HYPERCALL_5PARAM __HYPERCALL_4PARAM, "+r" (__arg5) | 116 | #define __HYPERCALL_5PARAM __HYPERCALL_4PARAM, "+r" (__arg5) |
111 | 117 | ||
112 | #define __HYPERCALL_0ARG() | 118 | #define __HYPERCALL_0ARG() |
113 | #define __HYPERCALL_1ARG(a1) \ | 119 | #define __HYPERCALL_1ARG(a1) \ |
114 | __HYPERCALL_0ARG() __arg1 = (unsigned long)(a1); | 120 | __HYPERCALL_0ARG() __arg1 = (unsigned long)(a1); |
115 | #define __HYPERCALL_2ARG(a1,a2) \ | 121 | #define __HYPERCALL_2ARG(a1,a2) \ |
116 | __HYPERCALL_1ARG(a1) __arg2 = (unsigned long)(a2); | 122 | __HYPERCALL_1ARG(a1) __arg2 = (unsigned long)(a2); |
117 | #define __HYPERCALL_3ARG(a1,a2,a3) \ | 123 | #define __HYPERCALL_3ARG(a1,a2,a3) \ |
118 | __HYPERCALL_2ARG(a1,a2) __arg3 = (unsigned long)(a3); | 124 | __HYPERCALL_2ARG(a1,a2) __arg3 = (unsigned long)(a3); |
119 | #define __HYPERCALL_4ARG(a1,a2,a3,a4) \ | 125 | #define __HYPERCALL_4ARG(a1,a2,a3,a4) \ |
120 | __HYPERCALL_3ARG(a1,a2,a3) __arg4 = (unsigned long)(a4); | 126 | __HYPERCALL_3ARG(a1,a2,a3) __arg4 = (unsigned long)(a4); |
121 | #define __HYPERCALL_5ARG(a1,a2,a3,a4,a5) \ | 127 | #define __HYPERCALL_5ARG(a1,a2,a3,a4,a5) \ |
122 | __HYPERCALL_4ARG(a1,a2,a3,a4) __arg5 = (unsigned long)(a5); | 128 | __HYPERCALL_4ARG(a1,a2,a3,a4) __arg5 = (unsigned long)(a5); |
123 | 129 | ||
124 | #define __HYPERCALL_CLOBBER5 "memory" | 130 | #define __HYPERCALL_CLOBBER5 "memory" |
125 | #define __HYPERCALL_CLOBBER4 __HYPERCALL_CLOBBER5, __HYPERCALL_ARG5REG | 131 | #define __HYPERCALL_CLOBBER4 __HYPERCALL_CLOBBER5, __HYPERCALL_ARG5REG |
126 | #define __HYPERCALL_CLOBBER3 __HYPERCALL_CLOBBER4, __HYPERCALL_ARG4REG | 132 | #define __HYPERCALL_CLOBBER3 __HYPERCALL_CLOBBER4, __HYPERCALL_ARG4REG |
127 | #define __HYPERCALL_CLOBBER2 __HYPERCALL_CLOBBER3, __HYPERCALL_ARG3REG | 133 | #define __HYPERCALL_CLOBBER2 __HYPERCALL_CLOBBER3, __HYPERCALL_ARG3REG |
128 | #define __HYPERCALL_CLOBBER1 __HYPERCALL_CLOBBER2, __HYPERCALL_ARG2REG | 134 | #define __HYPERCALL_CLOBBER1 __HYPERCALL_CLOBBER2, __HYPERCALL_ARG2REG |
129 | #define __HYPERCALL_CLOBBER0 __HYPERCALL_CLOBBER1, __HYPERCALL_ARG1REG | 135 | #define __HYPERCALL_CLOBBER0 __HYPERCALL_CLOBBER1, __HYPERCALL_ARG1REG |
130 | 136 | ||
131 | #define _hypercall0(type, name) \ | 137 | #define _hypercall0(type, name) \ |
132 | ({ \ | 138 | ({ \ |
133 | __HYPERCALL_DECLS; \ | 139 | __HYPERCALL_DECLS; \ |
134 | __HYPERCALL_0ARG(); \ | 140 | __HYPERCALL_0ARG(); \ |
135 | asm volatile (__HYPERCALL \ | 141 | asm volatile (__HYPERCALL \ |
136 | : __HYPERCALL_0PARAM \ | 142 | : __HYPERCALL_0PARAM \ |
137 | : __HYPERCALL_ENTRY(name) \ | 143 | : __HYPERCALL_ENTRY(name) \ |
138 | : __HYPERCALL_CLOBBER0); \ | 144 | : __HYPERCALL_CLOBBER0); \ |
139 | (type)__res; \ | 145 | (type)__res; \ |
140 | }) | 146 | }) |
141 | 147 | ||
142 | #define _hypercall1(type, name, a1) \ | 148 | #define _hypercall1(type, name, a1) \ |
143 | ({ \ | 149 | ({ \ |
144 | __HYPERCALL_DECLS; \ | 150 | __HYPERCALL_DECLS; \ |
145 | __HYPERCALL_1ARG(a1); \ | 151 | __HYPERCALL_1ARG(a1); \ |
146 | asm volatile (__HYPERCALL \ | 152 | asm volatile (__HYPERCALL \ |
147 | : __HYPERCALL_1PARAM \ | 153 | : __HYPERCALL_1PARAM \ |
148 | : __HYPERCALL_ENTRY(name) \ | 154 | : __HYPERCALL_ENTRY(name) \ |
149 | : __HYPERCALL_CLOBBER1); \ | 155 | : __HYPERCALL_CLOBBER1); \ |
150 | (type)__res; \ | 156 | (type)__res; \ |
151 | }) | 157 | }) |
152 | 158 | ||
153 | #define _hypercall2(type, name, a1, a2) \ | 159 | #define _hypercall2(type, name, a1, a2) \ |
154 | ({ \ | 160 | ({ \ |
155 | __HYPERCALL_DECLS; \ | 161 | __HYPERCALL_DECLS; \ |
156 | __HYPERCALL_2ARG(a1, a2); \ | 162 | __HYPERCALL_2ARG(a1, a2); \ |
157 | asm volatile (__HYPERCALL \ | 163 | asm volatile (__HYPERCALL \ |
158 | : __HYPERCALL_2PARAM \ | 164 | : __HYPERCALL_2PARAM \ |
159 | : __HYPERCALL_ENTRY(name) \ | 165 | : __HYPERCALL_ENTRY(name) \ |
160 | : __HYPERCALL_CLOBBER2); \ | 166 | : __HYPERCALL_CLOBBER2); \ |
161 | (type)__res; \ | 167 | (type)__res; \ |
162 | }) | 168 | }) |
163 | 169 | ||
164 | #define _hypercall3(type, name, a1, a2, a3) \ | 170 | #define _hypercall3(type, name, a1, a2, a3) \ |
165 | ({ \ | 171 | ({ \ |
166 | __HYPERCALL_DECLS; \ | 172 | __HYPERCALL_DECLS; \ |
167 | __HYPERCALL_3ARG(a1, a2, a3); \ | 173 | __HYPERCALL_3ARG(a1, a2, a3); \ |
168 | asm volatile (__HYPERCALL \ | 174 | asm volatile (__HYPERCALL \ |
169 | : __HYPERCALL_3PARAM \ | 175 | : __HYPERCALL_3PARAM \ |
170 | : __HYPERCALL_ENTRY(name) \ | 176 | : __HYPERCALL_ENTRY(name) \ |
171 | : __HYPERCALL_CLOBBER3); \ | 177 | : __HYPERCALL_CLOBBER3); \ |
172 | (type)__res; \ | 178 | (type)__res; \ |
173 | }) | 179 | }) |
174 | 180 | ||
175 | #define _hypercall4(type, name, a1, a2, a3, a4) \ | 181 | #define _hypercall4(type, name, a1, a2, a3, a4) \ |
176 | ({ \ | 182 | ({ \ |
177 | __HYPERCALL_DECLS; \ | 183 | __HYPERCALL_DECLS; \ |
178 | __HYPERCALL_4ARG(a1, a2, a3, a4); \ | 184 | __HYPERCALL_4ARG(a1, a2, a3, a4); \ |
179 | asm volatile (__HYPERCALL \ | 185 | asm volatile (__HYPERCALL \ |
180 | : __HYPERCALL_4PARAM \ | 186 | : __HYPERCALL_4PARAM \ |
181 | : __HYPERCALL_ENTRY(name) \ | 187 | : __HYPERCALL_ENTRY(name) \ |
182 | : __HYPERCALL_CLOBBER4); \ | 188 | : __HYPERCALL_CLOBBER4); \ |
183 | (type)__res; \ | 189 | (type)__res; \ |
184 | }) | 190 | }) |
185 | 191 | ||
186 | #define _hypercall5(type, name, a1, a2, a3, a4, a5) \ | 192 | #define _hypercall5(type, name, a1, a2, a3, a4, a5) \ |
187 | ({ \ | 193 | ({ \ |
188 | __HYPERCALL_DECLS; \ | 194 | __HYPERCALL_DECLS; \ |
189 | __HYPERCALL_5ARG(a1, a2, a3, a4, a5); \ | 195 | __HYPERCALL_5ARG(a1, a2, a3, a4, a5); \ |
190 | asm volatile (__HYPERCALL \ | 196 | asm volatile (__HYPERCALL \ |
191 | : __HYPERCALL_5PARAM \ | 197 | : __HYPERCALL_5PARAM \ |
192 | : __HYPERCALL_ENTRY(name) \ | 198 | : __HYPERCALL_ENTRY(name) \ |
193 | : __HYPERCALL_CLOBBER5); \ | 199 | : __HYPERCALL_CLOBBER5); \ |
194 | (type)__res; \ | 200 | (type)__res; \ |
195 | }) | 201 | }) |
196 | 202 | ||
197 | static inline int | 203 | static inline int |
198 | HYPERVISOR_set_trap_table(struct trap_info *table) | 204 | HYPERVISOR_set_trap_table(struct trap_info *table) |
199 | { | 205 | { |
200 | return _hypercall1(int, set_trap_table, table); | 206 | return _hypercall1(int, set_trap_table, table); |
201 | } | 207 | } |
202 | 208 | ||
203 | static inline int | 209 | static inline int |
204 | HYPERVISOR_mmu_update(struct mmu_update *req, int count, | 210 | HYPERVISOR_mmu_update(struct mmu_update *req, int count, |
205 | int *success_count, domid_t domid) | 211 | int *success_count, domid_t domid) |
206 | { | 212 | { |
207 | return _hypercall4(int, mmu_update, req, count, success_count, domid); | 213 | return _hypercall4(int, mmu_update, req, count, success_count, domid); |
208 | } | 214 | } |
209 | 215 | ||
210 | static inline int | 216 | static inline int |
211 | HYPERVISOR_mmuext_op(struct mmuext_op *op, int count, | 217 | HYPERVISOR_mmuext_op(struct mmuext_op *op, int count, |
212 | int *success_count, domid_t domid) | 218 | int *success_count, domid_t domid) |
213 | { | 219 | { |
214 | return _hypercall4(int, mmuext_op, op, count, success_count, domid); | 220 | return _hypercall4(int, mmuext_op, op, count, success_count, domid); |
215 | } | 221 | } |
216 | 222 | ||
217 | static inline int | 223 | static inline int |
218 | HYPERVISOR_set_gdt(unsigned long *frame_list, int entries) | 224 | HYPERVISOR_set_gdt(unsigned long *frame_list, int entries) |
219 | { | 225 | { |
220 | return _hypercall2(int, set_gdt, frame_list, entries); | 226 | return _hypercall2(int, set_gdt, frame_list, entries); |
221 | } | 227 | } |
222 | 228 | ||
223 | static inline int | 229 | static inline int |
224 | HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp) | 230 | HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp) |
225 | { | 231 | { |
226 | return _hypercall2(int, stack_switch, ss, esp); | 232 | return _hypercall2(int, stack_switch, ss, esp); |
227 | } | 233 | } |
228 | 234 | ||
229 | #ifdef CONFIG_X86_32 | 235 | #ifdef CONFIG_X86_32 |
230 | static inline int | 236 | static inline int |
231 | HYPERVISOR_set_callbacks(unsigned long event_selector, | 237 | HYPERVISOR_set_callbacks(unsigned long event_selector, |
232 | unsigned long event_address, | 238 | unsigned long event_address, |
233 | unsigned long failsafe_selector, | 239 | unsigned long failsafe_selector, |
234 | unsigned long failsafe_address) | 240 | unsigned long failsafe_address) |
235 | { | 241 | { |
236 | return _hypercall4(int, set_callbacks, | 242 | return _hypercall4(int, set_callbacks, |
237 | event_selector, event_address, | 243 | event_selector, event_address, |
238 | failsafe_selector, failsafe_address); | 244 | failsafe_selector, failsafe_address); |
239 | } | 245 | } |
240 | #else /* CONFIG_X86_64 */ | 246 | #else /* CONFIG_X86_64 */ |
241 | static inline int | 247 | static inline int |
242 | HYPERVISOR_set_callbacks(unsigned long event_address, | 248 | HYPERVISOR_set_callbacks(unsigned long event_address, |
243 | unsigned long failsafe_address, | 249 | unsigned long failsafe_address, |
244 | unsigned long syscall_address) | 250 | unsigned long syscall_address) |
245 | { | 251 | { |
246 | return _hypercall3(int, set_callbacks, | 252 | return _hypercall3(int, set_callbacks, |
247 | event_address, failsafe_address, | 253 | event_address, failsafe_address, |
248 | syscall_address); | 254 | syscall_address); |
249 | } | 255 | } |
250 | #endif /* CONFIG_X86_{32,64} */ | 256 | #endif /* CONFIG_X86_{32,64} */ |
251 | 257 | ||
252 | static inline int | 258 | static inline int |
253 | HYPERVISOR_callback_op(int cmd, void *arg) | 259 | HYPERVISOR_callback_op(int cmd, void *arg) |
254 | { | 260 | { |
255 | return _hypercall2(int, callback_op, cmd, arg); | 261 | return _hypercall2(int, callback_op, cmd, arg); |
256 | } | 262 | } |
257 | 263 | ||
258 | static inline int | 264 | static inline int |
259 | HYPERVISOR_fpu_taskswitch(int set) | 265 | HYPERVISOR_fpu_taskswitch(int set) |
260 | { | 266 | { |
261 | return _hypercall1(int, fpu_taskswitch, set); | 267 | return _hypercall1(int, fpu_taskswitch, set); |
262 | } | 268 | } |
263 | 269 | ||
264 | static inline int | 270 | static inline int |
265 | HYPERVISOR_sched_op(int cmd, void *arg) | 271 | HYPERVISOR_sched_op(int cmd, void *arg) |
266 | { | 272 | { |
267 | return _hypercall2(int, sched_op_new, cmd, arg); | 273 | return _hypercall2(int, sched_op_new, cmd, arg); |
268 | } | 274 | } |
269 | 275 | ||
270 | static inline long | 276 | static inline long |
271 | HYPERVISOR_set_timer_op(u64 timeout) | 277 | HYPERVISOR_set_timer_op(u64 timeout) |
272 | { | 278 | { |
273 | unsigned long timeout_hi = (unsigned long)(timeout>>32); | 279 | unsigned long timeout_hi = (unsigned long)(timeout>>32); |
274 | unsigned long timeout_lo = (unsigned long)timeout; | 280 | unsigned long timeout_lo = (unsigned long)timeout; |
275 | return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); | 281 | return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); |
276 | } | 282 | } |
277 | 283 | ||
278 | static inline int | 284 | static inline int |
279 | HYPERVISOR_set_debugreg(int reg, unsigned long value) | 285 | HYPERVISOR_set_debugreg(int reg, unsigned long value) |
280 | { | 286 | { |
281 | return _hypercall2(int, set_debugreg, reg, value); | 287 | return _hypercall2(int, set_debugreg, reg, value); |
282 | } | 288 | } |
283 | 289 | ||
284 | static inline unsigned long | 290 | static inline unsigned long |
285 | HYPERVISOR_get_debugreg(int reg) | 291 | HYPERVISOR_get_debugreg(int reg) |
286 | { | 292 | { |
287 | return _hypercall1(unsigned long, get_debugreg, reg); | 293 | return _hypercall1(unsigned long, get_debugreg, reg); |
288 | } | 294 | } |
289 | 295 | ||
290 | static inline int | 296 | static inline int |
291 | HYPERVISOR_update_descriptor(u64 ma, u64 desc) | 297 | HYPERVISOR_update_descriptor(u64 ma, u64 desc) |
292 | { | 298 | { |
293 | return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); | 299 | return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); |
294 | } | 300 | } |
295 | 301 | ||
296 | static inline int | 302 | static inline int |
297 | HYPERVISOR_memory_op(unsigned int cmd, void *arg) | 303 | HYPERVISOR_memory_op(unsigned int cmd, void *arg) |
298 | { | 304 | { |
299 | return _hypercall2(int, memory_op, cmd, arg); | 305 | return _hypercall2(int, memory_op, cmd, arg); |
300 | } | 306 | } |
301 | 307 | ||
302 | static inline int | 308 | static inline int |
303 | HYPERVISOR_multicall(void *call_list, int nr_calls) | 309 | HYPERVISOR_multicall(void *call_list, int nr_calls) |
304 | { | 310 | { |
305 | return _hypercall2(int, multicall, call_list, nr_calls); | 311 | return _hypercall2(int, multicall, call_list, nr_calls); |
306 | } | 312 | } |
307 | 313 | ||
308 | static inline int | 314 | static inline int |
309 | HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val, | 315 | HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val, |
310 | unsigned long flags) | 316 | unsigned long flags) |
311 | { | 317 | { |
312 | if (sizeof(new_val) == sizeof(long)) | 318 | if (sizeof(new_val) == sizeof(long)) |
313 | return _hypercall3(int, update_va_mapping, va, | 319 | return _hypercall3(int, update_va_mapping, va, |
314 | new_val.pte, flags); | 320 | new_val.pte, flags); |
315 | else | 321 | else |
316 | return _hypercall4(int, update_va_mapping, va, | 322 | return _hypercall4(int, update_va_mapping, va, |
317 | new_val.pte, new_val.pte >> 32, flags); | 323 | new_val.pte, new_val.pte >> 32, flags); |
318 | } | 324 | } |
319 | 325 | ||
320 | static inline int | 326 | static inline int |
321 | HYPERVISOR_event_channel_op(int cmd, void *arg) | 327 | HYPERVISOR_event_channel_op(int cmd, void *arg) |
322 | { | 328 | { |
323 | int rc = _hypercall2(int, event_channel_op, cmd, arg); | 329 | int rc = _hypercall2(int, event_channel_op, cmd, arg); |
324 | if (unlikely(rc == -ENOSYS)) { | 330 | if (unlikely(rc == -ENOSYS)) { |
325 | struct evtchn_op op; | 331 | struct evtchn_op op; |
326 | op.cmd = cmd; | 332 | op.cmd = cmd; |
327 | memcpy(&op.u, arg, sizeof(op.u)); | 333 | memcpy(&op.u, arg, sizeof(op.u)); |
328 | rc = _hypercall1(int, event_channel_op_compat, &op); | 334 | rc = _hypercall1(int, event_channel_op_compat, &op); |
329 | memcpy(arg, &op.u, sizeof(op.u)); | 335 | memcpy(arg, &op.u, sizeof(op.u)); |
330 | } | 336 | } |
331 | return rc; | 337 | return rc; |
332 | } | 338 | } |
333 | 339 | ||
334 | static inline int | 340 | static inline int |
335 | HYPERVISOR_xen_version(int cmd, void *arg) | 341 | HYPERVISOR_xen_version(int cmd, void *arg) |
336 | { | 342 | { |
337 | return _hypercall2(int, xen_version, cmd, arg); | 343 | return _hypercall2(int, xen_version, cmd, arg); |
338 | } | 344 | } |
339 | 345 | ||
340 | static inline int | 346 | static inline int |
341 | HYPERVISOR_console_io(int cmd, int count, char *str) | 347 | HYPERVISOR_console_io(int cmd, int count, char *str) |
342 | { | 348 | { |
343 | return _hypercall3(int, console_io, cmd, count, str); | 349 | return _hypercall3(int, console_io, cmd, count, str); |
344 | } | 350 | } |
345 | 351 | ||
346 | static inline int | 352 | static inline int |
347 | HYPERVISOR_physdev_op(int cmd, void *arg) | 353 | HYPERVISOR_physdev_op(int cmd, void *arg) |
348 | { | 354 | { |
349 | int rc = _hypercall2(int, physdev_op, cmd, arg); | 355 | int rc = _hypercall2(int, physdev_op, cmd, arg); |
350 | if (unlikely(rc == -ENOSYS)) { | 356 | if (unlikely(rc == -ENOSYS)) { |
351 | struct physdev_op op; | 357 | struct physdev_op op; |
352 | op.cmd = cmd; | 358 | op.cmd = cmd; |
353 | memcpy(&op.u, arg, sizeof(op.u)); | 359 | memcpy(&op.u, arg, sizeof(op.u)); |
354 | rc = _hypercall1(int, physdev_op_compat, &op); | 360 | rc = _hypercall1(int, physdev_op_compat, &op); |
355 | memcpy(arg, &op.u, sizeof(op.u)); | 361 | memcpy(arg, &op.u, sizeof(op.u)); |
356 | } | 362 | } |
357 | return rc; | 363 | return rc; |
358 | } | 364 | } |
359 | 365 | ||
360 | static inline int | 366 | static inline int |
361 | HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count) | 367 | HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count) |
362 | { | 368 | { |
363 | return _hypercall3(int, grant_table_op, cmd, uop, count); | 369 | return _hypercall3(int, grant_table_op, cmd, uop, count); |
364 | } | 370 | } |
365 | 371 | ||
366 | static inline int | 372 | static inline int |
367 | HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, pte_t new_val, | 373 | HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, pte_t new_val, |
368 | unsigned long flags, domid_t domid) | 374 | unsigned long flags, domid_t domid) |
369 | { | 375 | { |
370 | if (sizeof(new_val) == sizeof(long)) | 376 | if (sizeof(new_val) == sizeof(long)) |
371 | return _hypercall4(int, update_va_mapping_otherdomain, va, | 377 | return _hypercall4(int, update_va_mapping_otherdomain, va, |
372 | new_val.pte, flags, domid); | 378 | new_val.pte, flags, domid); |
373 | else | 379 | else |
374 | return _hypercall5(int, update_va_mapping_otherdomain, va, | 380 | return _hypercall5(int, update_va_mapping_otherdomain, va, |
375 | new_val.pte, new_val.pte >> 32, | 381 | new_val.pte, new_val.pte >> 32, |
376 | flags, domid); | 382 | flags, domid); |
377 | } | 383 | } |
378 | 384 | ||
379 | static inline int | 385 | static inline int |
380 | HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type) | 386 | HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type) |
381 | { | 387 | { |
382 | return _hypercall2(int, vm_assist, cmd, type); | 388 | return _hypercall2(int, vm_assist, cmd, type); |
383 | } | 389 | } |
384 | 390 | ||
385 | static inline int | 391 | static inline int |
386 | HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args) | 392 | HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args) |
387 | { | 393 | { |
388 | return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); | 394 | return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); |
389 | } | 395 | } |
390 | 396 | ||
391 | #ifdef CONFIG_X86_64 | 397 | #ifdef CONFIG_X86_64 |
392 | static inline int | 398 | static inline int |
393 | HYPERVISOR_set_segment_base(int reg, unsigned long value) | 399 | HYPERVISOR_set_segment_base(int reg, unsigned long value) |
394 | { | 400 | { |
395 | return _hypercall2(int, set_segment_base, reg, value); | 401 | return _hypercall2(int, set_segment_base, reg, value); |
396 | } | 402 | } |
397 | #endif | 403 | #endif |
398 | 404 | ||
399 | static inline int | 405 | static inline int |
400 | HYPERVISOR_suspend(unsigned long srec) | 406 | HYPERVISOR_suspend(unsigned long srec) |
401 | { | 407 | { |
402 | return _hypercall3(int, sched_op, SCHEDOP_shutdown, | 408 | return _hypercall3(int, sched_op, SCHEDOP_shutdown, |
403 | SHUTDOWN_suspend, srec); | 409 | SHUTDOWN_suspend, srec); |
404 | } | 410 | } |
405 | 411 | ||
406 | static inline int | 412 | static inline int |
407 | HYPERVISOR_nmi_op(unsigned long op, unsigned long arg) | 413 | HYPERVISOR_nmi_op(unsigned long op, unsigned long arg) |
408 | { | 414 | { |
409 | return _hypercall2(int, nmi_op, op, arg); | 415 | return _hypercall2(int, nmi_op, op, arg); |
410 | } | 416 | } |
411 | 417 | ||
412 | static inline void | 418 | static inline void |
413 | MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) | 419 | MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) |
414 | { | 420 | { |
415 | mcl->op = __HYPERVISOR_fpu_taskswitch; | 421 | mcl->op = __HYPERVISOR_fpu_taskswitch; |
416 | mcl->args[0] = set; | 422 | mcl->args[0] = set; |
417 | } | 423 | } |
418 | 424 | ||
419 | static inline void | 425 | static inline void |
420 | MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, | 426 | MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, |
421 | pte_t new_val, unsigned long flags) | 427 | pte_t new_val, unsigned long flags) |
422 | { | 428 | { |
423 | mcl->op = __HYPERVISOR_update_va_mapping; | 429 | mcl->op = __HYPERVISOR_update_va_mapping; |
424 | mcl->args[0] = va; | 430 | mcl->args[0] = va; |
425 | if (sizeof(new_val) == sizeof(long)) { | 431 | if (sizeof(new_val) == sizeof(long)) { |
426 | mcl->args[1] = new_val.pte; | 432 | mcl->args[1] = new_val.pte; |
427 | mcl->args[2] = flags; | 433 | mcl->args[2] = flags; |
428 | } else { | 434 | } else { |
429 | mcl->args[1] = new_val.pte; | 435 | mcl->args[1] = new_val.pte; |
430 | mcl->args[2] = new_val.pte >> 32; | 436 | mcl->args[2] = new_val.pte >> 32; |
431 | mcl->args[3] = flags; | 437 | mcl->args[3] = flags; |
432 | } | 438 | } |
433 | } | 439 | } |
434 | 440 | ||
435 | static inline void | 441 | static inline void |
436 | MULTI_grant_table_op(struct multicall_entry *mcl, unsigned int cmd, | 442 | MULTI_grant_table_op(struct multicall_entry *mcl, unsigned int cmd, |
437 | void *uop, unsigned int count) | 443 | void *uop, unsigned int count) |
438 | { | 444 | { |
439 | mcl->op = __HYPERVISOR_grant_table_op; | 445 | mcl->op = __HYPERVISOR_grant_table_op; |
440 | mcl->args[0] = cmd; | 446 | mcl->args[0] = cmd; |
441 | mcl->args[1] = (unsigned long)uop; | 447 | mcl->args[1] = (unsigned long)uop; |
442 | mcl->args[2] = count; | 448 | mcl->args[2] = count; |
443 | } | 449 | } |
444 | 450 | ||
445 | static inline void | 451 | static inline void |
446 | MULTI_update_va_mapping_otherdomain(struct multicall_entry *mcl, unsigned long va, | 452 | MULTI_update_va_mapping_otherdomain(struct multicall_entry *mcl, unsigned long va, |
447 | pte_t new_val, unsigned long flags, | 453 | pte_t new_val, unsigned long flags, |
448 | domid_t domid) | 454 | domid_t domid) |
449 | { | 455 | { |
450 | mcl->op = __HYPERVISOR_update_va_mapping_otherdomain; | 456 | mcl->op = __HYPERVISOR_update_va_mapping_otherdomain; |
451 | mcl->args[0] = va; | 457 | mcl->args[0] = va; |
452 | if (sizeof(new_val) == sizeof(long)) { | 458 | if (sizeof(new_val) == sizeof(long)) { |
453 | mcl->args[1] = new_val.pte; | 459 | mcl->args[1] = new_val.pte; |
454 | mcl->args[2] = flags; | 460 | mcl->args[2] = flags; |
455 | mcl->args[3] = domid; | 461 | mcl->args[3] = domid; |
456 | } else { | 462 | } else { |
457 | mcl->args[1] = new_val.pte; | 463 | mcl->args[1] = new_val.pte; |
458 | mcl->args[2] = new_val.pte >> 32; | 464 | mcl->args[2] = new_val.pte >> 32; |
459 | mcl->args[3] = flags; | 465 | mcl->args[3] = flags; |
460 | mcl->args[4] = domid; | 466 | mcl->args[4] = domid; |
461 | } | 467 | } |
462 | } | 468 | } |
463 | 469 | ||
464 | static inline void | 470 | static inline void |
465 | MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, | 471 | MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, |
466 | struct desc_struct desc) | 472 | struct desc_struct desc) |
467 | { | 473 | { |
468 | mcl->op = __HYPERVISOR_update_descriptor; | 474 | mcl->op = __HYPERVISOR_update_descriptor; |
469 | if (sizeof(maddr) == sizeof(long)) { | 475 | if (sizeof(maddr) == sizeof(long)) { |
470 | mcl->args[0] = maddr; | 476 | mcl->args[0] = maddr; |
471 | mcl->args[1] = *(unsigned long *)&desc; | 477 | mcl->args[1] = *(unsigned long *)&desc; |
472 | } else { | 478 | } else { |
473 | mcl->args[0] = maddr; | 479 | mcl->args[0] = maddr; |
474 | mcl->args[1] = maddr >> 32; | 480 | mcl->args[1] = maddr >> 32; |
475 | mcl->args[2] = desc.a; | 481 | mcl->args[2] = desc.a; |
476 | mcl->args[3] = desc.b; | 482 | mcl->args[3] = desc.b; |
477 | } | 483 | } |
478 | } | 484 | } |
479 | 485 | ||
480 | static inline void | 486 | static inline void |
481 | MULTI_memory_op(struct multicall_entry *mcl, unsigned int cmd, void *arg) | 487 | MULTI_memory_op(struct multicall_entry *mcl, unsigned int cmd, void *arg) |
482 | { | 488 | { |
483 | mcl->op = __HYPERVISOR_memory_op; | 489 | mcl->op = __HYPERVISOR_memory_op; |
484 | mcl->args[0] = cmd; | 490 | mcl->args[0] = cmd; |
485 | mcl->args[1] = (unsigned long)arg; | 491 | mcl->args[1] = (unsigned long)arg; |
486 | } | 492 | } |
487 | 493 | ||
488 | static inline void | 494 | static inline void |
489 | MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req, | 495 | MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req, |
490 | int count, int *success_count, domid_t domid) | 496 | int count, int *success_count, domid_t domid) |
491 | { | 497 | { |
492 | mcl->op = __HYPERVISOR_mmu_update; | 498 | mcl->op = __HYPERVISOR_mmu_update; |
493 | mcl->args[0] = (unsigned long)req; | 499 | mcl->args[0] = (unsigned long)req; |
494 | mcl->args[1] = count; | 500 | mcl->args[1] = count; |
495 | mcl->args[2] = (unsigned long)success_count; | 501 | mcl->args[2] = (unsigned long)success_count; |
496 | mcl->args[3] = domid; | 502 | mcl->args[3] = domid; |
497 | } | 503 | } |
498 | 504 | ||
499 | static inline void | 505 | static inline void |
500 | MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count, | 506 | MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count, |
501 | int *success_count, domid_t domid) | 507 | int *success_count, domid_t domid) |
502 | { | 508 | { |
503 | mcl->op = __HYPERVISOR_mmuext_op; | 509 | mcl->op = __HYPERVISOR_mmuext_op; |
504 | mcl->args[0] = (unsigned long)op; | 510 | mcl->args[0] = (unsigned long)op; |
505 | mcl->args[1] = count; | 511 | mcl->args[1] = count; |
506 | mcl->args[2] = (unsigned long)success_count; | 512 | mcl->args[2] = (unsigned long)success_count; |
507 | mcl->args[3] = domid; | 513 | mcl->args[3] = domid; |
508 | } | 514 | } |
509 | 515 | ||
510 | static inline void | 516 | static inline void |
511 | MULTI_set_gdt(struct multicall_entry *mcl, unsigned long *frames, int entries) | 517 | MULTI_set_gdt(struct multicall_entry *mcl, unsigned long *frames, int entries) |
512 | { | 518 | { |
513 | mcl->op = __HYPERVISOR_set_gdt; | 519 | mcl->op = __HYPERVISOR_set_gdt; |
514 | mcl->args[0] = (unsigned long)frames; | 520 | mcl->args[0] = (unsigned long)frames; |
515 | mcl->args[1] = entries; | 521 | mcl->args[1] = entries; |
516 | } | 522 | } |
517 | 523 | ||
518 | static inline void | 524 | static inline void |
519 | MULTI_stack_switch(struct multicall_entry *mcl, | 525 | MULTI_stack_switch(struct multicall_entry *mcl, |
520 | unsigned long ss, unsigned long esp) | 526 | unsigned long ss, unsigned long esp) |
521 | { | 527 | { |
522 | mcl->op = __HYPERVISOR_stack_switch; | 528 | mcl->op = __HYPERVISOR_stack_switch; |
523 | mcl->args[0] = ss; | 529 | mcl->args[0] = ss; |
524 | mcl->args[1] = esp; | 530 | mcl->args[1] = esp; |
525 | } | 531 | } |
526 | 532 | ||
527 | #endif /* _ASM_X86_XEN_HYPERCALL_H */ | 533 | #endif /* _ASM_X86_XEN_HYPERCALL_H */ |
528 | 534 |
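To make the macro plumbing above concrete, here is a rough, hand-expanded sketch of what a two-argument wrapper such as HYPERVISOR_xen_version() boils down to. This is illustrative only and not part of the patch; the real expansion also declares the unused __arg3..__arg5 register variables via __HYPERCALL_DECLS, which are represented here simply by the clobber list.

    static inline int example_xen_version(int cmd, void *arg)
    {
    	/* Pin the return value and the two live arguments to the ABI
    	 * registers chosen by the __HYPERCALL_*REG macros. */
    	register unsigned long __res  asm(__HYPERCALL_RETREG);
    	register unsigned long __arg1 asm(__HYPERCALL_ARG1REG) = (unsigned long)cmd;
    	register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = (unsigned long)arg;

    	asm volatile("call hypercall_page+%c[offset]"
    		     : "=r" (__res), "+r" (__arg1), "+r" (__arg2)
    		     : [offset] "i" (__HYPERVISOR_xen_version *
    				     sizeof(hypercall_page[0]))
    		     /* Everything else may be trashed by the hypervisor. */
    		     : "memory", __HYPERCALL_ARG5REG, __HYPERCALL_ARG4REG,
    		       __HYPERCALL_ARG3REG);

    	return (int)__res;
    }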
arch/x86/include/asm/xen/hypervisor.h
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | * hypervisor.h | 2 | * hypervisor.h |
3 | * | 3 | * |
4 | * Linux-specific hypervisor handling. | 4 | * Linux-specific hypervisor handling. |
5 | * | 5 | * |
6 | * Copyright (c) 2002-2004, K A Fraser | 6 | * Copyright (c) 2002-2004, K A Fraser |
7 | * | 7 | * |
8 | * This program is free software; you can redistribute it and/or | 8 | * This program is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU General Public License version 2 | 9 | * modify it under the terms of the GNU General Public License version 2 |
10 | * as published by the Free Software Foundation; or, when distributed | 10 | * as published by the Free Software Foundation; or, when distributed |
11 | * separately from the Linux kernel or incorporated into other | 11 | * separately from the Linux kernel or incorporated into other |
12 | * software packages, subject to the following license: | 12 | * software packages, subject to the following license: |
13 | * | 13 | * |
14 | * Permission is hereby granted, free of charge, to any person obtaining a copy | 14 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
15 | * of this source file (the "Software"), to deal in the Software without | 15 | * of this source file (the "Software"), to deal in the Software without |
16 | * restriction, including without limitation the rights to use, copy, modify, | 16 | * restriction, including without limitation the rights to use, copy, modify, |
17 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | 17 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, |
18 | * and to permit persons to whom the Software is furnished to do so, subject to | 18 | * and to permit persons to whom the Software is furnished to do so, subject to |
19 | * the following conditions: | 19 | * the following conditions: |
20 | * | 20 | * |
21 | * The above copyright notice and this permission notice shall be included in | 21 | * The above copyright notice and this permission notice shall be included in |
22 | * all copies or substantial portions of the Software. | 22 | * all copies or substantial portions of the Software. |
23 | * | 23 | * |
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
25 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 25 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
26 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 26 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
27 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 27 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
28 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | 28 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
29 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | 29 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
30 | * IN THE SOFTWARE. | 30 | * IN THE SOFTWARE. |
31 | */ | 31 | */ |
32 | 32 | ||
33 | #ifndef _ASM_X86_XEN_HYPERVISOR_H | 33 | #ifndef _ASM_X86_XEN_HYPERVISOR_H |
34 | #define _ASM_X86_XEN_HYPERVISOR_H | 34 | #define _ASM_X86_XEN_HYPERVISOR_H |
35 | 35 | ||
36 | #include <linux/types.h> | ||
37 | #include <linux/kernel.h> | ||
38 | |||
39 | #include <xen/interface/xen.h> | ||
40 | #include <xen/interface/version.h> | ||
41 | |||
42 | #include <asm/ptrace.h> | ||
43 | #include <asm/page.h> | ||
44 | #include <asm/desc.h> | ||
45 | #if defined(__i386__) | ||
46 | # ifdef CONFIG_X86_PAE | ||
47 | # include <asm-generic/pgtable-nopud.h> | ||
48 | # else | ||
49 | # include <asm-generic/pgtable-nopmd.h> | ||
50 | # endif | ||
51 | #endif | ||
52 | #include <asm/xen/hypercall.h> | ||
53 | |||
54 | /* arch/i386/kernel/setup.c */ | 36 | /* arch/i386/kernel/setup.c */ |
55 | extern struct shared_info *HYPERVISOR_shared_info; | 37 | extern struct shared_info *HYPERVISOR_shared_info; |
56 | extern struct start_info *xen_start_info; | 38 | extern struct start_info *xen_start_info; |
57 | 39 | ||
58 | /* arch/i386/mach-xen/evtchn.c */ | ||
59 | /* Force a proper event-channel callback from Xen. */ | ||
60 | extern void force_evtchn_callback(void); | ||
61 | |||
62 | /* Turn jiffies into Xen system time. */ | ||
63 | u64 jiffies_to_st(unsigned long jiffies); | ||
64 | |||
65 | |||
66 | #define MULTI_UVMFLAGS_INDEX 3 | ||
67 | #define MULTI_UVMDOMID_INDEX 4 | ||
68 | |||
69 | enum xen_domain_type { | 40 | enum xen_domain_type { |
70 | XEN_NATIVE, | 41 | XEN_NATIVE, |
71 | XEN_PV_DOMAIN, | 42 | XEN_PV_DOMAIN, |
72 | XEN_HVM_DOMAIN, | 43 | XEN_HVM_DOMAIN, |
73 | }; | 44 | }; |
74 | 45 | ||
75 | extern enum xen_domain_type xen_domain_type; | 46 | extern enum xen_domain_type xen_domain_type; |
76 | 47 | ||
48 | #ifdef CONFIG_XEN | ||
77 | #define xen_domain() (xen_domain_type != XEN_NATIVE) | 49 | #define xen_domain() (xen_domain_type != XEN_NATIVE) |
78 | #define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN) | 50 | #else |
51 | #define xen_domain() (0) | ||
52 | #endif | ||
53 | |||
54 | #define xen_pv_domain() (xen_domain() && xen_domain_type == XEN_PV_DOMAIN) |
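The CONFIG_XEN stub on the new side is what lets callers test for Xen unconditionally. A minimal, hypothetical caller (not part of the patch) might look like the following; on !CONFIG_XEN kernels xen_domain() collapses to (0), so the compiler discards the Xen-specific path entirely:

    static void example_probe(void)
    {
    	if (!xen_domain())
    		return;		/* native, or Xen support not built in */

    	if (xen_pv_domain())
    		printk(KERN_INFO "example: running as a Xen PV guest\n");
    }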
arch/x86/include/asm/xen/page.h
1 | #ifndef _ASM_X86_XEN_PAGE_H | 1 | #ifndef _ASM_X86_XEN_PAGE_H |
2 | #define _ASM_X86_XEN_PAGE_H | 2 | #define _ASM_X86_XEN_PAGE_H |
3 | 3 | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/types.h> | ||
6 | #include <linux/spinlock.h> | ||
4 | #include <linux/pfn.h> | 7 | #include <linux/pfn.h> |
5 | 8 | ||
6 | #include <asm/uaccess.h> | 9 | #include <asm/uaccess.h> |
10 | #include <asm/page.h> | ||
7 | #include <asm/pgtable.h> | 11 | #include <asm/pgtable.h> |
8 | 12 | ||
13 | #include <xen/interface/xen.h> | ||
9 | #include <xen/features.h> | 14 | #include <xen/features.h> |
10 | 15 | ||
11 | /* Xen machine address */ | 16 | /* Xen machine address */ |
12 | typedef struct xmaddr { | 17 | typedef struct xmaddr { |
13 | phys_addr_t maddr; | 18 | phys_addr_t maddr; |
14 | } xmaddr_t; | 19 | } xmaddr_t; |
15 | 20 | ||
16 | /* Xen pseudo-physical address */ | 21 | /* Xen pseudo-physical address */ |
17 | typedef struct xpaddr { | 22 | typedef struct xpaddr { |
18 | phys_addr_t paddr; | 23 | phys_addr_t paddr; |
19 | } xpaddr_t; | 24 | } xpaddr_t; |
20 | 25 | ||
21 | #define XMADDR(x) ((xmaddr_t) { .maddr = (x) }) | 26 | #define XMADDR(x) ((xmaddr_t) { .maddr = (x) }) |
22 | #define XPADDR(x) ((xpaddr_t) { .paddr = (x) }) | 27 | #define XPADDR(x) ((xpaddr_t) { .paddr = (x) }) |
23 | 28 | ||
24 | /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ | 29 | /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ |
25 | #define INVALID_P2M_ENTRY (~0UL) | 30 | #define INVALID_P2M_ENTRY (~0UL) |
26 | #define FOREIGN_FRAME_BIT (1UL<<31) | 31 | #define FOREIGN_FRAME_BIT (1UL<<31) |
27 | #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) | 32 | #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) |
28 | 33 | ||
29 | /* Maximum amount of memory we can handle in a domain in pages */ | 34 | /* Maximum amount of memory we can handle in a domain in pages */ |
30 | #define MAX_DOMAIN_PAGES \ | 35 | #define MAX_DOMAIN_PAGES \ |
31 | ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) | 36 | ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) |
32 | 37 | ||
33 | 38 | ||
34 | extern unsigned long get_phys_to_machine(unsigned long pfn); | 39 | extern unsigned long get_phys_to_machine(unsigned long pfn); |
35 | extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn); | 40 | extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
36 | 41 | ||
37 | static inline unsigned long pfn_to_mfn(unsigned long pfn) | 42 | static inline unsigned long pfn_to_mfn(unsigned long pfn) |
38 | { | 43 | { |
39 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 44 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
40 | return pfn; | 45 | return pfn; |
41 | 46 | ||
42 | return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT; | 47 | return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT; |
43 | } | 48 | } |
44 | 49 | ||
45 | static inline int phys_to_machine_mapping_valid(unsigned long pfn) | 50 | static inline int phys_to_machine_mapping_valid(unsigned long pfn) |
46 | { | 51 | { |
47 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 52 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
48 | return 1; | 53 | return 1; |
49 | 54 | ||
50 | return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY; | 55 | return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY; |
51 | } | 56 | } |
52 | 57 | ||
53 | static inline unsigned long mfn_to_pfn(unsigned long mfn) | 58 | static inline unsigned long mfn_to_pfn(unsigned long mfn) |
54 | { | 59 | { |
55 | unsigned long pfn; | 60 | unsigned long pfn; |
56 | 61 | ||
57 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 62 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
58 | return mfn; | 63 | return mfn; |
59 | 64 | ||
60 | #if 0 | 65 | #if 0 |
61 | if (unlikely((mfn >> machine_to_phys_order) != 0)) | 66 | if (unlikely((mfn >> machine_to_phys_order) != 0)) |
62 | return max_mapnr; | 67 | return max_mapnr; |
63 | #endif | 68 | #endif |
64 | 69 | ||
65 | pfn = 0; | 70 | pfn = 0; |
66 | /* | 71 | /* |
67 | * The array access can fail (e.g., device space beyond end of RAM). | 72 | * The array access can fail (e.g., device space beyond end of RAM). |
68 | * In such cases it doesn't matter what we return (we return garbage), | 73 | * In such cases it doesn't matter what we return (we return garbage), |
69 | * but we must handle the fault without crashing! | 74 | * but we must handle the fault without crashing! |
70 | */ | 75 | */ |
71 | __get_user(pfn, &machine_to_phys_mapping[mfn]); | 76 | __get_user(pfn, &machine_to_phys_mapping[mfn]); |
72 | 77 | ||
73 | return pfn; | 78 | return pfn; |
74 | } | 79 | } |
75 | 80 | ||
76 | static inline xmaddr_t phys_to_machine(xpaddr_t phys) | 81 | static inline xmaddr_t phys_to_machine(xpaddr_t phys) |
77 | { | 82 | { |
78 | unsigned offset = phys.paddr & ~PAGE_MASK; | 83 | unsigned offset = phys.paddr & ~PAGE_MASK; |
79 | return XMADDR(PFN_PHYS(pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset); | 84 | return XMADDR(PFN_PHYS(pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset); |
80 | } | 85 | } |
81 | 86 | ||
82 | static inline xpaddr_t machine_to_phys(xmaddr_t machine) | 87 | static inline xpaddr_t machine_to_phys(xmaddr_t machine) |
83 | { | 88 | { |
84 | unsigned offset = machine.maddr & ~PAGE_MASK; | 89 | unsigned offset = machine.maddr & ~PAGE_MASK; |
85 | return XPADDR(PFN_PHYS(mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset); | 90 | return XPADDR(PFN_PHYS(mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset); |
86 | } | 91 | } |
87 | 92 | ||
88 | /* | 93 | /* |
89 | * We detect special mappings in one of two ways: | 94 | * We detect special mappings in one of two ways: |
90 | * 1. If the MFN is an I/O page then Xen will set the m2p entry | 95 | * 1. If the MFN is an I/O page then Xen will set the m2p entry |
91 | * to be outside our maximum possible pseudophys range. | 96 | * to be outside our maximum possible pseudophys range. |
92 | * 2. If the MFN belongs to a different domain then we will certainly | 97 | * 2. If the MFN belongs to a different domain then we will certainly |
93 | * not have MFN in our p2m table. Conversely, if the page is ours, | 98 | * not have MFN in our p2m table. Conversely, if the page is ours, |
94 | * then we'll have p2m(m2p(MFN))==MFN. | 99 | * then we'll have p2m(m2p(MFN))==MFN. |
95 | * If we detect a special mapping then it doesn't have a 'struct page'. | 100 | * If we detect a special mapping then it doesn't have a 'struct page'. |
96 | * We force !pfn_valid() by returning an out-of-range pointer. | 101 | * We force !pfn_valid() by returning an out-of-range pointer. |
97 | * | 102 | * |
98 | * NB. These checks require that, for any MFN that is not in our reservation, | 103 | * NB. These checks require that, for any MFN that is not in our reservation, |
99 | * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if | 104 | * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if |
100 | * we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN. | 105 | * we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN. |
101 | * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety. | 106 | * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety. |
102 | * | 107 | * |
103 | * NB2. When deliberately mapping foreign pages into the p2m table, you *must* | 108 | * NB2. When deliberately mapping foreign pages into the p2m table, you *must* |
104 | * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we | 109 | * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we |
105 | * require. In all the cases we care about, the FOREIGN_FRAME bit is | 110 | * require. In all the cases we care about, the FOREIGN_FRAME bit is |
106 | * masked (e.g., pfn_to_mfn()) so behaviour there is correct. | 111 | * masked (e.g., pfn_to_mfn()) so behaviour there is correct. |
107 | */ | 112 | */ |
108 | static inline unsigned long mfn_to_local_pfn(unsigned long mfn) | 113 | static inline unsigned long mfn_to_local_pfn(unsigned long mfn) |
109 | { | 114 | { |
110 | extern unsigned long max_mapnr; | 115 | extern unsigned long max_mapnr; |
111 | unsigned long pfn = mfn_to_pfn(mfn); | 116 | unsigned long pfn = mfn_to_pfn(mfn); |
112 | if ((pfn < max_mapnr) | 117 | if ((pfn < max_mapnr) |
113 | && !xen_feature(XENFEAT_auto_translated_physmap) | 118 | && !xen_feature(XENFEAT_auto_translated_physmap) |
114 | && (get_phys_to_machine(pfn) != mfn)) | 119 | && (get_phys_to_machine(pfn) != mfn)) |
115 | return max_mapnr; /* force !pfn_valid() */ | 120 | return max_mapnr; /* force !pfn_valid() */ |
116 | /* XXX fixme; not true with sparsemem */ | 121 | /* XXX fixme; not true with sparsemem */ |
117 | return pfn; | 122 | return pfn; |
118 | } | 123 | } |
119 | 124 | ||
120 | /* VIRT <-> MACHINE conversion */ | 125 | /* VIRT <-> MACHINE conversion */ |
121 | #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) | 126 | #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) |
122 | #define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) | 127 | #define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) |
123 | #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) | 128 | #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) |
124 | 129 | ||
125 | static inline unsigned long pte_mfn(pte_t pte) | 130 | static inline unsigned long pte_mfn(pte_t pte) |
126 | { | 131 | { |
127 | return (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT; | 132 | return (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT; |
128 | } | 133 | } |
129 | 134 | ||
130 | static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot) | 135 | static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot) |
131 | { | 136 | { |
132 | pte_t pte; | 137 | pte_t pte; |
133 | 138 | ||
134 | pte.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) | | 139 | pte.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) | |
135 | (pgprot_val(pgprot) & __supported_pte_mask); | 140 | (pgprot_val(pgprot) & __supported_pte_mask); |
136 | 141 | ||
137 | return pte; | 142 | return pte; |
138 | } | 143 | } |
139 | 144 | ||
140 | static inline pteval_t pte_val_ma(pte_t pte) | 145 | static inline pteval_t pte_val_ma(pte_t pte) |
141 | { | 146 | { |
142 | return pte.pte; | 147 | return pte.pte; |
143 | } | 148 | } |
144 | 149 | ||
145 | static inline pte_t __pte_ma(pteval_t x) | 150 | static inline pte_t __pte_ma(pteval_t x) |
146 | { | 151 | { |
147 | return (pte_t) { .pte = x }; | 152 | return (pte_t) { .pte = x }; |
148 | } | 153 | } |
149 | 154 | ||
150 | #define pmd_val_ma(v) ((v).pmd) | 155 | #define pmd_val_ma(v) ((v).pmd) |
151 | #ifdef __PAGETABLE_PUD_FOLDED | 156 | #ifdef __PAGETABLE_PUD_FOLDED |
152 | #define pud_val_ma(v) ((v).pgd.pgd) | 157 | #define pud_val_ma(v) ((v).pgd.pgd) |
153 | #else | 158 | #else |
154 | #define pud_val_ma(v) ((v).pud) | 159 | #define pud_val_ma(v) ((v).pud) |
155 | #endif | 160 | #endif |
156 | #define __pmd_ma(x) ((pmd_t) { (x) } ) | 161 | #define __pmd_ma(x) ((pmd_t) { (x) } ) |
157 | 162 | ||
158 | #define pgd_val_ma(x) ((x).pgd) | 163 | #define pgd_val_ma(x) ((x).pgd) |
159 | 164 | ||
160 | 165 | ||
161 | xmaddr_t arbitrary_virt_to_machine(void *address); | 166 | xmaddr_t arbitrary_virt_to_machine(void *address); |
162 | void make_lowmem_page_readonly(void *vaddr); | 167 | void make_lowmem_page_readonly(void *vaddr); |
163 | void make_lowmem_page_readwrite(void *vaddr); | 168 | void make_lowmem_page_readwrite(void *vaddr); |
164 | 169 | ||
165 | #endif /* _ASM_X86_XEN_PAGE_H */ | 170 | #endif /* _ASM_X86_XEN_PAGE_H */ |
166 | 171 |
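As a sketch of how the conversion helpers above compose, here is a hypothetical helper (not part of the patch) that translates a direct-mapped kernel virtual address into the machine address Xen expects, e.g. for an mmu_update request:

    static inline phys_addr_t example_virt_to_maddr(void *vaddr)
    {
    	/* virt_to_machine() is __pa() followed by a p2m lookup via
    	 * pfn_to_mfn(), with the sub-page offset preserved. */
    	xmaddr_t maddr = virt_to_machine(vaddr);

    	return maddr.maddr;
    }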
arch/x86/xen/enlighten.c
/*
 * Core of Xen paravirt_ops implementation.
 *
 * This file contains the xen_paravirt_ops structure itself, and the
 * implementations for:
 * - privileged instructions
 * - interrupt flags
 * - segment operations
 * - booting and setup
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/preempt.h>
#include <linux/hardirq.h>
#include <linux/percpu.h>
#include <linux/delay.h>
#include <linux/start_kernel.h>
#include <linux/sched.h>
#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/highmem.h>
#include <linux/console.h>

#include <xen/interface/xen.h>
+#include <xen/interface/version.h>
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/hvc-console.h>
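The include marked '+' is what this hunk adds: with the spurious #includes stripped out of asm/xen/hypervisor.h, enlighten.c has to pull in xen/interface/version.h itself for the XENVER_version and XENVER_extraversion calls in xen_banner() below.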
#include <asm/paravirt.h>
#include <asm/apic.h>
#include <asm/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
#include <asm/fixmap.h>
#include <asm/processor.h>
#include <asm/msr-index.h>
#include <asm/setup.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/reboot.h>

#include "xen-ops.h"
#include "mmu.h"
#include "multicalls.h"

EXPORT_SYMBOL_GPL(hypercall_page);

DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);

enum xen_domain_type xen_domain_type = XEN_NATIVE;
EXPORT_SYMBOL_GPL(xen_domain_type);

/*
 * Identity map, in addition to plain kernel map.  This needs to be
 * large enough to allocate page table pages to allocate the rest.
 * Each page can map 2MB.
 */
static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;

#ifdef CONFIG_X86_64
/* l3 pud for userspace vsyscall mapping */
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
#endif /* CONFIG_X86_64 */

/*
 * Note about cr3 (pagetable base) values:
 *
 * xen_cr3 contains the current logical cr3 value; it contains the
 * last set cr3.  This may not be the current effective cr3, because
 * its update may be being lazily deferred.  However, a vcpu looking
 * at its own cr3 can use this value knowing that everything will
 * be self-consistent.
 *
 * xen_current_cr3 contains the actual vcpu cr3; it is set once the
 * hypercall to set the vcpu cr3 is complete (so it may be a little
 * out of date, but it will never be set early).  If one vcpu is
 * looking at another vcpu's cr3 value, it should use this variable.
 */
DEFINE_PER_CPU(unsigned long, xen_cr3);          /* cr3 stored as physaddr */
DEFINE_PER_CPU(unsigned long, xen_current_cr3);  /* actual vcpu cr3 */

struct start_info *xen_start_info;
EXPORT_SYMBOL_GPL(xen_start_info);

struct shared_info xen_dummy_shared_info;

/*
 * Point at some empty memory to start with. We map the real shared_info
 * page as soon as fixmap is up and running.
 */
struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;

/*
 * Flag to determine whether vcpu info placement is available on all
 * VCPUs.  We assume it is to start with, and then set it to zero on
 * the first failure.  This is because it can succeed on some VCPUs
 * and not others, since it can involve hypervisor memory allocation,
 * or because the guest failed to guarantee all the appropriate
 * constraints on all VCPUs (i.e. the buffer can't cross a page boundary).
 *
 * Note that any particular CPU may be using a placed vcpu structure,
 * but we can only optimise if they all are.
 *
 * 0: not available, 1: available
 */
static int have_vcpu_info_placement =
#ifdef CONFIG_X86_32
        1
#else
        0
#endif
        ;


static void xen_vcpu_setup(int cpu)
{
        struct vcpu_register_vcpu_info info;
        int err;
        struct vcpu_info *vcpup;

        BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
        per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];

        if (!have_vcpu_info_placement)
                return;         /* already tested, not available */

        vcpup = &per_cpu(xen_vcpu_info, cpu);

        info.mfn = virt_to_mfn(vcpup);
        info.offset = offset_in_page(vcpup);

        printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n",
               cpu, vcpup, info.mfn, info.offset);

        /* Check to see if the hypervisor will put the vcpu_info
           structure where we want it, which allows direct access via
           a percpu-variable. */
        err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);

        if (err) {
                printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
                have_vcpu_info_placement = 0;
        } else {
                /* This cpu is using the registered vcpu info, even if
                   later ones fail to. */
                per_cpu(xen_vcpu, cpu) = vcpup;

                printk(KERN_DEBUG "cpu %d using vcpu_info at %p\n",
                       cpu, vcpup);
        }
}
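The page-boundary constraint mentioned in the comment block above can be made concrete. A small sketch (illustrative, not kernel code) of the condition a buffer must satisfy before being registered as (mfn, offset):

    /* Illustrative: a registered vcpu_info is described to the
     * hypervisor as (mfn, offset), so it must fit entirely inside one
     * page -- a buffer crossing a page boundary cannot be expressed. */
    #include <assert.h>
    #include <stddef.h>

    #define PAGE_SIZE_SKETCH 4096UL

    static int fits_in_one_page(unsigned long offset, size_t size)
    {
            return offset + size <= PAGE_SIZE_SKETCH;
    }

    int main(void)
    {
            assert(fits_in_one_page(0, 64));        /* fine */
            assert(!fits_in_one_page(4090, 64));    /* straddles the boundary */
            return 0;
    }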

/*
 * On restore, set the vcpu placement up again.
 * If it fails, then we're in a bad state, since
 * we can't back out from using it...
 */
void xen_vcpu_restore(void)
{
        if (have_vcpu_info_placement) {
                int cpu;

                for_each_online_cpu(cpu) {
                        bool other_cpu = (cpu != smp_processor_id());

                        if (other_cpu &&
                            HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
                                BUG();

                        xen_vcpu_setup(cpu);

                        if (other_cpu &&
                            HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
                                BUG();
                }

                BUG_ON(!have_vcpu_info_placement);
        }
}

static void __init xen_banner(void)
{
        unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL);
        struct xen_extraversion extra;
        HYPERVISOR_xen_version(XENVER_extraversion, &extra);

        printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
               pv_info.name);
        printk(KERN_INFO "Xen version: %d.%d%s%s\n",
               version >> 16, version & 0xffff, extra.extraversion,
               xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
}
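XENVER_version packs the major number into the high 16 bits and the minor into the low 16, which is what the printk above unpacks. A runnable one-liner with an assumed example value:

    #include <stdio.h>

    int main(void)
    {
            unsigned version = 0x00030002;  /* hypothetical packed value */
            /* mirrors the decode in xen_banner(): prints "3.2" */
            printf("%u.%u\n", version >> 16, version & 0xffff);
            return 0;
    }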

static void xen_cpuid(unsigned int *ax, unsigned int *bx,
                      unsigned int *cx, unsigned int *dx)
{
        unsigned maskedx = ~0;

        /*
         * Mask out inconvenient features, to try and disable as many
         * unsupported kernel subsystems as possible.
         */
        if (*ax == 1)
                maskedx = ~((1 << X86_FEATURE_APIC) |  /* disable APIC */
                            (1 << X86_FEATURE_ACPI) |  /* disable ACPI */
                            (1 << X86_FEATURE_MCE)  |  /* disable MCE */
                            (1 << X86_FEATURE_MCA)  |  /* disable MCA */
                            (1 << X86_FEATURE_ACC));   /* thermal monitoring */

        asm(XEN_EMULATE_PREFIX "cpuid"
                : "=a" (*ax),
                  "=b" (*bx),
                  "=c" (*cx),
                  "=d" (*dx)
                : "0" (*ax), "2" (*cx));
        *dx &= maskedx;
}
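One subtlety worth noting: `1 << X86_FEATURE_APIC` only yields the right EDX mask because all five features sit in word 0 of the cpufeature table, i.e. CPUID leaf 1's EDX, where the encoded bit number is the EDX bit itself. A sketch of that assumption (the word*32+bit encoding and the bit value below reflect my reading of this era's cpufeature.h, not something stated in the diff):

    /* Illustrative: cpufeature bits are encoded as word * 32 + bit.
     * For a word-0 feature the raw value doubles as the EDX bit index,
     * which is what the shifts above quietly rely on. */
    #include <assert.h>

    int main(void)
    {
            unsigned x86_feature_apic = 0 * 32 + 9;  /* assumed: leaf 1 EDX bit 9 */
            assert(x86_feature_apic < 32);           /* safe for 1 << n on a u32 */
            assert((1u << x86_feature_apic) == 0x200);
            return 0;
    }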

static void xen_set_debugreg(int reg, unsigned long val)
{
        HYPERVISOR_set_debugreg(reg, val);
}

static unsigned long xen_get_debugreg(int reg)
{
        return HYPERVISOR_get_debugreg(reg);
}

static void xen_leave_lazy(void)
{
        paravirt_leave_lazy(paravirt_get_lazy_mode());
        xen_mc_flush();
}

static unsigned long xen_store_tr(void)
{
        return 0;
}

/*
 * Set the page permissions for a particular virtual address.  If the
 * address is a vmalloc mapping (or other non-linear mapping), then
 * find the linear mapping of the page and also set its protections to
 * match.
 */
static void set_aliased_prot(void *v, pgprot_t prot)
{
        int level;
        pte_t *ptep;
        pte_t pte;
        unsigned long pfn;
        struct page *page;

        ptep = lookup_address((unsigned long)v, &level);
        BUG_ON(ptep == NULL);

        pfn = pte_pfn(*ptep);
        page = pfn_to_page(pfn);

        pte = pfn_pte(pfn, prot);

        if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
                BUG();

        if (!PageHighMem(page)) {
                void *av = __va(PFN_PHYS(pfn));

                if (av != v)
                        if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
                                BUG();
        } else
                kmap_flush_unused();
}
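Why the lowmem alias is touched at all: the hypervisor cares about the underlying frame, not the virtual address, so a page left writable through any alias is still writable. A runnable userspace analogy of that hazard (Linux-only; memfd_create needs glibc 2.27 or later; a sketch, not kernel code):

    #define _GNU_SOURCE
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            long sz = sysconf(_SC_PAGESIZE);
            int fd = memfd_create("alias-demo", 0);
            char *a, *b;

            if (fd < 0 || ftruncate(fd, sz) != 0)
                    return 1;

            /* Two independent views of the same page. */
            a = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
            b = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
            if (a == MAP_FAILED || b == MAP_FAILED)
                    return 1;

            /* Making one view read-only does nothing to the alias... */
            mprotect(a, sz, PROT_READ);
            /* ...so the frame is still reachable for writes -- the exact
             * situation set_aliased_prot() closes off for LDT pages. */
            b[0] = 1;
            return 0;
    }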

static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
        const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
        int i;

        for(i = 0; i < entries; i += entries_per_page)
                set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
}

static void xen_free_ldt(struct desc_struct *ldt, unsigned entries)
{
        const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
        int i;

        for(i = 0; i < entries; i += entries_per_page)
                set_aliased_prot(ldt + i, PAGE_KERNEL);
}

static void xen_set_ldt(const void *addr, unsigned entries)
{
        struct mmuext_op *op;
        struct multicall_space mcs = xen_mc_entry(sizeof(*op));

        op = mcs.args;
        op->cmd = MMUEXT_SET_LDT;
        op->arg1.linear_addr = (unsigned long)addr;
        op->arg2.nr_ents = entries;

        MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);

        xen_mc_issue(PARAVIRT_LAZY_CPU);
}

static void xen_load_gdt(const struct desc_ptr *dtr)
{
        unsigned long *frames;
        unsigned long va = dtr->address;
        unsigned int size = dtr->size + 1;
        unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
        int f;
        struct multicall_space mcs;

        /* A GDT can be up to 64k in size, which corresponds to 8192
           8-byte entries, or 16 4k pages.. */

        BUG_ON(size > 65536);
        BUG_ON(va & ~PAGE_MASK);

        mcs = xen_mc_entry(sizeof(*frames) * pages);
        frames = mcs.args;

        for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
                frames[f] = virt_to_mfn(va);
                make_lowmem_page_readonly((void *)va);
        }

        MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct));

        xen_mc_issue(PARAVIRT_LAZY_CPU);
}
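The arithmetic behind the comment and BUG_ON above, as a runnable check (illustrative only):

    #include <assert.h>

    int main(void)
    {
            unsigned size  = 65536;                    /* maximum GDT size */
            unsigned ents  = size / 8;                 /* 8-byte descriptors */
            unsigned pages = (size + 4096 - 1) / 4096;

            assert(ents == 8192);   /* "8192 8-byte entries" */
            assert(pages == 16);    /* "16 4k pages" -> 16 mfns in frames[] */
            return 0;
    }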

static void load_TLS_descriptor(struct thread_struct *t,
                                unsigned int cpu, unsigned int i)
{
        struct desc_struct *gdt = get_cpu_gdt_table(cpu);
        xmaddr_t maddr = virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
        struct multicall_space mc = __xen_mc_entry(0);

        MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
}

static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
{
        /*
         * XXX sleazy hack: If we're being called in a lazy-cpu zone,
         * it means we're in a context switch, and %gs has just been
         * saved.  This means we can zero it out to prevent faults on
         * exit from the hypervisor if the next process has no %gs.
         * Either way, it has been saved, and the new value will get
         * loaded properly.  This will go away as soon as Xen has been
         * modified to not save/restore %gs for normal hypercalls.
         *
         * On x86_64, this hack is not used for %gs, because gs points
         * to KERNEL_GS_BASE (and uses it for PDA references), so we
         * must not zero %gs on x86_64
         *
         * For x86_64, we need to zero %fs, otherwise we may get an
         * exception between the new %fs descriptor being loaded and
         * %fs being effectively cleared at __switch_to().
         */
        if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
#ifdef CONFIG_X86_32
                loadsegment(gs, 0);
#else
                loadsegment(fs, 0);
#endif
        }

        xen_mc_batch();

        load_TLS_descriptor(t, cpu, 0);
        load_TLS_descriptor(t, cpu, 1);
        load_TLS_descriptor(t, cpu, 2);

        xen_mc_issue(PARAVIRT_LAZY_CPU);
}

#ifdef CONFIG_X86_64
static void xen_load_gs_index(unsigned int idx)
{
        if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
                BUG();
}
#endif

static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
                                const void *ptr)
{
        xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]);
        u64 entry = *(u64 *)ptr;

        preempt_disable();

        xen_mc_flush();
        if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry))
                BUG();

        preempt_enable();
}

static int cvt_gate_to_trap(int vector, const gate_desc *val,
                            struct trap_info *info)
{
        if (val->type != 0xf && val->type != 0xe)
                return 0;

        info->vector = vector;
        info->address = gate_offset(*val);
        info->cs = gate_segment(*val);
        info->flags = val->dpl;
        /* interrupt gates clear IF */
        if (val->type == 0xe)
                info->flags |= 4;

        return 1;
}
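On the flags encoding above: in Xen's trap_info the low two bits carry the gate's DPL and, as I read it, bit 2 marks "disable event delivery on entry", the PV analogue of an interrupt gate clearing IF -- hence the `|= 4` for type 0xe (interrupt gate) but not 0xf (trap gate). An illustrative restatement:

    /* Illustrative re-statement of cvt_gate_to_trap()'s flag packing. */
    #include <assert.h>

    static unsigned pack_flags(unsigned dpl, unsigned gate_type)
    {
            unsigned flags = dpl & 3;       /* bits 0-1: privilege level */
            if (gate_type == 0xe)           /* interrupt gate: mask events */
                    flags |= 4;             /* bit 2: "IF cleared on entry" */
            return flags;
    }

    int main(void)
    {
            assert(pack_flags(0, 0xe) == 4);  /* kernel interrupt gate */
            assert(pack_flags(3, 0xf) == 3);  /* user-callable trap gate */
            return 0;
    }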

/* Locations of each CPU's IDT */
static DEFINE_PER_CPU(struct desc_ptr, idt_desc);

/* Set an IDT entry.  If the entry is part of the current IDT, then
   also update Xen. */
static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
{
        unsigned long p = (unsigned long)&dt[entrynum];
        unsigned long start, end;

        preempt_disable();

        start = __get_cpu_var(idt_desc).address;
        end = start + __get_cpu_var(idt_desc).size + 1;

        xen_mc_flush();

        native_write_idt_entry(dt, entrynum, g);

        if (p >= start && (p + 8) <= end) {
                struct trap_info info[2];

                info[1].address = 0;

                if (cvt_gate_to_trap(entrynum, g, &info[0]))
                        if (HYPERVISOR_set_trap_table(info))
                                BUG();
        }

        preempt_enable();
}

static void xen_convert_trap_info(const struct desc_ptr *desc,
                                  struct trap_info *traps)
{
        unsigned in, out, count;

        count = (desc->size+1) / sizeof(gate_desc);
        BUG_ON(count > 256);

        for (in = out = 0; in < count; in++) {
                gate_desc *entry = (gate_desc*)(desc->address) + in;

                if (cvt_gate_to_trap(in, entry, &traps[out]))
                        out++;
        }
        traps[out].address = 0;
}

void xen_copy_trap_info(struct trap_info *traps)
{
        const struct desc_ptr *desc = &__get_cpu_var(idt_desc);

        xen_convert_trap_info(desc, traps);
}

/* Load a new IDT into Xen.  In principle this can be per-CPU, so we
   hold a spinlock to protect the static traps[] array (static because
   it avoids allocation, and saves stack space). */
static void xen_load_idt(const struct desc_ptr *desc)
{
        static DEFINE_SPINLOCK(lock);
        static struct trap_info traps[257];

        spin_lock(&lock);

        __get_cpu_var(idt_desc) = *desc;

        xen_convert_trap_info(desc, traps);

        xen_mc_flush();
        if (HYPERVISOR_set_trap_table(traps))
                BUG();

        spin_unlock(&lock);
}
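(Sizing note: traps[] holds 257 entries because xen_convert_trap_info() can emit up to 256 converted gates and always appends one zero-address entry, which HYPERVISOR_set_trap_table() treats as the terminator -- the same sentinel xen_write_idt_entry() sets up in info[1] for its single-entry call.)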

/* Write a GDT descriptor entry.  Ignore LDT descriptors, since
   they're handled differently. */
static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
                                const void *desc, int type)
{
        preempt_disable();

        switch (type) {
        case DESC_LDT:
        case DESC_TSS:
                /* ignore */
                break;

        default: {
                xmaddr_t maddr = virt_to_machine(&dt[entry]);

                xen_mc_flush();
                if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
                        BUG();
        }

        }

        preempt_enable();
}

static void xen_load_sp0(struct tss_struct *tss,
                         struct thread_struct *thread)
{
        struct multicall_space mcs = xen_mc_entry(0);
        MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
        xen_mc_issue(PARAVIRT_LAZY_CPU);
}

static void xen_set_iopl_mask(unsigned mask)
{
        struct physdev_set_iopl set_iopl;

        /* Force the change at ring 0. */
        set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
        HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
}
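The `(mask >> 12) & 3` above extracts the IOPL field, which lives in EFLAGS bits 12-13; the mask == 0 -> 1 special case presumably keeps the ring-1 PV kernel itself able to do I/O. A minimal restatement:

    #include <assert.h>

    static unsigned iopl_from_mask(unsigned long mask)
    {
            /* EFLAGS keeps IOPL in bits 12-13 */
            return (mask == 0) ? 1 : (unsigned)((mask >> 12) & 3);
    }

    int main(void)
    {
            assert(iopl_from_mask(0x3000) == 3);    /* user-level I/O allowed */
            assert(iopl_from_mask(0) == 1);         /* default under Xen */
            return 0;
    }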

static void xen_io_delay(void)
{
}

#ifdef CONFIG_X86_LOCAL_APIC
static u32 xen_apic_read(u32 reg)
{
        return 0;
}

static void xen_apic_write(u32 reg, u32 val)
{
        /* Warn to see if there are any stray references */
        WARN_ON(1);
}

static u64 xen_apic_icr_read(void)
{
        return 0;
}

static void xen_apic_icr_write(u32 low, u32 id)
{
        /* Warn to see if there are any stray references */
        WARN_ON(1);
}

static void xen_apic_wait_icr_idle(void)
{
        return;
}

static u32 xen_safe_apic_wait_icr_idle(void)
{
        return 0;
}

static struct apic_ops xen_basic_apic_ops = {
        .read = xen_apic_read,
        .write = xen_apic_write,
        .icr_read = xen_apic_icr_read,
        .icr_write = xen_apic_icr_write,
        .wait_icr_idle = xen_apic_wait_icr_idle,
        .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
};

#endif

static void xen_flush_tlb(void)
{
        struct mmuext_op *op;
        struct multicall_space mcs;

        preempt_disable();

        mcs = xen_mc_entry(sizeof(*op));

        op = mcs.args;
        op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
        MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);

        xen_mc_issue(PARAVIRT_LAZY_MMU);

        preempt_enable();
}

static void xen_flush_tlb_single(unsigned long addr)
{
        struct mmuext_op *op;
        struct multicall_space mcs;

        preempt_disable();

        mcs = xen_mc_entry(sizeof(*op));
        op = mcs.args;
        op->cmd = MMUEXT_INVLPG_LOCAL;
        op->arg1.linear_addr = addr & PAGE_MASK;
        MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);

        xen_mc_issue(PARAVIRT_LAZY_MMU);

        preempt_enable();
}

static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
                                 unsigned long va)
{
        struct {
                struct mmuext_op op;
                cpumask_t mask;
        } *args;
        cpumask_t cpumask = *cpus;
        struct multicall_space mcs;

        /*
         * A couple of (to be removed) sanity checks:
         *
         * - current CPU must not be in mask
         * - mask must exist :)
         */
        BUG_ON(cpus_empty(cpumask));
        BUG_ON(cpu_isset(smp_processor_id(), cpumask));
        BUG_ON(!mm);

        /* If a CPU which we ran on has gone down, OK. */
        cpus_and(cpumask, cpumask, cpu_online_map);
        if (cpus_empty(cpumask))
                return;

        mcs = xen_mc_entry(sizeof(*args));
        args = mcs.args;
        args->mask = cpumask;
        args->op.arg2.vcpumask = &args->mask;

        if (va == TLB_FLUSH_ALL) {
                args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
        } else {
                args->op.cmd = MMUEXT_INVLPG_MULTI;
                args->op.arg1.linear_addr = va;
        }

        MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);

        xen_mc_issue(PARAVIRT_LAZY_MMU);
}

static void xen_clts(void)
{
        struct multicall_space mcs;

        mcs = xen_mc_entry(0);

        MULTI_fpu_taskswitch(mcs.mc, 0);

        xen_mc_issue(PARAVIRT_LAZY_CPU);
}

static void xen_write_cr0(unsigned long cr0)
{
        struct multicall_space mcs;

        /* Only pay attention to cr0.TS; everything else is
           ignored. */
        mcs = xen_mc_entry(0);

        MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);

        xen_mc_issue(PARAVIRT_LAZY_CPU);
}

static void xen_write_cr2(unsigned long cr2)
{
        x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
}

static unsigned long xen_read_cr2(void)
{
        return x86_read_percpu(xen_vcpu)->arch.cr2;
}

static unsigned long xen_read_cr2_direct(void)
{
        return x86_read_percpu(xen_vcpu_info.arch.cr2);
}

static void xen_write_cr4(unsigned long cr4)
{
        cr4 &= ~X86_CR4_PGE;
        cr4 &= ~X86_CR4_PSE;

        native_write_cr4(cr4);
}

static unsigned long xen_read_cr3(void)
{
        return x86_read_percpu(xen_cr3);
}

static void set_current_cr3(void *v)
{
        x86_write_percpu(xen_current_cr3, (unsigned long)v);
}

static void __xen_write_cr3(bool kernel, unsigned long cr3)
{
        struct mmuext_op *op;
        struct multicall_space mcs;
        unsigned long mfn;

        if (cr3)
                mfn = pfn_to_mfn(PFN_DOWN(cr3));
        else
                mfn = 0;

        WARN_ON(mfn == 0 && kernel);

        mcs = __xen_mc_entry(sizeof(*op));

        op = mcs.args;
        op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
        op->arg1.mfn = mfn;

        MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);

        if (kernel) {
                x86_write_percpu(xen_cr3, cr3);

                /* Update xen_current_cr3 once the batch has actually
                   been submitted. */
                xen_mc_callback(set_current_cr3, (void *)cr3);
        }
}

static void xen_write_cr3(unsigned long cr3)
{
        BUG_ON(preemptible());

        xen_mc_batch();                 /* disables interrupts */

        /* Update while interrupts are disabled, so it's atomic with
           respect to IPIs */
        x86_write_percpu(xen_cr3, cr3);

        __xen_write_cr3(true, cr3);

#ifdef CONFIG_X86_64
        {
                pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
                if (user_pgd)
                        __xen_write_cr3(false, __pa(user_pgd));
                else
                        __xen_write_cr3(false, 0);
        }
#endif

        xen_mc_issue(PARAVIRT_LAZY_CPU);        /* interrupts restored */
}
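(The 64-bit block above reflects that a 64-bit PV guest maintains two pagetable bases -- MMUEXT_NEW_BASEPTR for the kernel and MMUEXT_NEW_USER_BASEPTR for the user half -- since without segment limits Xen separates kernel and user address spaces by pagetable instead; both updates ride in the same multicall batch.)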

static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
{
        int ret;

        ret = 0;

        switch (msr) {
#ifdef CONFIG_X86_64
                unsigned which;
                u64 base;

        case MSR_FS_BASE:               which = SEGBASE_FS; goto set;
        case MSR_KERNEL_GS_BASE:        which = SEGBASE_GS_USER; goto set;
        case MSR_GS_BASE:               which = SEGBASE_GS_KERNEL; goto set;

        set:
                base = ((u64)high << 32) | low;
                if (HYPERVISOR_set_segment_base(which, base) != 0)
                        ret = -EFAULT;
                break;
#endif

        case MSR_STAR:
        case MSR_CSTAR:
        case MSR_LSTAR:
        case MSR_SYSCALL_MASK:
        case MSR_IA32_SYSENTER_CS:
        case MSR_IA32_SYSENTER_ESP:
        case MSR_IA32_SYSENTER_EIP:
                /* Fast syscall setup is all done in hypercalls, so
                   these are all ignored.  Stub them out here to stop
                   Xen console noise. */
                break;

        default:
                ret = native_write_msr_safe(msr, low, high);
        }

        return ret;
}

/* Early in boot, while setting up the initial pagetable, assume
   everything is pinned. */
static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
{
#ifdef CONFIG_FLATMEM
        BUG_ON(mem_map);        /* should only be used early */
#endif
        make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
}

/* Early release_pte assumes that all pts are pinned, since there's
   only init_mm and anything attached to that is pinned. */
static void xen_release_pte_init(unsigned long pfn)
{
        make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}

static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
{
        struct mmuext_op op;
        op.cmd = cmd;
        op.arg1.mfn = pfn_to_mfn(pfn);
        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
                BUG();
}

/* This needs to make sure the new pte page is pinned iff it's being
   attached to a pinned pagetable. */
static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level)
{
        struct page *page = pfn_to_page(pfn);

        if (PagePinned(virt_to_page(mm->pgd))) {
                SetPagePinned(page);

                vm_unmap_aliases();
                if (!PageHighMem(page)) {
                        make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
                        if (level == PT_PTE && USE_SPLIT_PTLOCKS)
                                pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
                } else {
                        /* make sure there are no stray mappings of
                           this page */
                        kmap_flush_unused();
                }
        }
}
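(Background on "pinned": once a pagetable is pinned, Xen validates it continuously, so every page that joins it must first be made read-only from the guest's point of view -- the make_lowmem_page_readonly() above -- and, with split pte locks, explicitly pinned as an L1 table. vm_unmap_aliases() and kmap_flush_unused() exist to flush any stray writable aliases that would make that validation fail.)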

static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
{
        xen_alloc_ptpage(mm, pfn, PT_PTE);
}

static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
{
        xen_alloc_ptpage(mm, pfn, PT_PMD);
}

static int xen_pgd_alloc(struct mm_struct *mm)
{
        pgd_t *pgd = mm->pgd;
        int ret = 0;

        BUG_ON(PagePinned(virt_to_page(pgd)));

#ifdef CONFIG_X86_64
        {
                struct page *page = virt_to_page(pgd);
                pgd_t *user_pgd;

                BUG_ON(page->private != 0);

                ret = -ENOMEM;

                user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
                page->private = (unsigned long)user_pgd;

                if (user_pgd != NULL) {
                        user_pgd[pgd_index(VSYSCALL_START)] =
                                __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
                        ret = 0;
                }

                BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
        }
#endif

        return ret;
}

static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
#ifdef CONFIG_X86_64
        pgd_t *user_pgd = xen_get_user_pgd(pgd);

        if (user_pgd)
                free_page((unsigned long)user_pgd);
#endif
}

/* This should never happen until we're OK to use struct page */
static void xen_release_ptpage(unsigned long pfn, unsigned level)
{
        struct page *page = pfn_to_page(pfn);

        if (PagePinned(page)) {
                if (!PageHighMem(page)) {
                        if (level == PT_PTE && USE_SPLIT_PTLOCKS)
                                pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
                        make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
                }
                ClearPagePinned(page);
        }
}

static void xen_release_pte(unsigned long pfn)
{
        xen_release_ptpage(pfn, PT_PTE);
}

static void xen_release_pmd(unsigned long pfn)
{
        xen_release_ptpage(pfn, PT_PMD);
}

#if PAGETABLE_LEVELS == 4
static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
{
        xen_alloc_ptpage(mm, pfn, PT_PUD);
}

static void xen_release_pud(unsigned long pfn)
{
        xen_release_ptpage(pfn, PT_PUD);
}
#endif

#ifdef CONFIG_HIGHPTE
static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
{
        pgprot_t prot = PAGE_KERNEL;

        if (PagePinned(page))
                prot = PAGE_KERNEL_RO;

        if (0 && PageHighMem(page))
                printk("mapping highpte %lx type %d prot %s\n",
                       page_to_pfn(page), type,
                       (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ");

        return kmap_atomic_prot(page, type, prot);
}
#endif

#ifdef CONFIG_X86_32
static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
{
        /* If there's an existing pte, then don't allow _PAGE_RW to be set */
        if (pte_val_ma(*ptep) & _PAGE_PRESENT)
                pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
                               pte_val_ma(pte));

        return pte;
}
995 | 996 | ||
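The masking expression in mask_rw_pte() above is dense. Here is a standalone sketch of the same bit trick, assuming _PAGE_RW is bit 1 as on x86 (plain C, not kernel code): if the existing pte is present but lacks _PAGE_RW, the expression clears _PAGE_RW in the incoming value; if the old pte was writable, (RW | ~RW) is all-ones and the incoming value passes through unchanged.

    /* Minimal model of the ((old & RW) | ~RW) & new trick; not kernel code. */
    #include <stdint.h>
    #include <stdio.h>

    #define RW 0x2ULL                      /* stand-in for _PAGE_RW (bit 1) */

    static uint64_t mask_rw(uint64_t oldval, uint64_t newval)
    {
            return ((oldval & RW) | ~RW) & newval;
    }

    int main(void)
    {
            /* old pte present but read-only (0x1): RW stripped, 0x3 -> 0x1 */
            printf("%#llx\n", (unsigned long long)mask_rw(0x1, 0x3));
            /* old pte writable (0x3): new value 0x3 passes through unchanged */
            printf("%#llx\n", (unsigned long long)mask_rw(0x3, 0x3));
            return 0;
    }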
996 | /* Init-time set_pte while constructing initial pagetables, which | 997 | /* Init-time set_pte while constructing initial pagetables, which |
997 | doesn't allow RO pagetable pages to be remapped RW */ | 998 | doesn't allow RO pagetable pages to be remapped RW */ |
998 | static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) | 999 | static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) |
999 | { | 1000 | { |
1000 | pte = mask_rw_pte(ptep, pte); | 1001 | pte = mask_rw_pte(ptep, pte); |
1001 | 1002 | ||
1002 | xen_set_pte(ptep, pte); | 1003 | xen_set_pte(ptep, pte); |
1003 | } | 1004 | } |
1004 | #endif | 1005 | #endif |
1005 | 1006 | ||
1006 | static __init void xen_pagetable_setup_start(pgd_t *base) | 1007 | static __init void xen_pagetable_setup_start(pgd_t *base) |
1007 | { | 1008 | { |
1008 | } | 1009 | } |
1009 | 1010 | ||
1010 | void xen_setup_shared_info(void) | 1011 | void xen_setup_shared_info(void) |
1011 | { | 1012 | { |
1012 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | 1013 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { |
1013 | set_fixmap(FIX_PARAVIRT_BOOTMAP, | 1014 | set_fixmap(FIX_PARAVIRT_BOOTMAP, |
1014 | xen_start_info->shared_info); | 1015 | xen_start_info->shared_info); |
1015 | 1016 | ||
1016 | HYPERVISOR_shared_info = | 1017 | HYPERVISOR_shared_info = |
1017 | (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); | 1018 | (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); |
1018 | } else | 1019 | } else |
1019 | HYPERVISOR_shared_info = | 1020 | HYPERVISOR_shared_info = |
1020 | (struct shared_info *)__va(xen_start_info->shared_info); | 1021 | (struct shared_info *)__va(xen_start_info->shared_info); |
1021 | 1022 | ||
1022 | #ifndef CONFIG_SMP | 1023 | #ifndef CONFIG_SMP |
1023 | /* In UP this is as good a place as any to set up shared info */ | 1024 | /* In UP this is as good a place as any to set up shared info */ |
1024 | xen_setup_vcpu_info_placement(); | 1025 | xen_setup_vcpu_info_placement(); |
1025 | #endif | 1026 | #endif |
1026 | 1027 | ||
1027 | xen_setup_mfn_list_list(); | 1028 | xen_setup_mfn_list_list(); |
1028 | } | 1029 | } |
1029 | 1030 | ||
1030 | static __init void xen_pagetable_setup_done(pgd_t *base) | 1031 | static __init void xen_pagetable_setup_done(pgd_t *base) |
1031 | { | 1032 | { |
1032 | xen_setup_shared_info(); | 1033 | xen_setup_shared_info(); |
1033 | } | 1034 | } |
1034 | 1035 | ||
1035 | static __init void xen_post_allocator_init(void) | 1036 | static __init void xen_post_allocator_init(void) |
1036 | { | 1037 | { |
1037 | pv_mmu_ops.set_pte = xen_set_pte; | 1038 | pv_mmu_ops.set_pte = xen_set_pte; |
1038 | pv_mmu_ops.set_pmd = xen_set_pmd; | 1039 | pv_mmu_ops.set_pmd = xen_set_pmd; |
1039 | pv_mmu_ops.set_pud = xen_set_pud; | 1040 | pv_mmu_ops.set_pud = xen_set_pud; |
1040 | #if PAGETABLE_LEVELS == 4 | 1041 | #if PAGETABLE_LEVELS == 4 |
1041 | pv_mmu_ops.set_pgd = xen_set_pgd; | 1042 | pv_mmu_ops.set_pgd = xen_set_pgd; |
1042 | #endif | 1043 | #endif |
1043 | 1044 | ||
1044 | /* This will work as long as patching hasn't happened yet | 1045 | /* This will work as long as patching hasn't happened yet |
1045 | (which it hasn't) */ | 1046 | (which it hasn't) */ |
1046 | pv_mmu_ops.alloc_pte = xen_alloc_pte; | 1047 | pv_mmu_ops.alloc_pte = xen_alloc_pte; |
1047 | pv_mmu_ops.alloc_pmd = xen_alloc_pmd; | 1048 | pv_mmu_ops.alloc_pmd = xen_alloc_pmd; |
1048 | pv_mmu_ops.release_pte = xen_release_pte; | 1049 | pv_mmu_ops.release_pte = xen_release_pte; |
1049 | pv_mmu_ops.release_pmd = xen_release_pmd; | 1050 | pv_mmu_ops.release_pmd = xen_release_pmd; |
1050 | #if PAGETABLE_LEVELS == 4 | 1051 | #if PAGETABLE_LEVELS == 4 |
1051 | pv_mmu_ops.alloc_pud = xen_alloc_pud; | 1052 | pv_mmu_ops.alloc_pud = xen_alloc_pud; |
1052 | pv_mmu_ops.release_pud = xen_release_pud; | 1053 | pv_mmu_ops.release_pud = xen_release_pud; |
1053 | #endif | 1054 | #endif |
1054 | 1055 | ||
1055 | #ifdef CONFIG_X86_64 | 1056 | #ifdef CONFIG_X86_64 |
1056 | SetPagePinned(virt_to_page(level3_user_vsyscall)); | 1057 | SetPagePinned(virt_to_page(level3_user_vsyscall)); |
1057 | #endif | 1058 | #endif |
1058 | xen_mark_init_mm_pinned(); | 1059 | xen_mark_init_mm_pinned(); |
1059 | } | 1060 | } |
1060 | 1061 | ||
1061 | /* This is called once we have the cpu_possible_map */ | 1062 | /* This is called once we have the cpu_possible_map */ |
1062 | void xen_setup_vcpu_info_placement(void) | 1063 | void xen_setup_vcpu_info_placement(void) |
1063 | { | 1064 | { |
1064 | int cpu; | 1065 | int cpu; |
1065 | 1066 | ||
1066 | for_each_possible_cpu(cpu) | 1067 | for_each_possible_cpu(cpu) |
1067 | xen_vcpu_setup(cpu); | 1068 | xen_vcpu_setup(cpu); |
1068 | 1069 | ||
1069 | /* xen_vcpu_setup managed to place the vcpu_info within the | 1070 | /* xen_vcpu_setup managed to place the vcpu_info within the |
1070 | percpu area for all cpus, so make use of it */ | 1071 | percpu area for all cpus, so make use of it */ |
1071 | if (have_vcpu_info_placement) { | 1072 | if (have_vcpu_info_placement) { |
1072 | printk(KERN_INFO "Xen: using vcpu_info placement\n"); | 1073 | printk(KERN_INFO "Xen: using vcpu_info placement\n"); |
1073 | 1074 | ||
1074 | pv_irq_ops.save_fl = xen_save_fl_direct; | 1075 | pv_irq_ops.save_fl = xen_save_fl_direct; |
1075 | pv_irq_ops.restore_fl = xen_restore_fl_direct; | 1076 | pv_irq_ops.restore_fl = xen_restore_fl_direct; |
1076 | pv_irq_ops.irq_disable = xen_irq_disable_direct; | 1077 | pv_irq_ops.irq_disable = xen_irq_disable_direct; |
1077 | pv_irq_ops.irq_enable = xen_irq_enable_direct; | 1078 | pv_irq_ops.irq_enable = xen_irq_enable_direct; |
1078 | pv_mmu_ops.read_cr2 = xen_read_cr2_direct; | 1079 | pv_mmu_ops.read_cr2 = xen_read_cr2_direct; |
1079 | } | 1080 | } |
1080 | } | 1081 | } |
1081 | 1082 | ||
1082 | static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, | 1083 | static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, |
1083 | unsigned long addr, unsigned len) | 1084 | unsigned long addr, unsigned len) |
1084 | { | 1085 | { |
1085 | char *start, *end, *reloc; | 1086 | char *start, *end, *reloc; |
1086 | unsigned ret; | 1087 | unsigned ret; |
1087 | 1088 | ||
1088 | start = end = reloc = NULL; | 1089 | start = end = reloc = NULL; |
1089 | 1090 | ||
1090 | #define SITE(op, x) \ | 1091 | #define SITE(op, x) \ |
1091 | case PARAVIRT_PATCH(op.x): \ | 1092 | case PARAVIRT_PATCH(op.x): \ |
1092 | if (have_vcpu_info_placement) { \ | 1093 | if (have_vcpu_info_placement) { \ |
1093 | start = (char *)xen_##x##_direct; \ | 1094 | start = (char *)xen_##x##_direct; \ |
1094 | end = xen_##x##_direct_end; \ | 1095 | end = xen_##x##_direct_end; \ |
1095 | reloc = xen_##x##_direct_reloc; \ | 1096 | reloc = xen_##x##_direct_reloc; \ |
1096 | } \ | 1097 | } \ |
1097 | goto patch_site | 1098 | goto patch_site |
1098 | 1099 | ||
1099 | switch (type) { | 1100 | switch (type) { |
1100 | SITE(pv_irq_ops, irq_enable); | 1101 | SITE(pv_irq_ops, irq_enable); |
1101 | SITE(pv_irq_ops, irq_disable); | 1102 | SITE(pv_irq_ops, irq_disable); |
1102 | SITE(pv_irq_ops, save_fl); | 1103 | SITE(pv_irq_ops, save_fl); |
1103 | SITE(pv_irq_ops, restore_fl); | 1104 | SITE(pv_irq_ops, restore_fl); |
1104 | #undef SITE | 1105 | #undef SITE |
1105 | 1106 | ||
1106 | patch_site: | 1107 | patch_site: |
1107 | if (start == NULL || (end-start) > len) | 1108 | if (start == NULL || (end-start) > len) |
1108 | goto default_patch; | 1109 | goto default_patch; |
1109 | 1110 | ||
1110 | ret = paravirt_patch_insns(insnbuf, len, start, end); | 1111 | ret = paravirt_patch_insns(insnbuf, len, start, end); |
1111 | 1112 | ||
1112 | /* Note: because reloc is assigned from something that | 1113 | /* Note: because reloc is assigned from something that |
1113 | appears to be an array, gcc assumes it's non-null, | 1114 | appears to be an array, gcc assumes it's non-null, |
1114 | but doesn't know its relationship with start and | 1115 | but doesn't know its relationship with start and |
1115 | end. */ | 1116 | end. */ |
1116 | if (reloc > start && reloc < end) { | 1117 | if (reloc > start && reloc < end) { |
1117 | int reloc_off = reloc - start; | 1118 | int reloc_off = reloc - start; |
1118 | long *relocp = (long *)(insnbuf + reloc_off); | 1119 | long *relocp = (long *)(insnbuf + reloc_off); |
1119 | long delta = start - (char *)addr; | 1120 | long delta = start - (char *)addr; |
1120 | 1121 | ||
1121 | *relocp += delta; | 1122 | *relocp += delta; |
1122 | } | 1123 | } |
1123 | break; | 1124 | break; |
1124 | 1125 | ||
1125 | default_patch: | 1126 | default_patch: |
1126 | default: | 1127 | default: |
1127 | ret = paravirt_patch_default(type, clobbers, insnbuf, | 1128 | ret = paravirt_patch_default(type, clobbers, insnbuf, |
1128 | addr, len); | 1129 | addr, len); |
1129 | break; | 1130 | break; |
1130 | } | 1131 | } |
1131 | 1132 | ||
1132 | return ret; | 1133 | return ret; |
1133 | } | 1134 | } |
1134 | 1135 | ||
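For readers tracing the SITE() macro above, the expansion of, say, SITE(pv_irq_ops, irq_enable) follows mechanically from the token pasting:

    case PARAVIRT_PATCH(pv_irq_ops.irq_enable):
            if (have_vcpu_info_placement) {
                    start = (char *)xen_irq_enable_direct;
                    end = xen_irq_enable_direct_end;
                    reloc = xen_irq_enable_direct_reloc;
            }
            goto patch_site;

So when vcpu_info placement is active, the pre-built direct-access stub is patched in at the call site; otherwise start stays NULL and patch_site falls through to paravirt_patch_default().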
1135 | static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) | 1136 | static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) |
1136 | { | 1137 | { |
1137 | pte_t pte; | 1138 | pte_t pte; |
1138 | 1139 | ||
1139 | phys >>= PAGE_SHIFT; | 1140 | phys >>= PAGE_SHIFT; |
1140 | 1141 | ||
1141 | switch (idx) { | 1142 | switch (idx) { |
1142 | case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: | 1143 | case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: |
1143 | #ifdef CONFIG_X86_F00F_BUG | 1144 | #ifdef CONFIG_X86_F00F_BUG |
1144 | case FIX_F00F_IDT: | 1145 | case FIX_F00F_IDT: |
1145 | #endif | 1146 | #endif |
1146 | #ifdef CONFIG_X86_32 | 1147 | #ifdef CONFIG_X86_32 |
1147 | case FIX_WP_TEST: | 1148 | case FIX_WP_TEST: |
1148 | case FIX_VDSO: | 1149 | case FIX_VDSO: |
1149 | # ifdef CONFIG_HIGHMEM | 1150 | # ifdef CONFIG_HIGHMEM |
1150 | case FIX_KMAP_BEGIN ... FIX_KMAP_END: | 1151 | case FIX_KMAP_BEGIN ... FIX_KMAP_END: |
1151 | # endif | 1152 | # endif |
1152 | #else | 1153 | #else |
1153 | case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: | 1154 | case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: |
1154 | #endif | 1155 | #endif |
1155 | #ifdef CONFIG_X86_LOCAL_APIC | 1156 | #ifdef CONFIG_X86_LOCAL_APIC |
1156 | case FIX_APIC_BASE: /* maps dummy local APIC */ | 1157 | case FIX_APIC_BASE: /* maps dummy local APIC */ |
1157 | #endif | 1158 | #endif |
1158 | pte = pfn_pte(phys, prot); | 1159 | pte = pfn_pte(phys, prot); |
1159 | break; | 1160 | break; |
1160 | 1161 | ||
1161 | default: | 1162 | default: |
1162 | pte = mfn_pte(phys, prot); | 1163 | pte = mfn_pte(phys, prot); |
1163 | break; | 1164 | break; |
1164 | } | 1165 | } |
1165 | 1166 | ||
1166 | __native_set_fixmap(idx, pte); | 1167 | __native_set_fixmap(idx, pte); |
1167 | 1168 | ||
1168 | #ifdef CONFIG_X86_64 | 1169 | #ifdef CONFIG_X86_64 |
1169 | /* Replicate changes to map the vsyscall page into the user | 1170 | /* Replicate changes to map the vsyscall page into the user |
1170 | pagetable vsyscall mapping. */ | 1171 | pagetable vsyscall mapping. */ |
1171 | if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { | 1172 | if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { |
1172 | unsigned long vaddr = __fix_to_virt(idx); | 1173 | unsigned long vaddr = __fix_to_virt(idx); |
1173 | set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); | 1174 | set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); |
1174 | } | 1175 | } |
1175 | #endif | 1176 | #endif |
1176 | } | 1177 | } |
1177 | 1178 | ||
1178 | static const struct pv_info xen_info __initdata = { | 1179 | static const struct pv_info xen_info __initdata = { |
1179 | .paravirt_enabled = 1, | 1180 | .paravirt_enabled = 1, |
1180 | .shared_kernel_pmd = 0, | 1181 | .shared_kernel_pmd = 0, |
1181 | 1182 | ||
1182 | .name = "Xen", | 1183 | .name = "Xen", |
1183 | }; | 1184 | }; |
1184 | 1185 | ||
1185 | static const struct pv_init_ops xen_init_ops __initdata = { | 1186 | static const struct pv_init_ops xen_init_ops __initdata = { |
1186 | .patch = xen_patch, | 1187 | .patch = xen_patch, |
1187 | 1188 | ||
1188 | .banner = xen_banner, | 1189 | .banner = xen_banner, |
1189 | .memory_setup = xen_memory_setup, | 1190 | .memory_setup = xen_memory_setup, |
1190 | .arch_setup = xen_arch_setup, | 1191 | .arch_setup = xen_arch_setup, |
1191 | .post_allocator_init = xen_post_allocator_init, | 1192 | .post_allocator_init = xen_post_allocator_init, |
1192 | }; | 1193 | }; |
1193 | 1194 | ||
1194 | static const struct pv_time_ops xen_time_ops __initdata = { | 1195 | static const struct pv_time_ops xen_time_ops __initdata = { |
1195 | .time_init = xen_time_init, | 1196 | .time_init = xen_time_init, |
1196 | 1197 | ||
1197 | .set_wallclock = xen_set_wallclock, | 1198 | .set_wallclock = xen_set_wallclock, |
1198 | .get_wallclock = xen_get_wallclock, | 1199 | .get_wallclock = xen_get_wallclock, |
1199 | .get_tsc_khz = xen_tsc_khz, | 1200 | .get_tsc_khz = xen_tsc_khz, |
1200 | .sched_clock = xen_sched_clock, | 1201 | .sched_clock = xen_sched_clock, |
1201 | }; | 1202 | }; |
1202 | 1203 | ||
1203 | static const struct pv_cpu_ops xen_cpu_ops __initdata = { | 1204 | static const struct pv_cpu_ops xen_cpu_ops __initdata = { |
1204 | .cpuid = xen_cpuid, | 1205 | .cpuid = xen_cpuid, |
1205 | 1206 | ||
1206 | .set_debugreg = xen_set_debugreg, | 1207 | .set_debugreg = xen_set_debugreg, |
1207 | .get_debugreg = xen_get_debugreg, | 1208 | .get_debugreg = xen_get_debugreg, |
1208 | 1209 | ||
1209 | .clts = xen_clts, | 1210 | .clts = xen_clts, |
1210 | 1211 | ||
1211 | .read_cr0 = native_read_cr0, | 1212 | .read_cr0 = native_read_cr0, |
1212 | .write_cr0 = xen_write_cr0, | 1213 | .write_cr0 = xen_write_cr0, |
1213 | 1214 | ||
1214 | .read_cr4 = native_read_cr4, | 1215 | .read_cr4 = native_read_cr4, |
1215 | .read_cr4_safe = native_read_cr4_safe, | 1216 | .read_cr4_safe = native_read_cr4_safe, |
1216 | .write_cr4 = xen_write_cr4, | 1217 | .write_cr4 = xen_write_cr4, |
1217 | 1218 | ||
1218 | .wbinvd = native_wbinvd, | 1219 | .wbinvd = native_wbinvd, |
1219 | 1220 | ||
1220 | .read_msr = native_read_msr_safe, | 1221 | .read_msr = native_read_msr_safe, |
1221 | .write_msr = xen_write_msr_safe, | 1222 | .write_msr = xen_write_msr_safe, |
1222 | .read_tsc = native_read_tsc, | 1223 | .read_tsc = native_read_tsc, |
1223 | .read_pmc = native_read_pmc, | 1224 | .read_pmc = native_read_pmc, |
1224 | 1225 | ||
1225 | .iret = xen_iret, | 1226 | .iret = xen_iret, |
1226 | .irq_enable_sysexit = xen_sysexit, | 1227 | .irq_enable_sysexit = xen_sysexit, |
1227 | #ifdef CONFIG_X86_64 | 1228 | #ifdef CONFIG_X86_64 |
1228 | .usergs_sysret32 = xen_sysret32, | 1229 | .usergs_sysret32 = xen_sysret32, |
1229 | .usergs_sysret64 = xen_sysret64, | 1230 | .usergs_sysret64 = xen_sysret64, |
1230 | #endif | 1231 | #endif |
1231 | 1232 | ||
1232 | .load_tr_desc = paravirt_nop, | 1233 | .load_tr_desc = paravirt_nop, |
1233 | .set_ldt = xen_set_ldt, | 1234 | .set_ldt = xen_set_ldt, |
1234 | .load_gdt = xen_load_gdt, | 1235 | .load_gdt = xen_load_gdt, |
1235 | .load_idt = xen_load_idt, | 1236 | .load_idt = xen_load_idt, |
1236 | .load_tls = xen_load_tls, | 1237 | .load_tls = xen_load_tls, |
1237 | #ifdef CONFIG_X86_64 | 1238 | #ifdef CONFIG_X86_64 |
1238 | .load_gs_index = xen_load_gs_index, | 1239 | .load_gs_index = xen_load_gs_index, |
1239 | #endif | 1240 | #endif |
1240 | 1241 | ||
1241 | .alloc_ldt = xen_alloc_ldt, | 1242 | .alloc_ldt = xen_alloc_ldt, |
1242 | .free_ldt = xen_free_ldt, | 1243 | .free_ldt = xen_free_ldt, |
1243 | 1244 | ||
1244 | .store_gdt = native_store_gdt, | 1245 | .store_gdt = native_store_gdt, |
1245 | .store_idt = native_store_idt, | 1246 | .store_idt = native_store_idt, |
1246 | .store_tr = xen_store_tr, | 1247 | .store_tr = xen_store_tr, |
1247 | 1248 | ||
1248 | .write_ldt_entry = xen_write_ldt_entry, | 1249 | .write_ldt_entry = xen_write_ldt_entry, |
1249 | .write_gdt_entry = xen_write_gdt_entry, | 1250 | .write_gdt_entry = xen_write_gdt_entry, |
1250 | .write_idt_entry = xen_write_idt_entry, | 1251 | .write_idt_entry = xen_write_idt_entry, |
1251 | .load_sp0 = xen_load_sp0, | 1252 | .load_sp0 = xen_load_sp0, |
1252 | 1253 | ||
1253 | .set_iopl_mask = xen_set_iopl_mask, | 1254 | .set_iopl_mask = xen_set_iopl_mask, |
1254 | .io_delay = xen_io_delay, | 1255 | .io_delay = xen_io_delay, |
1255 | 1256 | ||
1256 | /* Xen takes care of %gs when switching to usermode for us */ | 1257 | /* Xen takes care of %gs when switching to usermode for us */ |
1257 | .swapgs = paravirt_nop, | 1258 | .swapgs = paravirt_nop, |
1258 | 1259 | ||
1259 | .lazy_mode = { | 1260 | .lazy_mode = { |
1260 | .enter = paravirt_enter_lazy_cpu, | 1261 | .enter = paravirt_enter_lazy_cpu, |
1261 | .leave = xen_leave_lazy, | 1262 | .leave = xen_leave_lazy, |
1262 | }, | 1263 | }, |
1263 | }; | 1264 | }; |
1264 | 1265 | ||
1265 | static const struct pv_apic_ops xen_apic_ops __initdata = { | 1266 | static const struct pv_apic_ops xen_apic_ops __initdata = { |
1266 | #ifdef CONFIG_X86_LOCAL_APIC | 1267 | #ifdef CONFIG_X86_LOCAL_APIC |
1267 | .setup_boot_clock = paravirt_nop, | 1268 | .setup_boot_clock = paravirt_nop, |
1268 | .setup_secondary_clock = paravirt_nop, | 1269 | .setup_secondary_clock = paravirt_nop, |
1269 | .startup_ipi_hook = paravirt_nop, | 1270 | .startup_ipi_hook = paravirt_nop, |
1270 | #endif | 1271 | #endif |
1271 | }; | 1272 | }; |
1272 | 1273 | ||
1273 | static const struct pv_mmu_ops xen_mmu_ops __initdata = { | 1274 | static const struct pv_mmu_ops xen_mmu_ops __initdata = { |
1274 | .pagetable_setup_start = xen_pagetable_setup_start, | 1275 | .pagetable_setup_start = xen_pagetable_setup_start, |
1275 | .pagetable_setup_done = xen_pagetable_setup_done, | 1276 | .pagetable_setup_done = xen_pagetable_setup_done, |
1276 | 1277 | ||
1277 | .read_cr2 = xen_read_cr2, | 1278 | .read_cr2 = xen_read_cr2, |
1278 | .write_cr2 = xen_write_cr2, | 1279 | .write_cr2 = xen_write_cr2, |
1279 | 1280 | ||
1280 | .read_cr3 = xen_read_cr3, | 1281 | .read_cr3 = xen_read_cr3, |
1281 | .write_cr3 = xen_write_cr3, | 1282 | .write_cr3 = xen_write_cr3, |
1282 | 1283 | ||
1283 | .flush_tlb_user = xen_flush_tlb, | 1284 | .flush_tlb_user = xen_flush_tlb, |
1284 | .flush_tlb_kernel = xen_flush_tlb, | 1285 | .flush_tlb_kernel = xen_flush_tlb, |
1285 | .flush_tlb_single = xen_flush_tlb_single, | 1286 | .flush_tlb_single = xen_flush_tlb_single, |
1286 | .flush_tlb_others = xen_flush_tlb_others, | 1287 | .flush_tlb_others = xen_flush_tlb_others, |
1287 | 1288 | ||
1288 | .pte_update = paravirt_nop, | 1289 | .pte_update = paravirt_nop, |
1289 | .pte_update_defer = paravirt_nop, | 1290 | .pte_update_defer = paravirt_nop, |
1290 | 1291 | ||
1291 | .pgd_alloc = xen_pgd_alloc, | 1292 | .pgd_alloc = xen_pgd_alloc, |
1292 | .pgd_free = xen_pgd_free, | 1293 | .pgd_free = xen_pgd_free, |
1293 | 1294 | ||
1294 | .alloc_pte = xen_alloc_pte_init, | 1295 | .alloc_pte = xen_alloc_pte_init, |
1295 | .release_pte = xen_release_pte_init, | 1296 | .release_pte = xen_release_pte_init, |
1296 | .alloc_pmd = xen_alloc_pte_init, | 1297 | .alloc_pmd = xen_alloc_pte_init, |
1297 | .alloc_pmd_clone = paravirt_nop, | 1298 | .alloc_pmd_clone = paravirt_nop, |
1298 | .release_pmd = xen_release_pte_init, | 1299 | .release_pmd = xen_release_pte_init, |
1299 | 1300 | ||
1300 | #ifdef CONFIG_HIGHPTE | 1301 | #ifdef CONFIG_HIGHPTE |
1301 | .kmap_atomic_pte = xen_kmap_atomic_pte, | 1302 | .kmap_atomic_pte = xen_kmap_atomic_pte, |
1302 | #endif | 1303 | #endif |
1303 | 1304 | ||
1304 | #ifdef CONFIG_X86_64 | 1305 | #ifdef CONFIG_X86_64 |
1305 | .set_pte = xen_set_pte, | 1306 | .set_pte = xen_set_pte, |
1306 | #else | 1307 | #else |
1307 | .set_pte = xen_set_pte_init, | 1308 | .set_pte = xen_set_pte_init, |
1308 | #endif | 1309 | #endif |
1309 | .set_pte_at = xen_set_pte_at, | 1310 | .set_pte_at = xen_set_pte_at, |
1310 | .set_pmd = xen_set_pmd_hyper, | 1311 | .set_pmd = xen_set_pmd_hyper, |
1311 | 1312 | ||
1312 | .ptep_modify_prot_start = __ptep_modify_prot_start, | 1313 | .ptep_modify_prot_start = __ptep_modify_prot_start, |
1313 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, | 1314 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, |
1314 | 1315 | ||
1315 | .pte_val = xen_pte_val, | 1316 | .pte_val = xen_pte_val, |
1316 | .pte_flags = native_pte_flags, | 1317 | .pte_flags = native_pte_flags, |
1317 | .pgd_val = xen_pgd_val, | 1318 | .pgd_val = xen_pgd_val, |
1318 | 1319 | ||
1319 | .make_pte = xen_make_pte, | 1320 | .make_pte = xen_make_pte, |
1320 | .make_pgd = xen_make_pgd, | 1321 | .make_pgd = xen_make_pgd, |
1321 | 1322 | ||
1322 | #ifdef CONFIG_X86_PAE | 1323 | #ifdef CONFIG_X86_PAE |
1323 | .set_pte_atomic = xen_set_pte_atomic, | 1324 | .set_pte_atomic = xen_set_pte_atomic, |
1324 | .set_pte_present = xen_set_pte_at, | 1325 | .set_pte_present = xen_set_pte_at, |
1325 | .pte_clear = xen_pte_clear, | 1326 | .pte_clear = xen_pte_clear, |
1326 | .pmd_clear = xen_pmd_clear, | 1327 | .pmd_clear = xen_pmd_clear, |
1327 | #endif /* CONFIG_X86_PAE */ | 1328 | #endif /* CONFIG_X86_PAE */ |
1328 | .set_pud = xen_set_pud_hyper, | 1329 | .set_pud = xen_set_pud_hyper, |
1329 | 1330 | ||
1330 | .make_pmd = xen_make_pmd, | 1331 | .make_pmd = xen_make_pmd, |
1331 | .pmd_val = xen_pmd_val, | 1332 | .pmd_val = xen_pmd_val, |
1332 | 1333 | ||
1333 | #if PAGETABLE_LEVELS == 4 | 1334 | #if PAGETABLE_LEVELS == 4 |
1334 | .pud_val = xen_pud_val, | 1335 | .pud_val = xen_pud_val, |
1335 | .make_pud = xen_make_pud, | 1336 | .make_pud = xen_make_pud, |
1336 | .set_pgd = xen_set_pgd_hyper, | 1337 | .set_pgd = xen_set_pgd_hyper, |
1337 | 1338 | ||
1338 | .alloc_pud = xen_alloc_pte_init, | 1339 | .alloc_pud = xen_alloc_pte_init, |
1339 | .release_pud = xen_release_pte_init, | 1340 | .release_pud = xen_release_pte_init, |
1340 | #endif /* PAGETABLE_LEVELS == 4 */ | 1341 | #endif /* PAGETABLE_LEVELS == 4 */ |
1341 | 1342 | ||
1342 | .activate_mm = xen_activate_mm, | 1343 | .activate_mm = xen_activate_mm, |
1343 | .dup_mmap = xen_dup_mmap, | 1344 | .dup_mmap = xen_dup_mmap, |
1344 | .exit_mmap = xen_exit_mmap, | 1345 | .exit_mmap = xen_exit_mmap, |
1345 | 1346 | ||
1346 | .lazy_mode = { | 1347 | .lazy_mode = { |
1347 | .enter = paravirt_enter_lazy_mmu, | 1348 | .enter = paravirt_enter_lazy_mmu, |
1348 | .leave = xen_leave_lazy, | 1349 | .leave = xen_leave_lazy, |
1349 | }, | 1350 | }, |
1350 | 1351 | ||
1351 | .set_fixmap = xen_set_fixmap, | 1352 | .set_fixmap = xen_set_fixmap, |
1352 | }; | 1353 | }; |
1353 | 1354 | ||
1354 | static void xen_reboot(int reason) | 1355 | static void xen_reboot(int reason) |
1355 | { | 1356 | { |
1356 | struct sched_shutdown r = { .reason = reason }; | 1357 | struct sched_shutdown r = { .reason = reason }; |
1357 | 1358 | ||
1358 | #ifdef CONFIG_SMP | 1359 | #ifdef CONFIG_SMP |
1359 | smp_send_stop(); | 1360 | smp_send_stop(); |
1360 | #endif | 1361 | #endif |
1361 | 1362 | ||
1362 | if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) | 1363 | if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) |
1363 | BUG(); | 1364 | BUG(); |
1364 | } | 1365 | } |
1365 | 1366 | ||
1366 | static void xen_restart(char *msg) | 1367 | static void xen_restart(char *msg) |
1367 | { | 1368 | { |
1368 | xen_reboot(SHUTDOWN_reboot); | 1369 | xen_reboot(SHUTDOWN_reboot); |
1369 | } | 1370 | } |
1370 | 1371 | ||
1371 | static void xen_emergency_restart(void) | 1372 | static void xen_emergency_restart(void) |
1372 | { | 1373 | { |
1373 | xen_reboot(SHUTDOWN_reboot); | 1374 | xen_reboot(SHUTDOWN_reboot); |
1374 | } | 1375 | } |
1375 | 1376 | ||
1376 | static void xen_machine_halt(void) | 1377 | static void xen_machine_halt(void) |
1377 | { | 1378 | { |
1378 | xen_reboot(SHUTDOWN_poweroff); | 1379 | xen_reboot(SHUTDOWN_poweroff); |
1379 | } | 1380 | } |
1380 | 1381 | ||
1381 | static void xen_crash_shutdown(struct pt_regs *regs) | 1382 | static void xen_crash_shutdown(struct pt_regs *regs) |
1382 | { | 1383 | { |
1383 | xen_reboot(SHUTDOWN_crash); | 1384 | xen_reboot(SHUTDOWN_crash); |
1384 | } | 1385 | } |
1385 | 1386 | ||
1386 | static const struct machine_ops __initdata xen_machine_ops = { | 1387 | static const struct machine_ops __initdata xen_machine_ops = { |
1387 | .restart = xen_restart, | 1388 | .restart = xen_restart, |
1388 | .halt = xen_machine_halt, | 1389 | .halt = xen_machine_halt, |
1389 | .power_off = xen_machine_halt, | 1390 | .power_off = xen_machine_halt, |
1390 | .shutdown = xen_machine_halt, | 1391 | .shutdown = xen_machine_halt, |
1391 | .crash_shutdown = xen_crash_shutdown, | 1392 | .crash_shutdown = xen_crash_shutdown, |
1392 | .emergency_restart = xen_emergency_restart, | 1393 | .emergency_restart = xen_emergency_restart, |
1393 | }; | 1394 | }; |
1394 | 1395 | ||
1395 | 1396 | ||
1396 | static void __init xen_reserve_top(void) | 1397 | static void __init xen_reserve_top(void) |
1397 | { | 1398 | { |
1398 | #ifdef CONFIG_X86_32 | 1399 | #ifdef CONFIG_X86_32 |
1399 | unsigned long top = HYPERVISOR_VIRT_START; | 1400 | unsigned long top = HYPERVISOR_VIRT_START; |
1400 | struct xen_platform_parameters pp; | 1401 | struct xen_platform_parameters pp; |
1401 | 1402 | ||
1402 | if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) | 1403 | if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) |
1403 | top = pp.virt_start; | 1404 | top = pp.virt_start; |
1404 | 1405 | ||
1405 | reserve_top_address(-top); | 1406 | reserve_top_address(-top); |
1406 | #endif /* CONFIG_X86_32 */ | 1407 | #endif /* CONFIG_X86_32 */ |
1407 | } | 1408 | } |
1408 | 1409 | ||
1409 | /* | 1410 | /* |
1410 | * Like __va(), but returns the address in the kernel mapping (which is | 1411 | * Like __va(), but returns the address in the kernel mapping (which is |
1411 | * all we have until the physical memory mapping has been set up). | 1412 | * all we have until the physical memory mapping has been set up). |
1412 | */ | 1413 | */ |
1413 | static void *__ka(phys_addr_t paddr) | 1414 | static void *__ka(phys_addr_t paddr) |
1414 | { | 1415 | { |
1415 | #ifdef CONFIG_X86_64 | 1416 | #ifdef CONFIG_X86_64 |
1416 | return (void *)(paddr + __START_KERNEL_map); | 1417 | return (void *)(paddr + __START_KERNEL_map); |
1417 | #else | 1418 | #else |
1418 | return __va(paddr); | 1419 | return __va(paddr); |
1419 | #endif | 1420 | #endif |
1420 | } | 1421 | } |
1421 | 1422 | ||
1422 | /* Convert a machine address to physical address */ | 1423 | /* Convert a machine address to physical address */ |
1423 | static unsigned long m2p(phys_addr_t maddr) | 1424 | static unsigned long m2p(phys_addr_t maddr) |
1424 | { | 1425 | { |
1425 | phys_addr_t paddr; | 1426 | phys_addr_t paddr; |
1426 | 1427 | ||
1427 | maddr &= PTE_PFN_MASK; | 1428 | maddr &= PTE_PFN_MASK; |
1428 | paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; | 1429 | paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; |
1429 | 1430 | ||
1430 | return paddr; | 1431 | return paddr; |
1431 | } | 1432 | } |
1432 | 1433 | ||
1433 | /* Convert a machine address to kernel virtual */ | 1434 | /* Convert a machine address to kernel virtual */ |
1434 | static void *m2v(phys_addr_t maddr) | 1435 | static void *m2v(phys_addr_t maddr) |
1435 | { | 1436 | { |
1436 | return __ka(m2p(maddr)); | 1437 | return __ka(m2p(maddr)); |
1437 | } | 1438 | } |
1438 | 1439 | ||
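The arithmetic in m2p() is easy to misread: the pte flag bits are masked off first, the machine frame number (mfn) is recovered by shifting out the page offset, and only the frame number goes through the machine-to-physical lookup. A toy standalone model follows (the table contents are made up, and the real lookup is mfn_to_pfn() into the shared M2P table; PTE_PFN_MASK's exact value is architecture-dependent):

    #include <stdint.h>

    #define PAGE_SHIFT   12
    #define PTE_PFN_MASK 0x000ffffffffff000ULL   /* frame-number bits of a pte */

    static const uint64_t toy_m2p[] = { 7, 3, 42 };   /* mfn -> pfn, hypothetical */

    /* machine address -> pseudo-physical address, mirroring m2p() above */
    static uint64_t toy_m2p_addr(uint64_t maddr)
    {
            uint64_t mfn = (maddr & PTE_PFN_MASK) >> PAGE_SHIFT;
            return toy_m2p[mfn] << PAGE_SHIFT;   /* page offset discarded, as in m2p() */
    }

With this table, toy_m2p_addr(0x2000) resolves mfn 2 to pfn 42, i.e. 0x2a000.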
1439 | static void set_page_prot(void *addr, pgprot_t prot) | 1440 | static void set_page_prot(void *addr, pgprot_t prot) |
1440 | { | 1441 | { |
1441 | unsigned long pfn = __pa(addr) >> PAGE_SHIFT; | 1442 | unsigned long pfn = __pa(addr) >> PAGE_SHIFT; |
1442 | pte_t pte = pfn_pte(pfn, prot); | 1443 | pte_t pte = pfn_pte(pfn, prot); |
1443 | 1444 | ||
1444 | if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) | 1445 | if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) |
1445 | BUG(); | 1446 | BUG(); |
1446 | } | 1447 | } |
1447 | 1448 | ||
1448 | static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | 1449 | static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) |
1449 | { | 1450 | { |
1450 | unsigned pmdidx, pteidx; | 1451 | unsigned pmdidx, pteidx; |
1451 | unsigned ident_pte; | 1452 | unsigned ident_pte; |
1452 | unsigned long pfn; | 1453 | unsigned long pfn; |
1453 | 1454 | ||
1454 | ident_pte = 0; | 1455 | ident_pte = 0; |
1455 | pfn = 0; | 1456 | pfn = 0; |
1456 | for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { | 1457 | for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { |
1457 | pte_t *pte_page; | 1458 | pte_t *pte_page; |
1458 | 1459 | ||
1459 | /* Reuse or allocate a page of ptes */ | 1460 | /* Reuse or allocate a page of ptes */ |
1460 | if (pmd_present(pmd[pmdidx])) | 1461 | if (pmd_present(pmd[pmdidx])) |
1461 | pte_page = m2v(pmd[pmdidx].pmd); | 1462 | pte_page = m2v(pmd[pmdidx].pmd); |
1462 | else { | 1463 | else { |
1463 | /* Check for free pte pages */ | 1464 | /* Check for free pte pages */ |
1464 | if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) | 1465 | if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) |
1465 | break; | 1466 | break; |
1466 | 1467 | ||
1467 | pte_page = &level1_ident_pgt[ident_pte]; | 1468 | pte_page = &level1_ident_pgt[ident_pte]; |
1468 | ident_pte += PTRS_PER_PTE; | 1469 | ident_pte += PTRS_PER_PTE; |
1469 | 1470 | ||
1470 | pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); | 1471 | pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); |
1471 | } | 1472 | } |
1472 | 1473 | ||
1473 | /* Install mappings */ | 1474 | /* Install mappings */ |
1474 | for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { | 1475 | for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { |
1475 | pte_t pte; | 1476 | pte_t pte; |
1476 | 1477 | ||
1477 | if (pfn > max_pfn_mapped) | 1478 | if (pfn > max_pfn_mapped) |
1478 | max_pfn_mapped = pfn; | 1479 | max_pfn_mapped = pfn; |
1479 | 1480 | ||
1480 | if (!pte_none(pte_page[pteidx])) | 1481 | if (!pte_none(pte_page[pteidx])) |
1481 | continue; | 1482 | continue; |
1482 | 1483 | ||
1483 | pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); | 1484 | pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); |
1484 | pte_page[pteidx] = pte; | 1485 | pte_page[pteidx] = pte; |
1485 | } | 1486 | } |
1486 | } | 1487 | } |
1487 | 1488 | ||
1488 | for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) | 1489 | for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) |
1489 | set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); | 1490 | set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); |
1490 | 1491 | ||
1491 | set_page_prot(pmd, PAGE_KERNEL_RO); | 1492 | set_page_prot(pmd, PAGE_KERNEL_RO); |
1492 | } | 1493 | } |
1493 | 1494 | ||
1494 | #ifdef CONFIG_X86_64 | 1495 | #ifdef CONFIG_X86_64 |
1495 | static void convert_pfn_mfn(void *v) | 1496 | static void convert_pfn_mfn(void *v) |
1496 | { | 1497 | { |
1497 | pte_t *pte = v; | 1498 | pte_t *pte = v; |
1498 | int i; | 1499 | int i; |
1499 | 1500 | ||
1500 | /* All levels are converted the same way, so just treat them | 1501 | /* All levels are converted the same way, so just treat them |
1501 | as ptes. */ | 1502 | as ptes. */ |
1502 | for (i = 0; i < PTRS_PER_PTE; i++) | 1503 | for (i = 0; i < PTRS_PER_PTE; i++) |
1503 | pte[i] = xen_make_pte(pte[i].pte); | 1504 | pte[i] = xen_make_pte(pte[i].pte); |
1504 | } | 1505 | } |
1505 | 1506 | ||
1506 | /* | 1507 | /* |
1507 | * Set up the initial kernel pagetable. | 1508 | * Set up the initial kernel pagetable. |
1508 | * | 1509 | * |
1509 | * We can construct this by grafting the Xen provided pagetable into | 1510 | * We can construct this by grafting the Xen provided pagetable into |
1510 | * head_64.S's preconstructed pagetables. We copy the Xen L2's into | 1511 | * head_64.S's preconstructed pagetables. We copy the Xen L2's into |
1511 | * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This | 1512 | * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This |
1512 | * means that only the kernel has a physical mapping to start with - | 1513 | * means that only the kernel has a physical mapping to start with - |
1513 | * but that's enough to get __va working. We need to fill in the rest | 1514 | * but that's enough to get __va working. We need to fill in the rest |
1514 | * of the physical mapping once some sort of allocator has been set | 1515 | * of the physical mapping once some sort of allocator has been set |
1515 | * up. | 1516 | * up. |
1516 | */ | 1517 | */ |
1517 | static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | 1518 | static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, |
1518 | unsigned long max_pfn) | 1519 | unsigned long max_pfn) |
1519 | { | 1520 | { |
1520 | pud_t *l3; | 1521 | pud_t *l3; |
1521 | pmd_t *l2; | 1522 | pmd_t *l2; |
1522 | 1523 | ||
1523 | /* Zap identity mapping */ | 1524 | /* Zap identity mapping */ |
1524 | init_level4_pgt[0] = __pgd(0); | 1525 | init_level4_pgt[0] = __pgd(0); |
1525 | 1526 | ||
1526 | /* Pre-constructed entries are in pfn, so convert to mfn */ | 1527 | /* Pre-constructed entries are in pfn, so convert to mfn */ |
1527 | convert_pfn_mfn(init_level4_pgt); | 1528 | convert_pfn_mfn(init_level4_pgt); |
1528 | convert_pfn_mfn(level3_ident_pgt); | 1529 | convert_pfn_mfn(level3_ident_pgt); |
1529 | convert_pfn_mfn(level3_kernel_pgt); | 1530 | convert_pfn_mfn(level3_kernel_pgt); |
1530 | 1531 | ||
1531 | l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); | 1532 | l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); |
1532 | l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); | 1533 | l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); |
1533 | 1534 | ||
1534 | memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); | 1535 | memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); |
1535 | memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); | 1536 | memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); |
1536 | 1537 | ||
1537 | l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); | 1538 | l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); |
1538 | l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); | 1539 | l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); |
1539 | memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); | 1540 | memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); |
1540 | 1541 | ||
1541 | /* Set up identity map */ | 1542 | /* Set up identity map */ |
1542 | xen_map_identity_early(level2_ident_pgt, max_pfn); | 1543 | xen_map_identity_early(level2_ident_pgt, max_pfn); |
1543 | 1544 | ||
1544 | /* Make pagetable pieces RO */ | 1545 | /* Make pagetable pieces RO */ |
1545 | set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); | 1546 | set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); |
1546 | set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); | 1547 | set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); |
1547 | set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); | 1548 | set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); |
1548 | set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); | 1549 | set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); |
1549 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); | 1550 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); |
1550 | set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); | 1551 | set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); |
1551 | 1552 | ||
1552 | /* Pin down new L4 */ | 1553 | /* Pin down new L4 */ |
1553 | pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, | 1554 | pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, |
1554 | PFN_DOWN(__pa_symbol(init_level4_pgt))); | 1555 | PFN_DOWN(__pa_symbol(init_level4_pgt))); |
1555 | 1556 | ||
1556 | /* Unpin Xen-provided one */ | 1557 | /* Unpin Xen-provided one */ |
1557 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | 1558 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); |
1558 | 1559 | ||
1559 | /* Switch over */ | 1560 | /* Switch over */ |
1560 | pgd = init_level4_pgt; | 1561 | pgd = init_level4_pgt; |
1561 | 1562 | ||
1562 | /* | 1563 | /* |
1563 | * At this stage there can be no user pgd, and no page | 1564 | * At this stage there can be no user pgd, and no page |
1564 | * structure to attach it to, so make sure we just set kernel | 1565 | * structure to attach it to, so make sure we just set kernel |
1565 | * pgd. | 1566 | * pgd. |
1566 | */ | 1567 | */ |
1567 | xen_mc_batch(); | 1568 | xen_mc_batch(); |
1568 | __xen_write_cr3(true, __pa(pgd)); | 1569 | __xen_write_cr3(true, __pa(pgd)); |
1569 | xen_mc_issue(PARAVIRT_LAZY_CPU); | 1570 | xen_mc_issue(PARAVIRT_LAZY_CPU); |
1570 | 1571 | ||
1571 | reserve_early(__pa(xen_start_info->pt_base), | 1572 | reserve_early(__pa(xen_start_info->pt_base), |
1572 | __pa(xen_start_info->pt_base + | 1573 | __pa(xen_start_info->pt_base + |
1573 | xen_start_info->nr_pt_frames * PAGE_SIZE), | 1574 | xen_start_info->nr_pt_frames * PAGE_SIZE), |
1574 | "XEN PAGETABLES"); | 1575 | "XEN PAGETABLES"); |
1575 | 1576 | ||
1576 | return pgd; | 1577 | return pgd; |
1577 | } | 1578 | } |
1578 | #else /* !CONFIG_X86_64 */ | 1579 | #else /* !CONFIG_X86_64 */ |
1579 | static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; | 1580 | static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; |
1580 | 1581 | ||
1581 | static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | 1582 | static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, |
1582 | unsigned long max_pfn) | 1583 | unsigned long max_pfn) |
1583 | { | 1584 | { |
1584 | pmd_t *kernel_pmd; | 1585 | pmd_t *kernel_pmd; |
1585 | 1586 | ||
1586 | init_pg_tables_start = __pa(pgd); | 1587 | init_pg_tables_start = __pa(pgd); |
1587 | init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; | 1588 | init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; |
1588 | max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); | 1589 | max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); |
1589 | 1590 | ||
1590 | kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); | 1591 | kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); |
1591 | memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); | 1592 | memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); |
1592 | 1593 | ||
1593 | xen_map_identity_early(level2_kernel_pgt, max_pfn); | 1594 | xen_map_identity_early(level2_kernel_pgt, max_pfn); |
1594 | 1595 | ||
1595 | memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); | 1596 | memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); |
1596 | set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], | 1597 | set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], |
1597 | __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); | 1598 | __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); |
1598 | 1599 | ||
1599 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); | 1600 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); |
1600 | set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); | 1601 | set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); |
1601 | set_page_prot(empty_zero_page, PAGE_KERNEL_RO); | 1602 | set_page_prot(empty_zero_page, PAGE_KERNEL_RO); |
1602 | 1603 | ||
1603 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | 1604 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); |
1604 | 1605 | ||
1605 | xen_write_cr3(__pa(swapper_pg_dir)); | 1606 | xen_write_cr3(__pa(swapper_pg_dir)); |
1606 | 1607 | ||
1607 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); | 1608 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); |
1608 | 1609 | ||
1609 | return swapper_pg_dir; | 1610 | return swapper_pg_dir; |
1610 | } | 1611 | } |
1611 | #endif /* CONFIG_X86_64 */ | 1612 | #endif /* CONFIG_X86_64 */ |
1612 | 1613 | ||
1613 | /* First C function to be called on Xen boot */ | 1614 | /* First C function to be called on Xen boot */ |
1614 | asmlinkage void __init xen_start_kernel(void) | 1615 | asmlinkage void __init xen_start_kernel(void) |
1615 | { | 1616 | { |
1616 | pgd_t *pgd; | 1617 | pgd_t *pgd; |
1617 | 1618 | ||
1618 | if (!xen_start_info) | 1619 | if (!xen_start_info) |
1619 | return; | 1620 | return; |
1620 | 1621 | ||
1621 | xen_domain_type = XEN_PV_DOMAIN; | 1622 | xen_domain_type = XEN_PV_DOMAIN; |
1622 | 1623 | ||
1623 | BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); | 1624 | BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); |
1624 | 1625 | ||
1625 | xen_setup_features(); | 1626 | xen_setup_features(); |
1626 | 1627 | ||
1627 | /* Install Xen paravirt ops */ | 1628 | /* Install Xen paravirt ops */ |
1628 | pv_info = xen_info; | 1629 | pv_info = xen_info; |
1629 | pv_init_ops = xen_init_ops; | 1630 | pv_init_ops = xen_init_ops; |
1630 | pv_time_ops = xen_time_ops; | 1631 | pv_time_ops = xen_time_ops; |
1631 | pv_cpu_ops = xen_cpu_ops; | 1632 | pv_cpu_ops = xen_cpu_ops; |
1632 | pv_apic_ops = xen_apic_ops; | 1633 | pv_apic_ops = xen_apic_ops; |
1633 | pv_mmu_ops = xen_mmu_ops; | 1634 | pv_mmu_ops = xen_mmu_ops; |
1634 | 1635 | ||
1635 | xen_init_irq_ops(); | 1636 | xen_init_irq_ops(); |
1636 | 1637 | ||
1637 | #ifdef CONFIG_X86_LOCAL_APIC | 1638 | #ifdef CONFIG_X86_LOCAL_APIC |
1638 | /* | 1639 | /* |
1639 | * set up the basic apic ops. | 1640 | * set up the basic apic ops. |
1640 | */ | 1641 | */ |
1641 | apic_ops = &xen_basic_apic_ops; | 1642 | apic_ops = &xen_basic_apic_ops; |
1642 | #endif | 1643 | #endif |
1643 | 1644 | ||
1644 | if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { | 1645 | if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { |
1645 | pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; | 1646 | pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; |
1646 | pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; | 1647 | pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; |
1647 | } | 1648 | } |
1648 | 1649 | ||
1649 | machine_ops = xen_machine_ops; | 1650 | machine_ops = xen_machine_ops; |
1650 | 1651 | ||
1651 | #ifdef CONFIG_X86_64 | 1652 | #ifdef CONFIG_X86_64 |
1652 | /* Disable until direct per-cpu data access. */ | 1653 | /* Disable until direct per-cpu data access. */ |
1653 | have_vcpu_info_placement = 0; | 1654 | have_vcpu_info_placement = 0; |
1654 | x86_64_init_pda(); | 1655 | x86_64_init_pda(); |
1655 | #endif | 1656 | #endif |
1656 | 1657 | ||
1657 | xen_smp_init(); | 1658 | xen_smp_init(); |
1658 | 1659 | ||
1659 | /* Get mfn list */ | 1660 | /* Get mfn list */ |
1660 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | 1661 | if (!xen_feature(XENFEAT_auto_translated_physmap)) |
1661 | xen_build_dynamic_phys_to_machine(); | 1662 | xen_build_dynamic_phys_to_machine(); |
1662 | 1663 | ||
1663 | pgd = (pgd_t *)xen_start_info->pt_base; | 1664 | pgd = (pgd_t *)xen_start_info->pt_base; |
1664 | 1665 | ||
1665 | /* Prevent unwanted bits from being set in PTEs. */ | 1666 | /* Prevent unwanted bits from being set in PTEs. */ |
1666 | __supported_pte_mask &= ~_PAGE_GLOBAL; | 1667 | __supported_pte_mask &= ~_PAGE_GLOBAL; |
1667 | if (!xen_initial_domain()) | 1668 | if (!xen_initial_domain()) |
1668 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); | 1669 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); |
1669 | 1670 | ||
1670 | /* Don't do the full vcpu_info placement stuff until we have a | 1671 | /* Don't do the full vcpu_info placement stuff until we have a |
1671 | possible map and a non-dummy shared_info. */ | 1672 | possible map and a non-dummy shared_info. */ |
1672 | per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; | 1673 | per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; |
1673 | 1674 | ||
1674 | xen_raw_console_write("mapping kernel into physical memory\n"); | 1675 | xen_raw_console_write("mapping kernel into physical memory\n"); |
1675 | pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); | 1676 | pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); |
1676 | 1677 | ||
1677 | init_mm.pgd = pgd; | 1678 | init_mm.pgd = pgd; |
1678 | 1679 | ||
1679 | /* keep using Xen gdt for now; no urgent need to change it */ | 1680 | /* keep using Xen gdt for now; no urgent need to change it */ |
1680 | 1681 | ||
1681 | pv_info.kernel_rpl = 1; | 1682 | pv_info.kernel_rpl = 1; |
1682 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) | 1683 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) |
1683 | pv_info.kernel_rpl = 0; | 1684 | pv_info.kernel_rpl = 0; |
1684 | 1685 | ||
1685 | /* set the limit of our address space */ | 1686 | /* set the limit of our address space */ |
1686 | xen_reserve_top(); | 1687 | xen_reserve_top(); |
1687 | 1688 | ||
1688 | #ifdef CONFIG_X86_32 | 1689 | #ifdef CONFIG_X86_32 |
1689 | /* set up basic CPUID stuff */ | 1690 | /* set up basic CPUID stuff */ |
1690 | cpu_detect(&new_cpu_data); | 1691 | cpu_detect(&new_cpu_data); |
1691 | new_cpu_data.hard_math = 1; | 1692 | new_cpu_data.hard_math = 1; |
1692 | new_cpu_data.x86_capability[0] = cpuid_edx(1); | 1693 | new_cpu_data.x86_capability[0] = cpuid_edx(1); |
1693 | #endif | 1694 | #endif |
1694 | 1695 | ||
1695 | /* Poke various useful things into boot_params */ | 1696 | /* Poke various useful things into boot_params */ |
1696 | boot_params.hdr.type_of_loader = (9 << 4) | 0; | 1697 | boot_params.hdr.type_of_loader = (9 << 4) | 0; |
1697 | boot_params.hdr.ramdisk_image = xen_start_info->mod_start | 1698 | boot_params.hdr.ramdisk_image = xen_start_info->mod_start |
1698 | ? __pa(xen_start_info->mod_start) : 0; | 1699 | ? __pa(xen_start_info->mod_start) : 0; |
1699 | boot_params.hdr.ramdisk_size = xen_start_info->mod_len; | 1700 | boot_params.hdr.ramdisk_size = xen_start_info->mod_len; |
1700 | boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); | 1701 | boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); |
1701 | 1702 | ||
1702 | if (!xen_initial_domain()) { | 1703 | if (!xen_initial_domain()) { |
1703 | add_preferred_console("xenboot", 0, NULL); | 1704 | add_preferred_console("xenboot", 0, NULL); |
1704 | add_preferred_console("tty", 0, NULL); | 1705 | add_preferred_console("tty", 0, NULL); |
1705 | add_preferred_console("hvc", 0, NULL); | 1706 | add_preferred_console("hvc", 0, NULL); |
1706 | } | 1707 | } |
1707 | 1708 | ||
1708 | xen_raw_console_write("about to get started...\n"); | 1709 | xen_raw_console_write("about to get started...\n"); |
1709 | 1710 | ||
1710 | /* Start the world */ | 1711 | /* Start the world */ |
1711 | #ifdef CONFIG_X86_32 | 1712 | #ifdef CONFIG_X86_32 |
1712 | i386_start_kernel(); | 1713 | i386_start_kernel(); |
1713 | #else | 1714 | #else |
1714 | x86_64_start_reservations((char *)__pa_symbol(&boot_params)); | 1715 | x86_64_start_reservations((char *)__pa_symbol(&boot_params)); |
1715 | #endif | 1716 | #endif |
1716 | } | 1717 | } |
1717 | 1718 |
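The balloon driver below is one of the fixups the commit message promises: with hypervisor.h no longer dragging in spurious headers, balloon.c has to include what it actually uses. The hunk moves the asm/xen include after the generic asm ones and spells out the hypercall and interface headers, so the Xen block ends up as:

    #include <asm/xen/hypervisor.h>
    #include <asm/xen/hypercall.h>
    #include <xen/interface/xen.h>
    #include <xen/interface/memory.h>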
drivers/xen/balloon.c
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | * balloon.c | 2 | * balloon.c |
3 | * | 3 | * |
4 | * Xen balloon driver - enables returning/claiming memory to/from Xen. | 4 | * Xen balloon driver - enables returning/claiming memory to/from Xen. |
5 | * | 5 | * |
6 | * Copyright (c) 2003, B Dragovic | 6 | * Copyright (c) 2003, B Dragovic |
7 | * Copyright (c) 2003-2004, M Williamson, K Fraser | 7 | * Copyright (c) 2003-2004, M Williamson, K Fraser |
8 | * Copyright (c) 2005 Dan M. Smith, IBM Corporation | 8 | * Copyright (c) 2005 Dan M. Smith, IBM Corporation |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or | 10 | * This program is free software; you can redistribute it and/or |
11 | * modify it under the terms of the GNU General Public License version 2 | 11 | * modify it under the terms of the GNU General Public License version 2 |
12 | * as published by the Free Software Foundation; or, when distributed | 12 | * as published by the Free Software Foundation; or, when distributed |
13 | * separately from the Linux kernel or incorporated into other | 13 | * separately from the Linux kernel or incorporated into other |
14 | * software packages, subject to the following license: | 14 | * software packages, subject to the following license: |
15 | * | 15 | * |
16 | * Permission is hereby granted, free of charge, to any person obtaining a copy | 16 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
17 | * of this source file (the "Software"), to deal in the Software without | 17 | * of this source file (the "Software"), to deal in the Software without |
18 | * restriction, including without limitation the rights to use, copy, modify, | 18 | * restriction, including without limitation the rights to use, copy, modify, |
19 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | 19 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, |
20 | * and to permit persons to whom the Software is furnished to do so, subject to | 20 | * and to permit persons to whom the Software is furnished to do so, subject to |
21 | * the following conditions: | 21 | * the following conditions: |
22 | * | 22 | * |
23 | * The above copyright notice and this permission notice shall be included in | 23 | * The above copyright notice and this permission notice shall be included in |
24 | * all copies or substantial portions of the Software. | 24 | * all copies or substantial portions of the Software. |
25 | * | 25 | * |
26 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 26 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
27 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 27 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
28 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 28 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
29 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 29 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
30 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | 30 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
31 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | 31 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
32 | * IN THE SOFTWARE. | 32 | * IN THE SOFTWARE. |
33 | */ | 33 | */ |
34 | 34 | ||
35 | #include <linux/kernel.h> | 35 | #include <linux/kernel.h> |
36 | #include <linux/module.h> | 36 | #include <linux/module.h> |
37 | #include <linux/sched.h> | 37 | #include <linux/sched.h> |
38 | #include <linux/errno.h> | 38 | #include <linux/errno.h> |
39 | #include <linux/mm.h> | 39 | #include <linux/mm.h> |
40 | #include <linux/bootmem.h> | 40 | #include <linux/bootmem.h> |
41 | #include <linux/pagemap.h> | 41 | #include <linux/pagemap.h> |
42 | #include <linux/highmem.h> | 42 | #include <linux/highmem.h> |
43 | #include <linux/mutex.h> | 43 | #include <linux/mutex.h> |
44 | #include <linux/list.h> | 44 | #include <linux/list.h> |
45 | #include <linux/sysdev.h> | 45 | #include <linux/sysdev.h> |
46 | 46 | ||
47 | #include <asm/xen/hypervisor.h> | ||
48 | #include <asm/page.h> | 47 | #include <asm/page.h> |
49 | #include <asm/pgalloc.h> | 48 | #include <asm/pgalloc.h> |
50 | #include <asm/pgtable.h> | 49 | #include <asm/pgtable.h> |
51 | #include <asm/uaccess.h> | 50 | #include <asm/uaccess.h> |
52 | #include <asm/tlb.h> | 51 | #include <asm/tlb.h> |
53 | 52 | ||
53 | #include <asm/xen/hypervisor.h> | ||
54 | #include <asm/xen/hypercall.h> | ||
55 | #include <xen/interface/xen.h> | ||
54 | #include <xen/interface/memory.h> | 56 | #include <xen/interface/memory.h> |
55 | #include <xen/xenbus.h> | 57 | #include <xen/xenbus.h> |
56 | #include <xen/features.h> | 58 | #include <xen/features.h> |
57 | #include <xen/page.h> | 59 | #include <xen/page.h> |
58 | 60 | ||
59 | #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) | 61 | #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) |
60 | 62 | ||
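For scale, assuming the common PAGE_SHIFT of 12 (4 KiB pages), PAGES2KB(_p) reduces to (_p) << 2:

    #include <stdio.h>

    #define PAGE_SHIFT 12                            /* assumed: 4 KiB pages */
    #define PAGES2KB(_p) ((_p) << (PAGE_SHIFT-10))   /* same formula as above */

    int main(void)
    {
            printf("%d\n", PAGES2KB(1));     /* 4    -> one page is 4 KiB   */
            printf("%d\n", PAGES2KB(256));   /* 1024 -> 256 pages are 1 MiB */
            return 0;
    }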
61 | #define BALLOON_CLASS_NAME "xen_memory" | 63 | #define BALLOON_CLASS_NAME "xen_memory" |
62 | 64 | ||
63 | struct balloon_stats { | 65 | struct balloon_stats { |
64 | /* We aim for 'current allocation' == 'target allocation'. */ | 66 | /* We aim for 'current allocation' == 'target allocation'. */ |
65 | unsigned long current_pages; | 67 | unsigned long current_pages; |
66 | unsigned long target_pages; | 68 | unsigned long target_pages; |
67 | /* We may hit the hard limit in Xen. If we do then we remember it. */ | 69 | /* We may hit the hard limit in Xen. If we do then we remember it. */ |
68 | unsigned long hard_limit; | 70 | unsigned long hard_limit; |
69 | /* | 71 | /* |
70 | * Drivers may alter the memory reservation independently, but they | 72 | * Drivers may alter the memory reservation independently, but they |
71 | * must inform the balloon driver so we avoid hitting the hard limit. | 73 | * must inform the balloon driver so we avoid hitting the hard limit. |
72 | */ | 74 | */ |
73 | unsigned long driver_pages; | 75 | unsigned long driver_pages; |
74 | /* Number of pages in high- and low-memory balloons. */ | 76 | /* Number of pages in high- and low-memory balloons. */ |
75 | unsigned long balloon_low; | 77 | unsigned long balloon_low; |
76 | unsigned long balloon_high; | 78 | unsigned long balloon_high; |
77 | }; | 79 | }; |
78 | 80 | ||
79 | static DEFINE_MUTEX(balloon_mutex); | 81 | static DEFINE_MUTEX(balloon_mutex); |
80 | 82 | ||
81 | static struct sys_device balloon_sysdev; | 83 | static struct sys_device balloon_sysdev; |
82 | 84 | ||
83 | static int register_balloon(struct sys_device *sysdev); | 85 | static int register_balloon(struct sys_device *sysdev); |
84 | 86 | ||
85 | /* | 87 | /* |
86 | * Protects atomic reservation decrease/increase against concurrent increases. | 88 | * Protects atomic reservation decrease/increase against concurrent increases. |
87 | * Also protects non-atomic updates of current_pages and driver_pages, and | 89 | * Also protects non-atomic updates of current_pages and driver_pages, and |
88 | * balloon lists. | 90 | * balloon lists. |
89 | */ | 91 | */ |
90 | static DEFINE_SPINLOCK(balloon_lock); | 92 | static DEFINE_SPINLOCK(balloon_lock); |
91 | 93 | ||
92 | static struct balloon_stats balloon_stats; | 94 | static struct balloon_stats balloon_stats; |
93 | 95 | ||
94 | /* We increase/decrease in batches which fit in a page */ | 96 | /* We increase/decrease in batches which fit in a page */ |
95 | static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; | 97 | static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; |
96 | 98 | ||
97 | /* VM /proc information for memory */ | 99 | /* VM /proc information for memory */ |
98 | extern unsigned long totalram_pages; | 100 | extern unsigned long totalram_pages; |
99 | 101 | ||
100 | #ifdef CONFIG_HIGHMEM | 102 | #ifdef CONFIG_HIGHMEM |
101 | extern unsigned long totalhigh_pages; | 103 | extern unsigned long totalhigh_pages; |
102 | #define inc_totalhigh_pages() (totalhigh_pages++) | 104 | #define inc_totalhigh_pages() (totalhigh_pages++) |
103 | #define dec_totalhigh_pages() (totalhigh_pages--) | 105 | #define dec_totalhigh_pages() (totalhigh_pages--) |
104 | #else | 106 | #else |
105 | #define inc_totalhigh_pages() do {} while(0) | 107 | #define inc_totalhigh_pages() do {} while(0) |
106 | #define dec_totalhigh_pages() do {} while(0) | 108 | #define dec_totalhigh_pages() do {} while(0) |
107 | #endif | 109 | #endif |
108 | 110 | ||
109 | /* List of ballooned pages, threaded through the mem_map array. */ | 111 | /* List of ballooned pages, threaded through the mem_map array. */ |
110 | static LIST_HEAD(ballooned_pages); | 112 | static LIST_HEAD(ballooned_pages); |
111 | 113 | ||
112 | /* Main work function, always executed in process context. */ | 114 | /* Main work function, always executed in process context. */ |
113 | static void balloon_process(struct work_struct *work); | 115 | static void balloon_process(struct work_struct *work); |
114 | static DECLARE_WORK(balloon_worker, balloon_process); | 116 | static DECLARE_WORK(balloon_worker, balloon_process); |
115 | static struct timer_list balloon_timer; | 117 | static struct timer_list balloon_timer; |
116 | 118 | ||
117 | /* When ballooning out (allocating memory to return to Xen) we don't really | 119 | /* When ballooning out (allocating memory to return to Xen) we don't really |
118 | want the kernel to try too hard since that can trigger the oom killer. */ | 120 | want the kernel to try too hard since that can trigger the oom killer. */ |
119 | #define GFP_BALLOON \ | 121 | #define GFP_BALLOON \ |
120 | (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC) | 122 | (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC) |
121 | 123 | ||
122 | static void scrub_page(struct page *page) | 124 | static void scrub_page(struct page *page) |
123 | { | 125 | { |
124 | #ifdef CONFIG_XEN_SCRUB_PAGES | 126 | #ifdef CONFIG_XEN_SCRUB_PAGES |
125 | clear_highpage(page); | 127 | clear_highpage(page); |
126 | #endif | 128 | #endif |
127 | } | 129 | } |
128 | 130 | ||
129 | /* balloon_append: add the given page to the balloon. */ | 131 | /* balloon_append: add the given page to the balloon. */ |
130 | static void balloon_append(struct page *page) | 132 | static void balloon_append(struct page *page) |
131 | { | 133 | { |
132 | /* Lowmem is re-populated first, so highmem pages go at list tail. */ | 134 | /* Lowmem is re-populated first, so highmem pages go at list tail. */ |
133 | if (PageHighMem(page)) { | 135 | if (PageHighMem(page)) { |
134 | list_add_tail(&page->lru, &ballooned_pages); | 136 | list_add_tail(&page->lru, &ballooned_pages); |
135 | balloon_stats.balloon_high++; | 137 | balloon_stats.balloon_high++; |
136 | dec_totalhigh_pages(); | 138 | dec_totalhigh_pages(); |
137 | } else { | 139 | } else { |
138 | list_add(&page->lru, &ballooned_pages); | 140 | list_add(&page->lru, &ballooned_pages); |
139 | balloon_stats.balloon_low++; | 141 | balloon_stats.balloon_low++; |
140 | } | 142 | } |
141 | } | 143 | } |
142 | 144 | ||
143 | /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ | 145 | /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ |
144 | static struct page *balloon_retrieve(void) | 146 | static struct page *balloon_retrieve(void) |
145 | { | 147 | { |
146 | struct page *page; | 148 | struct page *page; |
147 | 149 | ||
148 | if (list_empty(&ballooned_pages)) | 150 | if (list_empty(&ballooned_pages)) |
149 | return NULL; | 151 | return NULL; |
150 | 152 | ||
151 | page = list_entry(ballooned_pages.next, struct page, lru); | 153 | page = list_entry(ballooned_pages.next, struct page, lru); |
152 | list_del(&page->lru); | 154 | list_del(&page->lru); |
153 | 155 | ||
154 | if (PageHighMem(page)) { | 156 | if (PageHighMem(page)) { |
155 | balloon_stats.balloon_high--; | 157 | balloon_stats.balloon_high--; |
156 | inc_totalhigh_pages(); | 158 | inc_totalhigh_pages(); |
157 | } | 159 | } |
158 | else | 160 | else |
159 | balloon_stats.balloon_low--; | 161 | balloon_stats.balloon_low--; |
160 | 162 | ||
161 | return page; | 163 | return page; |
162 | } | 164 | } |
163 | 165 | ||
164 | static struct page *balloon_first_page(void) | 166 | static struct page *balloon_first_page(void) |
165 | { | 167 | { |
166 | if (list_empty(&ballooned_pages)) | 168 | if (list_empty(&ballooned_pages)) |
167 | return NULL; | 169 | return NULL; |
168 | return list_entry(ballooned_pages.next, struct page, lru); | 170 | return list_entry(ballooned_pages.next, struct page, lru); |
169 | } | 171 | } |
170 | 172 | ||
171 | static struct page *balloon_next_page(struct page *page) | 173 | static struct page *balloon_next_page(struct page *page) |
172 | { | 174 | { |
173 | struct list_head *next = page->lru.next; | 175 | struct list_head *next = page->lru.next; |
174 | if (next == &ballooned_pages) | 176 | if (next == &ballooned_pages) |
175 | return NULL; | 177 | return NULL; |
176 | return list_entry(next, struct page, lru); | 178 | return list_entry(next, struct page, lru); |
177 | } | 179 | } |
178 | 180 | ||
179 | static void balloon_alarm(unsigned long unused) | 181 | static void balloon_alarm(unsigned long unused) |
180 | { | 182 | { |
181 | schedule_work(&balloon_worker); | 183 | schedule_work(&balloon_worker); |
182 | } | 184 | } |
183 | 185 | ||
184 | static unsigned long current_target(void) | 186 | static unsigned long current_target(void) |
185 | { | 187 | { |
186 | unsigned long target = min(balloon_stats.target_pages, balloon_stats.hard_limit); | 188 | unsigned long target = min(balloon_stats.target_pages, balloon_stats.hard_limit); |
187 | 189 | ||
188 | target = min(target, | 190 | target = min(target, |
189 | balloon_stats.current_pages + | 191 | balloon_stats.current_pages + |
190 | balloon_stats.balloon_low + | 192 | balloon_stats.balloon_low + |
191 | balloon_stats.balloon_high); | 193 | balloon_stats.balloon_high); |
192 | 194 | ||
193 | return target; | 195 | return target; |
194 | } | 196 | } |
195 | 197 | ||
196 | static int increase_reservation(unsigned long nr_pages) | 198 | static int increase_reservation(unsigned long nr_pages) |
197 | { | 199 | { |
198 | unsigned long pfn, i, flags; | 200 | unsigned long pfn, i, flags; |
199 | struct page *page; | 201 | struct page *page; |
200 | long rc; | 202 | long rc; |
201 | struct xen_memory_reservation reservation = { | 203 | struct xen_memory_reservation reservation = { |
202 | .address_bits = 0, | 204 | .address_bits = 0, |
203 | .extent_order = 0, | 205 | .extent_order = 0, |
204 | .domid = DOMID_SELF | 206 | .domid = DOMID_SELF |
205 | }; | 207 | }; |
206 | 208 | ||
207 | if (nr_pages > ARRAY_SIZE(frame_list)) | 209 | if (nr_pages > ARRAY_SIZE(frame_list)) |
208 | nr_pages = ARRAY_SIZE(frame_list); | 210 | nr_pages = ARRAY_SIZE(frame_list); |
209 | 211 | ||
210 | spin_lock_irqsave(&balloon_lock, flags); | 212 | spin_lock_irqsave(&balloon_lock, flags); |
211 | 213 | ||
212 | page = balloon_first_page(); | 214 | page = balloon_first_page(); |
213 | for (i = 0; i < nr_pages; i++) { | 215 | for (i = 0; i < nr_pages; i++) { |
214 | BUG_ON(page == NULL); | 216 | BUG_ON(page == NULL); |
215 | frame_list[i] = page_to_pfn(page); | 217 | frame_list[i] = page_to_pfn(page); |
216 | page = balloon_next_page(page); | 218 | page = balloon_next_page(page); |
217 | } | 219 | } |
218 | 220 | ||
219 | set_xen_guest_handle(reservation.extent_start, frame_list); | 221 | set_xen_guest_handle(reservation.extent_start, frame_list); |
220 | reservation.nr_extents = nr_pages; | 222 | reservation.nr_extents = nr_pages; |
221 | rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); | 223 | rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); |
222 | if (rc < nr_pages) { | 224 | if (rc < nr_pages) { |
223 | if (rc > 0) { | 225 | if (rc > 0) { |
224 | int ret; | 226 | int ret; |
225 | 227 | ||
226 | /* We hit the Xen hard limit: reprobe. */ | 228 | /* We hit the Xen hard limit: reprobe. */ |
227 | reservation.nr_extents = rc; | 229 | reservation.nr_extents = rc; |
228 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | 230 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, |
229 | &reservation); | 231 | &reservation); |
230 | BUG_ON(ret != rc); | 232 | BUG_ON(ret != rc); |
231 | } | 233 | } |
232 | if (rc >= 0) | 234 | if (rc >= 0) |
233 | balloon_stats.hard_limit = (balloon_stats.current_pages + rc - | 235 | balloon_stats.hard_limit = (balloon_stats.current_pages + rc - |
234 | balloon_stats.driver_pages); | 236 | balloon_stats.driver_pages); |
235 | goto out; | 237 | goto out; |
236 | } | 238 | } |
237 | 239 | ||
238 | for (i = 0; i < nr_pages; i++) { | 240 | for (i = 0; i < nr_pages; i++) { |
239 | page = balloon_retrieve(); | 241 | page = balloon_retrieve(); |
240 | BUG_ON(page == NULL); | 242 | BUG_ON(page == NULL); |
241 | 243 | ||
242 | pfn = page_to_pfn(page); | 244 | pfn = page_to_pfn(page); |
243 | BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && | 245 | BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && |
244 | phys_to_machine_mapping_valid(pfn)); | 246 | phys_to_machine_mapping_valid(pfn)); |
245 | 247 | ||
246 | set_phys_to_machine(pfn, frame_list[i]); | 248 | set_phys_to_machine(pfn, frame_list[i]); |
247 | 249 | ||
248 | /* Link back into the page tables if not highmem. */ | 250 | /* Link back into the page tables if not highmem. */ |
249 | if (pfn < max_low_pfn) { | 251 | if (pfn < max_low_pfn) { |
250 | int ret; | 252 | int ret; |
251 | ret = HYPERVISOR_update_va_mapping( | 253 | ret = HYPERVISOR_update_va_mapping( |
252 | (unsigned long)__va(pfn << PAGE_SHIFT), | 254 | (unsigned long)__va(pfn << PAGE_SHIFT), |
253 | mfn_pte(frame_list[i], PAGE_KERNEL), | 255 | mfn_pte(frame_list[i], PAGE_KERNEL), |
254 | 0); | 256 | 0); |
255 | BUG_ON(ret); | 257 | BUG_ON(ret); |
256 | } | 258 | } |
257 | 259 | ||
258 | /* Relinquish the page back to the allocator. */ | 260 | /* Relinquish the page back to the allocator. */ |
259 | ClearPageReserved(page); | 261 | ClearPageReserved(page); |
260 | init_page_count(page); | 262 | init_page_count(page); |
261 | __free_page(page); | 263 | __free_page(page); |
262 | } | 264 | } |
263 | 265 | ||
264 | balloon_stats.current_pages += nr_pages; | 266 | balloon_stats.current_pages += nr_pages; |
265 | totalram_pages = balloon_stats.current_pages; | 267 | totalram_pages = balloon_stats.current_pages; |
266 | 268 | ||
267 | out: | 269 | out: |
268 | spin_unlock_irqrestore(&balloon_lock, flags); | 270 | spin_unlock_irqrestore(&balloon_lock, flags); |
269 | 271 | ||
270 | return 0; | 272 | return 0; |
271 | } | 273 | } |
272 | 274 | ||
273 | static int decrease_reservation(unsigned long nr_pages) | 275 | static int decrease_reservation(unsigned long nr_pages) |
274 | { | 276 | { |
275 | unsigned long pfn, i, flags; | 277 | unsigned long pfn, i, flags; |
276 | struct page *page; | 278 | struct page *page; |
277 | int need_sleep = 0; | 279 | int need_sleep = 0; |
278 | int ret; | 280 | int ret; |
279 | struct xen_memory_reservation reservation = { | 281 | struct xen_memory_reservation reservation = { |
280 | .address_bits = 0, | 282 | .address_bits = 0, |
281 | .extent_order = 0, | 283 | .extent_order = 0, |
282 | .domid = DOMID_SELF | 284 | .domid = DOMID_SELF |
283 | }; | 285 | }; |
284 | 286 | ||
285 | if (nr_pages > ARRAY_SIZE(frame_list)) | 287 | if (nr_pages > ARRAY_SIZE(frame_list)) |
286 | nr_pages = ARRAY_SIZE(frame_list); | 288 | nr_pages = ARRAY_SIZE(frame_list); |
287 | 289 | ||
288 | for (i = 0; i < nr_pages; i++) { | 290 | for (i = 0; i < nr_pages; i++) { |
289 | if ((page = alloc_page(GFP_BALLOON)) == NULL) { | 291 | if ((page = alloc_page(GFP_BALLOON)) == NULL) { |
290 | nr_pages = i; | 292 | nr_pages = i; |
291 | need_sleep = 1; | 293 | need_sleep = 1; |
292 | break; | 294 | break; |
293 | } | 295 | } |
294 | 296 | ||
295 | pfn = page_to_pfn(page); | 297 | pfn = page_to_pfn(page); |
296 | frame_list[i] = pfn_to_mfn(pfn); | 298 | frame_list[i] = pfn_to_mfn(pfn); |
297 | 299 | ||
298 | scrub_page(page); | 300 | scrub_page(page); |
299 | } | 301 | } |
300 | 302 | ||
301 | /* Ensure that ballooned highmem pages don't have kmaps. */ | 303 | /* Ensure that ballooned highmem pages don't have kmaps. */ |
302 | kmap_flush_unused(); | 304 | kmap_flush_unused(); |
303 | flush_tlb_all(); | 305 | flush_tlb_all(); |
304 | 306 | ||
305 | spin_lock_irqsave(&balloon_lock, flags); | 307 | spin_lock_irqsave(&balloon_lock, flags); |
306 | 308 | ||
307 | /* No more mappings: invalidate P2M and add to balloon. */ | 309 | /* No more mappings: invalidate P2M and add to balloon. */ |
308 | for (i = 0; i < nr_pages; i++) { | 310 | for (i = 0; i < nr_pages; i++) { |
309 | pfn = mfn_to_pfn(frame_list[i]); | 311 | pfn = mfn_to_pfn(frame_list[i]); |
310 | set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | 312 | set_phys_to_machine(pfn, INVALID_P2M_ENTRY); |
311 | balloon_append(pfn_to_page(pfn)); | 313 | balloon_append(pfn_to_page(pfn)); |
312 | } | 314 | } |
313 | 315 | ||
314 | set_xen_guest_handle(reservation.extent_start, frame_list); | 316 | set_xen_guest_handle(reservation.extent_start, frame_list); |
315 | reservation.nr_extents = nr_pages; | 317 | reservation.nr_extents = nr_pages; |
316 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); | 318 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); |
317 | BUG_ON(ret != nr_pages); | 319 | BUG_ON(ret != nr_pages); |
318 | 320 | ||
319 | balloon_stats.current_pages -= nr_pages; | 321 | balloon_stats.current_pages -= nr_pages; |
320 | totalram_pages = balloon_stats.current_pages; | 322 | totalram_pages = balloon_stats.current_pages; |
321 | 323 | ||
322 | spin_unlock_irqrestore(&balloon_lock, flags); | 324 | spin_unlock_irqrestore(&balloon_lock, flags); |
323 | 325 | ||
324 | return need_sleep; | 326 | return need_sleep; |
325 | } | 327 | } |
326 | 328 | ||
327 | /* | 329 | /* |
328 | * We use the balloon mutex to keep multiple worker processes from conflicting. | 330 | * We use the balloon mutex to keep multiple worker processes from conflicting. |
329 | * We may of course race updates of the target counts (which are protected | 331 | * We may of course race updates of the target counts (which are protected |
330 | * by the balloon lock), or with changes to the Xen hard limit, but we will | 332 | * by the balloon lock), or with changes to the Xen hard limit, but we will |
331 | * recover from these in time. | 333 | * recover from these in time. |
332 | */ | 334 | */ |
333 | static void balloon_process(struct work_struct *work) | 335 | static void balloon_process(struct work_struct *work) |
334 | { | 336 | { |
335 | int need_sleep = 0; | 337 | int need_sleep = 0; |
336 | long credit; | 338 | long credit; |
337 | 339 | ||
338 | mutex_lock(&balloon_mutex); | 340 | mutex_lock(&balloon_mutex); |
339 | 341 | ||
340 | do { | 342 | do { |
341 | credit = current_target() - balloon_stats.current_pages; | 343 | credit = current_target() - balloon_stats.current_pages; |
342 | if (credit > 0) | 344 | if (credit > 0) |
343 | need_sleep = (increase_reservation(credit) != 0); | 345 | need_sleep = (increase_reservation(credit) != 0); |
344 | if (credit < 0) | 346 | if (credit < 0) |
345 | need_sleep = (decrease_reservation(-credit) != 0); | 347 | need_sleep = (decrease_reservation(-credit) != 0); |
346 | 348 | ||
347 | #ifndef CONFIG_PREEMPT | 349 | #ifndef CONFIG_PREEMPT |
348 | if (need_resched()) | 350 | if (need_resched()) |
349 | schedule(); | 351 | schedule(); |
350 | #endif | 352 | #endif |
351 | } while ((credit != 0) && !need_sleep); | 353 | } while ((credit != 0) && !need_sleep); |
352 | 354 | ||
353 | /* Schedule more work if there is some still to be done. */ | 355 | /* Schedule more work if there is some still to be done. */ |
354 | if (current_target() != balloon_stats.current_pages) | 356 | if (current_target() != balloon_stats.current_pages) |
355 | mod_timer(&balloon_timer, jiffies + HZ); | 357 | mod_timer(&balloon_timer, jiffies + HZ); |
356 | 358 | ||
357 | mutex_unlock(&balloon_mutex); | 359 | mutex_unlock(&balloon_mutex); |
358 | } | 360 | } |
359 | 361 | ||
360 | /* Resets the Xen limit, sets new target, and kicks off processing. */ | 362 | /* Resets the Xen limit, sets new target, and kicks off processing. */ |
361 | static void balloon_set_new_target(unsigned long target) | 363 | static void balloon_set_new_target(unsigned long target) |
362 | { | 364 | { |
363 | /* No need for a lock: these are not read-modify-write updates. */ | 365 | /* No need for a lock: these are not read-modify-write updates. */ |
364 | balloon_stats.hard_limit = ~0UL; | 366 | balloon_stats.hard_limit = ~0UL; |
365 | balloon_stats.target_pages = target; | 367 | balloon_stats.target_pages = target; |
366 | schedule_work(&balloon_worker); | 368 | schedule_work(&balloon_worker); |
367 | } | 369 | } |
368 | 370 | ||
369 | static struct xenbus_watch target_watch = | 371 | static struct xenbus_watch target_watch = |
370 | { | 372 | { |
371 | .node = "memory/target" | 373 | .node = "memory/target" |
372 | }; | 374 | }; |
373 | 375 | ||
374 | /* React to a change in the target key */ | 376 | /* React to a change in the target key */ |
375 | static void watch_target(struct xenbus_watch *watch, | 377 | static void watch_target(struct xenbus_watch *watch, |
376 | const char **vec, unsigned int len) | 378 | const char **vec, unsigned int len) |
377 | { | 379 | { |
378 | unsigned long long new_target; | 380 | unsigned long long new_target; |
379 | int err; | 381 | int err; |
380 | 382 | ||
381 | err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); | 383 | err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); |
382 | if (err != 1) { | 384 | if (err != 1) { |
383 | /* This is ok (for domain0 at least) - so just return */ | 385 | /* This is ok (for domain0 at least) - so just return */ |
384 | return; | 386 | return; |
385 | } | 387 | } |
386 | 388 | ||
387 | /* The given memory/target value is in KiB, so it needs converting to | 389 | /* The given memory/target value is in KiB, so it needs converting to |
388 | * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. | 390 | * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. |
389 | */ | 391 | */ |
390 | balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); | 392 | balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); |
391 | } | 393 | } |
392 | 394 | ||
393 | static int balloon_init_watcher(struct notifier_block *notifier, | 395 | static int balloon_init_watcher(struct notifier_block *notifier, |
394 | unsigned long event, | 396 | unsigned long event, |
395 | void *data) | 397 | void *data) |
396 | { | 398 | { |
397 | int err; | 399 | int err; |
398 | 400 | ||
399 | err = register_xenbus_watch(&target_watch); | 401 | err = register_xenbus_watch(&target_watch); |
400 | if (err) | 402 | if (err) |
401 | printk(KERN_ERR "Failed to set balloon watcher\n"); | 403 | printk(KERN_ERR "Failed to set balloon watcher\n"); |
402 | 404 | ||
403 | return NOTIFY_DONE; | 405 | return NOTIFY_DONE; |
404 | } | 406 | } |
405 | 407 | ||
406 | static struct notifier_block xenstore_notifier; | 408 | static struct notifier_block xenstore_notifier; |
407 | 409 | ||
408 | static int __init balloon_init(void) | 410 | static int __init balloon_init(void) |
409 | { | 411 | { |
410 | unsigned long pfn; | 412 | unsigned long pfn; |
411 | struct page *page; | 413 | struct page *page; |
412 | 414 | ||
413 | if (!xen_pv_domain()) | 415 | if (!xen_pv_domain()) |
414 | return -ENODEV; | 416 | return -ENODEV; |
415 | 417 | ||
416 | pr_info("xen_balloon: Initialising balloon driver.\n"); | 418 | pr_info("xen_balloon: Initialising balloon driver.\n"); |
417 | 419 | ||
418 | balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn); | 420 | balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn); |
419 | totalram_pages = balloon_stats.current_pages; | 421 | totalram_pages = balloon_stats.current_pages; |
420 | balloon_stats.target_pages = balloon_stats.current_pages; | 422 | balloon_stats.target_pages = balloon_stats.current_pages; |
421 | balloon_stats.balloon_low = 0; | 423 | balloon_stats.balloon_low = 0; |
422 | balloon_stats.balloon_high = 0; | 424 | balloon_stats.balloon_high = 0; |
423 | balloon_stats.driver_pages = 0UL; | 425 | balloon_stats.driver_pages = 0UL; |
424 | balloon_stats.hard_limit = ~0UL; | 426 | balloon_stats.hard_limit = ~0UL; |
425 | 427 | ||
426 | init_timer(&balloon_timer); | 428 | init_timer(&balloon_timer); |
427 | balloon_timer.data = 0; | 429 | balloon_timer.data = 0; |
428 | balloon_timer.function = balloon_alarm; | 430 | balloon_timer.function = balloon_alarm; |
429 | 431 | ||
430 | register_balloon(&balloon_sysdev); | 432 | register_balloon(&balloon_sysdev); |
431 | 433 | ||
432 | /* Initialise the balloon with excess memory space. */ | 434 | /* Initialise the balloon with excess memory space. */ |
433 | for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { | 435 | for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { |
434 | page = pfn_to_page(pfn); | 436 | page = pfn_to_page(pfn); |
435 | if (!PageReserved(page)) | 437 | if (!PageReserved(page)) |
436 | balloon_append(page); | 438 | balloon_append(page); |
437 | } | 439 | } |
438 | 440 | ||
439 | target_watch.callback = watch_target; | 441 | target_watch.callback = watch_target; |
440 | xenstore_notifier.notifier_call = balloon_init_watcher; | 442 | xenstore_notifier.notifier_call = balloon_init_watcher; |
441 | 443 | ||
442 | register_xenstore_notifier(&xenstore_notifier); | 444 | register_xenstore_notifier(&xenstore_notifier); |
443 | 445 | ||
444 | return 0; | 446 | return 0; |
445 | } | 447 | } |
446 | 448 | ||
447 | subsys_initcall(balloon_init); | 449 | subsys_initcall(balloon_init); |
448 | 450 | ||
449 | static void balloon_exit(void) | 451 | static void balloon_exit(void) |
450 | { | 452 | { |
451 | /* XXX - release balloon here */ | 453 | /* XXX - release balloon here */ |
452 | return; | 454 | return; |
453 | } | 455 | } |
454 | 456 | ||
455 | module_exit(balloon_exit); | 457 | module_exit(balloon_exit); |
456 | 458 | ||
457 | #define BALLOON_SHOW(name, format, args...) \ | 459 | #define BALLOON_SHOW(name, format, args...) \ |
458 | static ssize_t show_##name(struct sys_device *dev, \ | 460 | static ssize_t show_##name(struct sys_device *dev, \ |
459 | struct sysdev_attribute *attr, \ | 461 | struct sysdev_attribute *attr, \ |
460 | char *buf) \ | 462 | char *buf) \ |
461 | { \ | 463 | { \ |
462 | return sprintf(buf, format, ##args); \ | 464 | return sprintf(buf, format, ##args); \ |
463 | } \ | 465 | } \ |
464 | static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) | 466 | static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) |
465 | 467 | ||
466 | BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); | 468 | BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); |
467 | BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low)); | 469 | BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low)); |
468 | BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); | 470 | BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); |
469 | BALLOON_SHOW(hard_limit_kb, | 471 | BALLOON_SHOW(hard_limit_kb, |
470 | (balloon_stats.hard_limit != ~0UL) ? "%lu\n" : "???\n", | 472 | (balloon_stats.hard_limit != ~0UL) ? "%lu\n" : "???\n", |
471 | (balloon_stats.hard_limit != ~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0); | 473 | (balloon_stats.hard_limit != ~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0); |
472 | BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); | 474 | BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); |
473 | 475 | ||
474 | static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr, | 476 | static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr, |
475 | char *buf) | 477 | char *buf) |
476 | { | 478 | { |
477 | return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); | 479 | return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); |
478 | } | 480 | } |
479 | 481 | ||
480 | static ssize_t store_target_kb(struct sys_device *dev, | 482 | static ssize_t store_target_kb(struct sys_device *dev, |
481 | struct sysdev_attribute *attr, | 483 | struct sysdev_attribute *attr, |
482 | const char *buf, | 484 | const char *buf, |
483 | size_t count) | 485 | size_t count) |
484 | { | 486 | { |
485 | char *endchar; | 487 | char *endchar; |
486 | unsigned long long target_bytes; | 488 | unsigned long long target_bytes; |
487 | 489 | ||
488 | if (!capable(CAP_SYS_ADMIN)) | 490 | if (!capable(CAP_SYS_ADMIN)) |
489 | return -EPERM; | 491 | return -EPERM; |
490 | 492 | ||
491 | target_bytes = memparse(buf, &endchar); | 493 | target_bytes = memparse(buf, &endchar); |
492 | 494 | ||
493 | balloon_set_new_target(target_bytes >> PAGE_SHIFT); | 495 | balloon_set_new_target(target_bytes >> PAGE_SHIFT); |
494 | 496 | ||
495 | return count; | 497 | return count; |
496 | } | 498 | } |
497 | 499 | ||
498 | static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, | 500 | static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, |
499 | show_target_kb, store_target_kb); | 501 | show_target_kb, store_target_kb); |
500 | 502 | ||
501 | static struct sysdev_attribute *balloon_attrs[] = { | 503 | static struct sysdev_attribute *balloon_attrs[] = { |
502 | &attr_target_kb, | 504 | &attr_target_kb, |
503 | }; | 505 | }; |
504 | 506 | ||
505 | static struct attribute *balloon_info_attrs[] = { | 507 | static struct attribute *balloon_info_attrs[] = { |
506 | &attr_current_kb.attr, | 508 | &attr_current_kb.attr, |
507 | &attr_low_kb.attr, | 509 | &attr_low_kb.attr, |
508 | &attr_high_kb.attr, | 510 | &attr_high_kb.attr, |
509 | &attr_hard_limit_kb.attr, | 511 | &attr_hard_limit_kb.attr, |
510 | &attr_driver_kb.attr, | 512 | &attr_driver_kb.attr, |
511 | NULL | 513 | NULL |
512 | }; | 514 | }; |
513 | 515 | ||
514 | static struct attribute_group balloon_info_group = { | 516 | static struct attribute_group balloon_info_group = { |
515 | .name = "info", | 517 | .name = "info", |
516 | .attrs = balloon_info_attrs, | 518 | .attrs = balloon_info_attrs, |
517 | }; | 519 | }; |
518 | 520 | ||
519 | static struct sysdev_class balloon_sysdev_class = { | 521 | static struct sysdev_class balloon_sysdev_class = { |
520 | .name = BALLOON_CLASS_NAME, | 522 | .name = BALLOON_CLASS_NAME, |
521 | }; | 523 | }; |
522 | 524 | ||
523 | static int register_balloon(struct sys_device *sysdev) | 525 | static int register_balloon(struct sys_device *sysdev) |
524 | { | 526 | { |
525 | int i, error; | 527 | int i, error; |
526 | 528 | ||
527 | error = sysdev_class_register(&balloon_sysdev_class); | 529 | error = sysdev_class_register(&balloon_sysdev_class); |
528 | if (error) | 530 | if (error) |
529 | return error; | 531 | return error; |
530 | 532 | ||
531 | sysdev->id = 0; | 533 | sysdev->id = 0; |
532 | sysdev->cls = &balloon_sysdev_class; | 534 | sysdev->cls = &balloon_sysdev_class; |
533 | 535 | ||
534 | error = sysdev_register(sysdev); | 536 | error = sysdev_register(sysdev); |
535 | if (error) { | 537 | if (error) { |
536 | sysdev_class_unregister(&balloon_sysdev_class); | 538 | sysdev_class_unregister(&balloon_sysdev_class); |
537 | return error; | 539 | return error; |
538 | } | 540 | } |
539 | 541 | ||
540 | for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { | 542 | for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { |
541 | error = sysdev_create_file(sysdev, balloon_attrs[i]); | 543 | error = sysdev_create_file(sysdev, balloon_attrs[i]); |
542 | if (error) | 544 | if (error) |
543 | goto fail; | 545 | goto fail; |
544 | } | 546 | } |
545 | 547 | ||
546 | error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); | 548 | error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); |
547 | if (error) | 549 | if (error) |
548 | goto fail; | 550 | goto fail; |
549 | 551 | ||
550 | return 0; | 552 | return 0; |
551 | 553 | ||
552 | fail: | 554 | fail: |
553 | while (--i >= 0) | 555 | while (--i >= 0) |
554 | sysdev_remove_file(sysdev, balloon_attrs[i]); | 556 | sysdev_remove_file(sysdev, balloon_attrs[i]); |
555 | sysdev_unregister(sysdev); | 557 | sysdev_unregister(sysdev); |
556 | sysdev_class_unregister(&balloon_sysdev_class); | 558 | sysdev_class_unregister(&balloon_sysdev_class); |
557 | return error; | 559 | return error; |
558 | } | 560 | } |
559 | 561 | ||
560 | MODULE_LICENSE("GPL"); | 562 | MODULE_LICENSE("GPL"); |
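
A note on the two target interfaces in balloon.c above: the xenstore memory/target key arrives in KiB, which watch_target() converts with new_target >> (PAGE_SHIFT - 10), while the sysfs target_kb store path parses a byte count via memparse() and shifts by the full PAGE_SHIFT. A minimal sketch of the arithmetic, assuming 4 KiB pages (PAGE_SHIFT == 12) and purely illustrative values:

	/* KiB -> pages: one page is 1 << (PAGE_SHIFT - 10) == 4 KiB */
	unsigned long long kib = 524288;			/* 512 MiB, as read from xenstore */
	unsigned long pages = kib >> (PAGE_SHIFT - 10);		/* 524288 >> 2 == 131072 pages */

	/* bytes -> pages, as in store_target_kb() */
	char buf[] = "512M", *end;
	unsigned long long bytes = memparse(buf, &end);		/* 536870912 bytes */
	unsigned long pages2 = bytes >> PAGE_SHIFT;		/* 536870912 >> 12 == 131072 pages */

Both paths land on the same page count, which is what balloon_set_new_target() ultimately receives.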
drivers/xen/features.c
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | * features.c | 2 | * features.c |
3 | * | 3 | * |
4 | * Xen feature flags. | 4 | * Xen feature flags. |
5 | * | 5 | * |
6 | * Copyright (c) 2006, Ian Campbell, XenSource Inc. | 6 | * Copyright (c) 2006, Ian Campbell, XenSource Inc. |
7 | */ | 7 | */ |
8 | #include <linux/types.h> | 8 | #include <linux/types.h> |
9 | #include <linux/cache.h> | 9 | #include <linux/cache.h> |
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <asm/xen/hypervisor.h> | 11 | |
12 | #include <asm/xen/hypercall.h> | ||
13 | |||
14 | #include <xen/interface/xen.h> | ||
15 | #include <xen/interface/version.h> | ||
12 | #include <xen/features.h> | 16 | #include <xen/features.h> |
13 | 17 | ||
14 | u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly; | 18 | u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly; |
15 | EXPORT_SYMBOL_GPL(xen_features); | 19 | EXPORT_SYMBOL_GPL(xen_features); |
16 | 20 | ||
17 | void xen_setup_features(void) | 21 | void xen_setup_features(void) |
18 | { | 22 | { |
19 | struct xen_feature_info fi; | 23 | struct xen_feature_info fi; |
20 | int i, j; | 24 | int i, j; |
21 | 25 | ||
22 | for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) { | 26 | for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) { |
23 | fi.submap_idx = i; | 27 | fi.submap_idx = i; |
24 | if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) | 28 | if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) |
25 | break; | 29 | break; |
26 | for (j = 0; j < 32; j++) | 30 | for (j = 0; j < 32; j++) |
27 | xen_features[i * 32 + j] = !!(fi.submap & 1<<j); | 31 | xen_features[i * 32 + j] = !!(fi.submap & 1<<j); |
28 | } | 32 | } |
29 | } | 33 | } |
30 | 34 |
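
The loop in xen_setup_features() trades a little space for lookup speed: each 32-bit submap returned by XENVER_get_features is unpacked into one byte per feature in xen_features[], so a test such as xen_feature(XENFEAT_auto_translated_physmap) (seen in balloon.c above) is a plain array index with no bit arithmetic. A sketch of the lookup helper as it behaves in <xen/features.h>:

	static inline int xen_feature(int flag)
	{
		return xen_features[flag];	/* one byte per feature, 0 or 1 */
	}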
drivers/xen/grant-table.c
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | * grant_table.c | 2 | * grant_table.c |
3 | * | 3 | * |
4 | * Granting foreign access to our memory reservation. | 4 | * Granting foreign access to our memory reservation. |
5 | * | 5 | * |
6 | * Copyright (c) 2005-2006, Christopher Clark | 6 | * Copyright (c) 2005-2006, Christopher Clark |
7 | * Copyright (c) 2004-2005, K A Fraser | 7 | * Copyright (c) 2004-2005, K A Fraser |
8 | * | 8 | * |
9 | * This program is free software; you can redistribute it and/or | 9 | * This program is free software; you can redistribute it and/or |
10 | * modify it under the terms of the GNU General Public License version 2 | 10 | * modify it under the terms of the GNU General Public License version 2 |
11 | * as published by the Free Software Foundation; or, when distributed | 11 | * as published by the Free Software Foundation; or, when distributed |
12 | * separately from the Linux kernel or incorporated into other | 12 | * separately from the Linux kernel or incorporated into other |
13 | * software packages, subject to the following license: | 13 | * software packages, subject to the following license: |
14 | * | 14 | * |
15 | * Permission is hereby granted, free of charge, to any person obtaining a copy | 15 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
16 | * of this source file (the "Software"), to deal in the Software without | 16 | * of this source file (the "Software"), to deal in the Software without |
17 | * restriction, including without limitation the rights to use, copy, modify, | 17 | * restriction, including without limitation the rights to use, copy, modify, |
18 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | 18 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, |
19 | * and to permit persons to whom the Software is furnished to do so, subject to | 19 | * and to permit persons to whom the Software is furnished to do so, subject to |
20 | * the following conditions: | 20 | * the following conditions: |
21 | * | 21 | * |
22 | * The above copyright notice and this permission notice shall be included in | 22 | * The above copyright notice and this permission notice shall be included in |
23 | * all copies or substantial portions of the Software. | 23 | * all copies or substantial portions of the Software. |
24 | * | 24 | * |
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
26 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 26 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
27 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 27 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
28 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 28 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
29 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | 29 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | 30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
31 | * IN THE SOFTWARE. | 31 | * IN THE SOFTWARE. |
32 | */ | 32 | */ |
33 | 33 | ||
34 | #include <linux/module.h> | 34 | #include <linux/module.h> |
35 | #include <linux/sched.h> | 35 | #include <linux/sched.h> |
36 | #include <linux/mm.h> | 36 | #include <linux/mm.h> |
37 | #include <linux/vmalloc.h> | 37 | #include <linux/vmalloc.h> |
38 | #include <linux/uaccess.h> | 38 | #include <linux/uaccess.h> |
39 | 39 | ||
40 | #include <xen/interface/xen.h> | 40 | #include <xen/interface/xen.h> |
41 | #include <xen/page.h> | 41 | #include <xen/page.h> |
42 | #include <xen/grant_table.h> | 42 | #include <xen/grant_table.h> |
43 | #include <asm/xen/hypercall.h> | ||
43 | 44 | ||
44 | #include <asm/pgtable.h> | 45 | #include <asm/pgtable.h> |
45 | #include <asm/sync_bitops.h> | 46 | #include <asm/sync_bitops.h> |
46 | 47 | ||
47 | 48 | ||
48 | /* External tools reserve first few grant table entries. */ | 49 | /* External tools reserve first few grant table entries. */ |
49 | #define NR_RESERVED_ENTRIES 8 | 50 | #define NR_RESERVED_ENTRIES 8 |
50 | #define GNTTAB_LIST_END 0xffffffff | 51 | #define GNTTAB_LIST_END 0xffffffff |
51 | #define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry)) | 52 | #define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry)) |
52 | 53 | ||
53 | static grant_ref_t **gnttab_list; | 54 | static grant_ref_t **gnttab_list; |
54 | static unsigned int nr_grant_frames; | 55 | static unsigned int nr_grant_frames; |
55 | static unsigned int boot_max_nr_grant_frames; | 56 | static unsigned int boot_max_nr_grant_frames; |
56 | static int gnttab_free_count; | 57 | static int gnttab_free_count; |
57 | static grant_ref_t gnttab_free_head; | 58 | static grant_ref_t gnttab_free_head; |
58 | static DEFINE_SPINLOCK(gnttab_list_lock); | 59 | static DEFINE_SPINLOCK(gnttab_list_lock); |
59 | 60 | ||
60 | static struct grant_entry *shared; | 61 | static struct grant_entry *shared; |
61 | 62 | ||
62 | static struct gnttab_free_callback *gnttab_free_callback_list; | 63 | static struct gnttab_free_callback *gnttab_free_callback_list; |
63 | 64 | ||
64 | static int gnttab_expand(unsigned int req_entries); | 65 | static int gnttab_expand(unsigned int req_entries); |
65 | 66 | ||
66 | #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) | 67 | #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) |
67 | 68 | ||
68 | static inline grant_ref_t *__gnttab_entry(grant_ref_t entry) | 69 | static inline grant_ref_t *__gnttab_entry(grant_ref_t entry) |
69 | { | 70 | { |
70 | return &gnttab_list[(entry) / RPP][(entry) % RPP]; | 71 | return &gnttab_list[(entry) / RPP][(entry) % RPP]; |
71 | } | 72 | } |
72 | /* This can be used as an l-value */ | 73 | /* This can be used as an l-value */ |
73 | #define gnttab_entry(entry) (*__gnttab_entry(entry)) | 74 | #define gnttab_entry(entry) (*__gnttab_entry(entry)) |
74 | 75 | ||
75 | static int get_free_entries(unsigned count) | 76 | static int get_free_entries(unsigned count) |
76 | { | 77 | { |
77 | unsigned long flags; | 78 | unsigned long flags; |
78 | int ref, rc; | 79 | int ref, rc; |
79 | grant_ref_t head; | 80 | grant_ref_t head; |
80 | 81 | ||
81 | spin_lock_irqsave(&gnttab_list_lock, flags); | 82 | spin_lock_irqsave(&gnttab_list_lock, flags); |
82 | 83 | ||
83 | if ((gnttab_free_count < count) && | 84 | if ((gnttab_free_count < count) && |
84 | ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) { | 85 | ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) { |
85 | spin_unlock_irqrestore(&gnttab_list_lock, flags); | 86 | spin_unlock_irqrestore(&gnttab_list_lock, flags); |
86 | return rc; | 87 | return rc; |
87 | } | 88 | } |
88 | 89 | ||
89 | ref = head = gnttab_free_head; | 90 | ref = head = gnttab_free_head; |
90 | gnttab_free_count -= count; | 91 | gnttab_free_count -= count; |
91 | while (count-- > 1) | 92 | while (count-- > 1) |
92 | head = gnttab_entry(head); | 93 | head = gnttab_entry(head); |
93 | gnttab_free_head = gnttab_entry(head); | 94 | gnttab_free_head = gnttab_entry(head); |
94 | gnttab_entry(head) = GNTTAB_LIST_END; | 95 | gnttab_entry(head) = GNTTAB_LIST_END; |
95 | 96 | ||
96 | spin_unlock_irqrestore(&gnttab_list_lock, flags); | 97 | spin_unlock_irqrestore(&gnttab_list_lock, flags); |
97 | 98 | ||
98 | return ref; | 99 | return ref; |
99 | } | 100 | } |
100 | 101 | ||
101 | static void do_free_callbacks(void) | 102 | static void do_free_callbacks(void) |
102 | { | 103 | { |
103 | struct gnttab_free_callback *callback, *next; | 104 | struct gnttab_free_callback *callback, *next; |
104 | 105 | ||
105 | callback = gnttab_free_callback_list; | 106 | callback = gnttab_free_callback_list; |
106 | gnttab_free_callback_list = NULL; | 107 | gnttab_free_callback_list = NULL; |
107 | 108 | ||
108 | while (callback != NULL) { | 109 | while (callback != NULL) { |
109 | next = callback->next; | 110 | next = callback->next; |
110 | if (gnttab_free_count >= callback->count) { | 111 | if (gnttab_free_count >= callback->count) { |
111 | callback->next = NULL; | 112 | callback->next = NULL; |
112 | callback->fn(callback->arg); | 113 | callback->fn(callback->arg); |
113 | } else { | 114 | } else { |
114 | callback->next = gnttab_free_callback_list; | 115 | callback->next = gnttab_free_callback_list; |
115 | gnttab_free_callback_list = callback; | 116 | gnttab_free_callback_list = callback; |
116 | } | 117 | } |
117 | callback = next; | 118 | callback = next; |
118 | } | 119 | } |
119 | } | 120 | } |
120 | 121 | ||
121 | static inline void check_free_callbacks(void) | 122 | static inline void check_free_callbacks(void) |
122 | { | 123 | { |
123 | if (unlikely(gnttab_free_callback_list)) | 124 | if (unlikely(gnttab_free_callback_list)) |
124 | do_free_callbacks(); | 125 | do_free_callbacks(); |
125 | } | 126 | } |
126 | 127 | ||
127 | static void put_free_entry(grant_ref_t ref) | 128 | static void put_free_entry(grant_ref_t ref) |
128 | { | 129 | { |
129 | unsigned long flags; | 130 | unsigned long flags; |
130 | spin_lock_irqsave(&gnttab_list_lock, flags); | 131 | spin_lock_irqsave(&gnttab_list_lock, flags); |
131 | gnttab_entry(ref) = gnttab_free_head; | 132 | gnttab_entry(ref) = gnttab_free_head; |
132 | gnttab_free_head = ref; | 133 | gnttab_free_head = ref; |
133 | gnttab_free_count++; | 134 | gnttab_free_count++; |
134 | check_free_callbacks(); | 135 | check_free_callbacks(); |
135 | spin_unlock_irqrestore(&gnttab_list_lock, flags); | 136 | spin_unlock_irqrestore(&gnttab_list_lock, flags); |
136 | } | 137 | } |
137 | 138 | ||
138 | static void update_grant_entry(grant_ref_t ref, domid_t domid, | 139 | static void update_grant_entry(grant_ref_t ref, domid_t domid, |
139 | unsigned long frame, unsigned flags) | 140 | unsigned long frame, unsigned flags) |
140 | { | 141 | { |
141 | /* | 142 | /* |
142 | * Introducing a valid entry into the grant table: | 143 | * Introducing a valid entry into the grant table: |
143 | * 1. Write ent->domid. | 144 | * 1. Write ent->domid. |
144 | * 2. Write ent->frame: | 145 | * 2. Write ent->frame: |
145 | * GTF_permit_access: Frame to which access is permitted. | 146 | * GTF_permit_access: Frame to which access is permitted. |
146 | * GTF_accept_transfer: Pseudo-phys frame slot being filled by new | 147 | * GTF_accept_transfer: Pseudo-phys frame slot being filled by new |
147 | * frame, or zero if none. | 148 | * frame, or zero if none. |
148 | * 3. Write memory barrier (WMB). | 149 | * 3. Write memory barrier (WMB). |
149 | * 4. Write ent->flags, inc. valid type. | 150 | * 4. Write ent->flags, inc. valid type. |
150 | */ | 151 | */ |
151 | shared[ref].frame = frame; | 152 | shared[ref].frame = frame; |
152 | shared[ref].domid = domid; | 153 | shared[ref].domid = domid; |
153 | wmb(); | 154 | wmb(); |
154 | shared[ref].flags = flags; | 155 | shared[ref].flags = flags; |
155 | } | 156 | } |
156 | 157 | ||
157 | /* | 158 | /* |
158 | * Public grant-issuing interface functions | 159 | * Public grant-issuing interface functions |
159 | */ | 160 | */ |
160 | void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, | 161 | void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, |
161 | unsigned long frame, int readonly) | 162 | unsigned long frame, int readonly) |
162 | { | 163 | { |
163 | update_grant_entry(ref, domid, frame, | 164 | update_grant_entry(ref, domid, frame, |
164 | GTF_permit_access | (readonly ? GTF_readonly : 0)); | 165 | GTF_permit_access | (readonly ? GTF_readonly : 0)); |
165 | } | 166 | } |
166 | EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref); | 167 | EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref); |
167 | 168 | ||
168 | int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, | 169 | int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, |
169 | int readonly) | 170 | int readonly) |
170 | { | 171 | { |
171 | int ref; | 172 | int ref; |
172 | 173 | ||
173 | ref = get_free_entries(1); | 174 | ref = get_free_entries(1); |
174 | if (unlikely(ref < 0)) | 175 | if (unlikely(ref < 0)) |
175 | return -ENOSPC; | 176 | return -ENOSPC; |
176 | 177 | ||
177 | gnttab_grant_foreign_access_ref(ref, domid, frame, readonly); | 178 | gnttab_grant_foreign_access_ref(ref, domid, frame, readonly); |
178 | 179 | ||
179 | return ref; | 180 | return ref; |
180 | } | 181 | } |
181 | EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); | 182 | EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); |
182 | 183 | ||
183 | int gnttab_query_foreign_access(grant_ref_t ref) | 184 | int gnttab_query_foreign_access(grant_ref_t ref) |
184 | { | 185 | { |
185 | u16 nflags; | 186 | u16 nflags; |
186 | 187 | ||
187 | nflags = shared[ref].flags; | 188 | nflags = shared[ref].flags; |
188 | 189 | ||
189 | return (nflags & (GTF_reading|GTF_writing)); | 190 | return (nflags & (GTF_reading|GTF_writing)); |
190 | } | 191 | } |
191 | EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); | 192 | EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); |
192 | 193 | ||
193 | int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) | 194 | int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) |
194 | { | 195 | { |
195 | u16 flags, nflags; | 196 | u16 flags, nflags; |
196 | 197 | ||
197 | nflags = shared[ref].flags; | 198 | nflags = shared[ref].flags; |
198 | do { | 199 | do { |
199 | flags = nflags; | 200 | flags = nflags; |
200 | if (flags & (GTF_reading|GTF_writing)) { | 201 | if (flags & (GTF_reading|GTF_writing)) { |
201 | printk(KERN_ALERT "WARNING: g.e. still in use!\n"); | 202 | printk(KERN_ALERT "WARNING: g.e. still in use!\n"); |
202 | return 0; | 203 | return 0; |
203 | } | 204 | } |
204 | } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags); | 205 | } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags); |
205 | 206 | ||
206 | return 1; | 207 | return 1; |
207 | } | 208 | } |
208 | EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); | 209 | EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); |
209 | 210 | ||
210 | void gnttab_end_foreign_access(grant_ref_t ref, int readonly, | 211 | void gnttab_end_foreign_access(grant_ref_t ref, int readonly, |
211 | unsigned long page) | 212 | unsigned long page) |
212 | { | 213 | { |
213 | if (gnttab_end_foreign_access_ref(ref, readonly)) { | 214 | if (gnttab_end_foreign_access_ref(ref, readonly)) { |
214 | put_free_entry(ref); | 215 | put_free_entry(ref); |
215 | if (page != 0) | 216 | if (page != 0) |
216 | free_page(page); | 217 | free_page(page); |
217 | } else { | 218 | } else { |
218 | /* XXX This needs to be fixed so that the ref and page are | 219 | /* XXX This needs to be fixed so that the ref and page are |
219 | placed on a list to be freed up later. */ | 220 | placed on a list to be freed up later. */ |
220 | printk(KERN_WARNING | 221 | printk(KERN_WARNING |
221 | "WARNING: leaking g.e. and page still in use!\n"); | 222 | "WARNING: leaking g.e. and page still in use!\n"); |
222 | } | 223 | } |
223 | } | 224 | } |
224 | EXPORT_SYMBOL_GPL(gnttab_end_foreign_access); | 225 | EXPORT_SYMBOL_GPL(gnttab_end_foreign_access); |
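
Taken together, the exports above give a frontend driver its grant lifecycle: allocate a reference, point it at a frame, publish the reference to the peer out of band (typically via xenstore), and revoke it when the I/O is done. A hedged usage sketch, assuming a PV guest where virt_to_mfn() applies; remote_domid and the shared page are placeholders:

	unsigned long page = get_zeroed_page(GFP_KERNEL);	/* frame to share */
	int ref = gnttab_grant_foreign_access(remote_domid,
					      virt_to_mfn((void *)page),
					      0 /* read-write */);
	if (ref < 0)
		return ref;		/* -ENOSPC: no free grant entries */
	/* ... advertise ref to the peer and perform the I/O ... */
	gnttab_end_foreign_access(ref, 0, page);	/* revokes; frees the page
							   only if the peer is done */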
225 | 226 | ||
226 | int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn) | 227 | int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn) |
227 | { | 228 | { |
228 | int ref; | 229 | int ref; |
229 | 230 | ||
230 | ref = get_free_entries(1); | 231 | ref = get_free_entries(1); |
231 | if (unlikely(ref < 0)) | 232 | if (unlikely(ref < 0)) |
232 | return -ENOSPC; | 233 | return -ENOSPC; |
233 | gnttab_grant_foreign_transfer_ref(ref, domid, pfn); | 234 | gnttab_grant_foreign_transfer_ref(ref, domid, pfn); |
234 | 235 | ||
235 | return ref; | 236 | return ref; |
236 | } | 237 | } |
237 | EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer); | 238 | EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer); |
238 | 239 | ||
239 | void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, | 240 | void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, |
240 | unsigned long pfn) | 241 | unsigned long pfn) |
241 | { | 242 | { |
242 | update_grant_entry(ref, domid, pfn, GTF_accept_transfer); | 243 | update_grant_entry(ref, domid, pfn, GTF_accept_transfer); |
243 | } | 244 | } |
244 | EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref); | 245 | EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref); |
245 | 246 | ||
246 | unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) | 247 | unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) |
247 | { | 248 | { |
248 | unsigned long frame; | 249 | unsigned long frame; |
249 | u16 flags; | 250 | u16 flags; |
250 | 251 | ||
251 | /* | 252 | /* |
252 | * If a transfer has not even started yet, try to reclaim the grant | 253 | * If a transfer has not even started yet, try to reclaim the grant |
253 | * reference and return failure (== 0). | 254 | * reference and return failure (== 0). |
254 | */ | 255 | */ |
255 | while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { | 256 | while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { |
256 | if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags) | 257 | if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags) |
257 | return 0; | 258 | return 0; |
258 | cpu_relax(); | 259 | cpu_relax(); |
259 | } | 260 | } |
260 | 261 | ||
261 | /* If a transfer is in progress then wait until it is completed. */ | 262 | /* If a transfer is in progress then wait until it is completed. */ |
262 | while (!(flags & GTF_transfer_completed)) { | 263 | while (!(flags & GTF_transfer_completed)) { |
263 | flags = shared[ref].flags; | 264 | flags = shared[ref].flags; |
264 | cpu_relax(); | 265 | cpu_relax(); |
265 | } | 266 | } |
266 | 267 | ||
267 | rmb(); /* Read the frame number /after/ reading completion status. */ | 268 | rmb(); /* Read the frame number /after/ reading completion status. */ |
268 | frame = shared[ref].frame; | 269 | frame = shared[ref].frame; |
269 | BUG_ON(frame == 0); | 270 | BUG_ON(frame == 0); |
270 | 271 | ||
271 | return frame; | 272 | return frame; |
272 | } | 273 | } |
273 | EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref); | 274 | EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref); |
274 | 275 | ||
275 | unsigned long gnttab_end_foreign_transfer(grant_ref_t ref) | 276 | unsigned long gnttab_end_foreign_transfer(grant_ref_t ref) |
276 | { | 277 | { |
277 | unsigned long frame = gnttab_end_foreign_transfer_ref(ref); | 278 | unsigned long frame = gnttab_end_foreign_transfer_ref(ref); |
278 | put_free_entry(ref); | 279 | put_free_entry(ref); |
279 | return frame; | 280 | return frame; |
280 | } | 281 | } |
281 | EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer); | 282 | EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer); |
282 | 283 | ||
283 | void gnttab_free_grant_reference(grant_ref_t ref) | 284 | void gnttab_free_grant_reference(grant_ref_t ref) |
284 | { | 285 | { |
285 | put_free_entry(ref); | 286 | put_free_entry(ref); |
286 | } | 287 | } |
287 | EXPORT_SYMBOL_GPL(gnttab_free_grant_reference); | 288 | EXPORT_SYMBOL_GPL(gnttab_free_grant_reference); |
288 | 289 | ||
289 | void gnttab_free_grant_references(grant_ref_t head) | 290 | void gnttab_free_grant_references(grant_ref_t head) |
290 | { | 291 | { |
291 | grant_ref_t ref; | 292 | grant_ref_t ref; |
292 | unsigned long flags; | 293 | unsigned long flags; |
293 | int count = 1; | 294 | int count = 1; |
294 | if (head == GNTTAB_LIST_END) | 295 | if (head == GNTTAB_LIST_END) |
295 | return; | 296 | return; |
296 | spin_lock_irqsave(&gnttab_list_lock, flags); | 297 | spin_lock_irqsave(&gnttab_list_lock, flags); |
297 | ref = head; | 298 | ref = head; |
298 | while (gnttab_entry(ref) != GNTTAB_LIST_END) { | 299 | while (gnttab_entry(ref) != GNTTAB_LIST_END) { |
299 | ref = gnttab_entry(ref); | 300 | ref = gnttab_entry(ref); |
300 | count++; | 301 | count++; |
301 | } | 302 | } |
302 | gnttab_entry(ref) = gnttab_free_head; | 303 | gnttab_entry(ref) = gnttab_free_head; |
303 | gnttab_free_head = head; | 304 | gnttab_free_head = head; |
304 | gnttab_free_count += count; | 305 | gnttab_free_count += count; |
305 | check_free_callbacks(); | 306 | check_free_callbacks(); |
306 | spin_unlock_irqrestore(&gnttab_list_lock, flags); | 307 | spin_unlock_irqrestore(&gnttab_list_lock, flags); |
307 | } | 308 | } |
308 | EXPORT_SYMBOL_GPL(gnttab_free_grant_references); | 309 | EXPORT_SYMBOL_GPL(gnttab_free_grant_references); |
309 | 310 | ||
310 | int gnttab_alloc_grant_references(u16 count, grant_ref_t *head) | 311 | int gnttab_alloc_grant_references(u16 count, grant_ref_t *head) |
311 | { | 312 | { |
312 | int h = get_free_entries(count); | 313 | int h = get_free_entries(count); |
313 | 314 | ||
314 | if (h < 0) | 315 | if (h < 0) |
315 | return -ENOSPC; | 316 | return -ENOSPC; |
316 | 317 | ||
317 | *head = h; | 318 | *head = h; |
318 | 319 | ||
319 | return 0; | 320 | return 0; |
320 | } | 321 | } |
321 | EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references); | 322 | EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references); |
322 | 323 | ||
323 | int gnttab_empty_grant_references(const grant_ref_t *private_head) | 324 | int gnttab_empty_grant_references(const grant_ref_t *private_head) |
324 | { | 325 | { |
325 | return (*private_head == GNTTAB_LIST_END); | 326 | return (*private_head == GNTTAB_LIST_END); |
326 | } | 327 | } |
327 | EXPORT_SYMBOL_GPL(gnttab_empty_grant_references); | 328 | EXPORT_SYMBOL_GPL(gnttab_empty_grant_references); |
328 | 329 | ||
329 | int gnttab_claim_grant_reference(grant_ref_t *private_head) | 330 | int gnttab_claim_grant_reference(grant_ref_t *private_head) |
330 | { | 331 | { |
331 | grant_ref_t g = *private_head; | 332 | grant_ref_t g = *private_head; |
332 | if (unlikely(g == GNTTAB_LIST_END)) | 333 | if (unlikely(g == GNTTAB_LIST_END)) |
333 | return -ENOSPC; | 334 | return -ENOSPC; |
334 | *private_head = gnttab_entry(g); | 335 | *private_head = gnttab_entry(g); |
335 | return g; | 336 | return g; |
336 | } | 337 | } |
337 | EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference); | 338 | EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference); |
338 | 339 | ||
339 | void gnttab_release_grant_reference(grant_ref_t *private_head, | 340 | void gnttab_release_grant_reference(grant_ref_t *private_head, |
340 | grant_ref_t release) | 341 | grant_ref_t release) |
341 | { | 342 | { |
342 | gnttab_entry(release) = *private_head; | 343 | gnttab_entry(release) = *private_head; |
343 | *private_head = release; | 344 | *private_head = release; |
344 | } | 345 | } |
345 | EXPORT_SYMBOL_GPL(gnttab_release_grant_reference); | 346 | EXPORT_SYMBOL_GPL(gnttab_release_grant_reference); |
346 | 347 | ||
347 | void gnttab_request_free_callback(struct gnttab_free_callback *callback, | 348 | void gnttab_request_free_callback(struct gnttab_free_callback *callback, |
348 | void (*fn)(void *), void *arg, u16 count) | 349 | void (*fn)(void *), void *arg, u16 count) |
349 | { | 350 | { |
350 | unsigned long flags; | 351 | unsigned long flags; |
351 | spin_lock_irqsave(&gnttab_list_lock, flags); | 352 | spin_lock_irqsave(&gnttab_list_lock, flags); |
352 | if (callback->next) | 353 | if (callback->next) |
353 | goto out; | 354 | goto out; |
354 | callback->fn = fn; | 355 | callback->fn = fn; |
355 | callback->arg = arg; | 356 | callback->arg = arg; |
356 | callback->count = count; | 357 | callback->count = count; |
357 | callback->next = gnttab_free_callback_list; | 358 | callback->next = gnttab_free_callback_list; |
358 | gnttab_free_callback_list = callback; | 359 | gnttab_free_callback_list = callback; |
359 | check_free_callbacks(); | 360 | check_free_callbacks(); |
360 | out: | 361 | out: |
361 | spin_unlock_irqrestore(&gnttab_list_lock, flags); | 362 | spin_unlock_irqrestore(&gnttab_list_lock, flags); |
362 | } | 363 | } |
363 | EXPORT_SYMBOL_GPL(gnttab_request_free_callback); | 364 | EXPORT_SYMBOL_GPL(gnttab_request_free_callback); |
364 | 365 | ||
365 | void gnttab_cancel_free_callback(struct gnttab_free_callback *callback) | 366 | void gnttab_cancel_free_callback(struct gnttab_free_callback *callback) |
366 | { | 367 | { |
367 | struct gnttab_free_callback **pcb; | 368 | struct gnttab_free_callback **pcb; |
368 | unsigned long flags; | 369 | unsigned long flags; |
369 | 370 | ||
370 | spin_lock_irqsave(&gnttab_list_lock, flags); | 371 | spin_lock_irqsave(&gnttab_list_lock, flags); |
371 | for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { | 372 | for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { |
372 | if (*pcb == callback) { | 373 | if (*pcb == callback) { |
373 | *pcb = callback->next; | 374 | *pcb = callback->next; |
374 | break; | 375 | break; |
375 | } | 376 | } |
376 | } | 377 | } |
377 | spin_unlock_irqrestore(&gnttab_list_lock, flags); | 378 | spin_unlock_irqrestore(&gnttab_list_lock, flags); |
378 | } | 379 | } |
379 | EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback); | 380 | EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback); |
380 | 381 | ||
381 | static int grow_gnttab_list(unsigned int more_frames) | 382 | static int grow_gnttab_list(unsigned int more_frames) |
382 | { | 383 | { |
383 | unsigned int new_nr_grant_frames, extra_entries, i; | 384 | unsigned int new_nr_grant_frames, extra_entries, i; |
384 | unsigned int nr_glist_frames, new_nr_glist_frames; | 385 | unsigned int nr_glist_frames, new_nr_glist_frames; |
385 | 386 | ||
386 | new_nr_grant_frames = nr_grant_frames + more_frames; | 387 | new_nr_grant_frames = nr_grant_frames + more_frames; |
387 | extra_entries = more_frames * GREFS_PER_GRANT_FRAME; | 388 | extra_entries = more_frames * GREFS_PER_GRANT_FRAME; |
388 | 389 | ||
389 | nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; | 390 | nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; |
390 | new_nr_glist_frames = | 391 | new_nr_glist_frames = |
391 | (new_nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; | 392 | (new_nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; |
392 | for (i = nr_glist_frames; i < new_nr_glist_frames; i++) { | 393 | for (i = nr_glist_frames; i < new_nr_glist_frames; i++) { |
393 | gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC); | 394 | gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC); |
394 | if (!gnttab_list[i]) | 395 | if (!gnttab_list[i]) |
395 | goto grow_nomem; | 396 | goto grow_nomem; |
396 | } | 397 | } |
397 | 398 | ||
398 | 399 | ||
399 | for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; | 400 | for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; |
400 | i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) | 401 | i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) |
401 | gnttab_entry(i) = i + 1; | 402 | gnttab_entry(i) = i + 1; |
402 | 403 | ||
403 | gnttab_entry(i) = gnttab_free_head; | 404 | gnttab_entry(i) = gnttab_free_head; |
404 | gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; | 405 | gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; |
405 | gnttab_free_count += extra_entries; | 406 | gnttab_free_count += extra_entries; |
406 | 407 | ||
407 | nr_grant_frames = new_nr_grant_frames; | 408 | nr_grant_frames = new_nr_grant_frames; |
408 | 409 | ||
409 | check_free_callbacks(); | 410 | check_free_callbacks(); |
410 | 411 | ||
411 | return 0; | 412 | return 0; |
412 | 413 | ||
413 | grow_nomem: | 414 | grow_nomem: |
414 | for ( ; i >= nr_glist_frames; i--) | 415 | for ( ; i >= nr_glist_frames; i--) |
415 | free_page((unsigned long) gnttab_list[i]); | 416 | free_page((unsigned long) gnttab_list[i]); |
416 | return -ENOMEM; | 417 | return -ENOMEM; |
417 | } | 418 | } |
418 | 419 | ||
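To put numbers on the bookkeeping above: assuming x86 with 4 KiB pages and this era's 8-byte struct grant_entry and 4-byte grant_ref_t, GREFS_PER_GRANT_FRAME is 4096 / 8 = 512 and RPP (refs per page of the free list) is 4096 / 4 = 1024, which is why the ceiling divisions appear. A worked example under those assumptions:

/* Growing from 3 to 5 grant frames (constants assumed as above):
 *   extra_entries       = 2 * 512 = 1024 new references
 *   nr_glist_frames     = (3 * 512 + 1023) / 1024 = 2 pages already held
 *   new_nr_glist_frames = (5 * 512 + 1023) / 1024 = 3 pages
 * so the loop above allocates exactly one more free-list page, then
 * threads entries 1536..2559 onto the head of the free list.
 */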
419 | static unsigned int __max_nr_grant_frames(void) | 420 | static unsigned int __max_nr_grant_frames(void) |
420 | { | 421 | { |
421 | struct gnttab_query_size query; | 422 | struct gnttab_query_size query; |
422 | int rc; | 423 | int rc; |
423 | 424 | ||
424 | query.dom = DOMID_SELF; | 425 | query.dom = DOMID_SELF; |
425 | 426 | ||
426 | rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); | 427 | rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); |
427 | if ((rc < 0) || (query.status != GNTST_okay)) | 428 | if ((rc < 0) || (query.status != GNTST_okay)) |
428 | return 4; /* Legacy max supported number of frames */ | 429 | return 4; /* Legacy max supported number of frames */ |
429 | 430 | ||
430 | return query.max_nr_frames; | 431 | return query.max_nr_frames; |
431 | } | 432 | } |
432 | 433 | ||
433 | static inline unsigned int max_nr_grant_frames(void) | 434 | static inline unsigned int max_nr_grant_frames(void) |
434 | { | 435 | { |
435 | unsigned int xen_max = __max_nr_grant_frames(); | 436 | unsigned int xen_max = __max_nr_grant_frames(); |
436 | 437 | ||
437 | if (xen_max > boot_max_nr_grant_frames) | 438 | if (xen_max > boot_max_nr_grant_frames) |
438 | return boot_max_nr_grant_frames; | 439 | return boot_max_nr_grant_frames; |
439 | return xen_max; | 440 | return xen_max; |
440 | } | 441 | } |
441 | 442 | ||
442 | static int gnttab_map(unsigned int start_idx, unsigned int end_idx) | 443 | static int gnttab_map(unsigned int start_idx, unsigned int end_idx) |
443 | { | 444 | { |
444 | struct gnttab_setup_table setup; | 445 | struct gnttab_setup_table setup; |
445 | unsigned long *frames; | 446 | unsigned long *frames; |
446 | unsigned int nr_gframes = end_idx + 1; | 447 | unsigned int nr_gframes = end_idx + 1; |
447 | int rc; | 448 | int rc; |
448 | 449 | ||
449 | frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC); | 450 | frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC); |
450 | if (!frames) | 451 | if (!frames) |
451 | return -ENOMEM; | 452 | return -ENOMEM; |
452 | 453 | ||
453 | setup.dom = DOMID_SELF; | 454 | setup.dom = DOMID_SELF; |
454 | setup.nr_frames = nr_gframes; | 455 | setup.nr_frames = nr_gframes; |
455 | set_xen_guest_handle(setup.frame_list, frames); | 456 | set_xen_guest_handle(setup.frame_list, frames); |
456 | 457 | ||
457 | rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); | 458 | rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); |
458 | if (rc == -ENOSYS) { | 459 | if (rc == -ENOSYS) { |
459 | kfree(frames); | 460 | kfree(frames); |
460 | return -ENOSYS; | 461 | return -ENOSYS; |
461 | } | 462 | } |
462 | 463 | ||
463 | BUG_ON(rc || setup.status); | 464 | BUG_ON(rc || setup.status); |
464 | 465 | ||
465 | rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(), | 466 | rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(), |
466 | &shared); | 467 | &shared); |
467 | BUG_ON(rc); | 468 | BUG_ON(rc); |
468 | 469 | ||
469 | kfree(frames); | 470 | kfree(frames); |
470 | 471 | ||
471 | return 0; | 472 | return 0; |
472 | } | 473 | } |
473 | 474 | ||
474 | int gnttab_resume(void) | 475 | int gnttab_resume(void) |
475 | { | 476 | { |
476 | if (max_nr_grant_frames() < nr_grant_frames) | 477 | if (max_nr_grant_frames() < nr_grant_frames) |
477 | return -ENOSYS; | 478 | return -ENOSYS; |
478 | return gnttab_map(0, nr_grant_frames - 1); | 479 | return gnttab_map(0, nr_grant_frames - 1); |
479 | } | 480 | } |
480 | 481 | ||
481 | int gnttab_suspend(void) | 482 | int gnttab_suspend(void) |
482 | { | 483 | { |
483 | arch_gnttab_unmap_shared(shared, nr_grant_frames); | 484 | arch_gnttab_unmap_shared(shared, nr_grant_frames); |
484 | return 0; | 485 | return 0; |
485 | } | 486 | } |
486 | 487 | ||
487 | static int gnttab_expand(unsigned int req_entries) | 488 | static int gnttab_expand(unsigned int req_entries) |
488 | { | 489 | { |
489 | int rc; | 490 | int rc; |
490 | unsigned int cur, extra; | 491 | unsigned int cur, extra; |
491 | 492 | ||
492 | cur = nr_grant_frames; | 493 | cur = nr_grant_frames; |
493 | extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) / | 494 | extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) / |
494 | GREFS_PER_GRANT_FRAME); | 495 | GREFS_PER_GRANT_FRAME); |
495 | if (cur + extra > max_nr_grant_frames()) | 496 | if (cur + extra > max_nr_grant_frames()) |
496 | return -ENOSPC; | 497 | return -ENOSPC; |
497 | 498 | ||
498 | rc = gnttab_map(cur, cur + extra - 1); | 499 | rc = gnttab_map(cur, cur + extra - 1); |
499 | if (rc == 0) | 500 | if (rc == 0) |
500 | rc = grow_gnttab_list(extra); | 501 | rc = grow_gnttab_list(extra); |
501 | 502 | ||
502 | return rc; | 503 | return rc; |
503 | } | 504 | } |
504 | 505 | ||
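gnttab_expand() rounds a request up to whole grant frames. Continuing with the assumed GREFS_PER_GRANT_FRAME of 512:

/* Example (constants assumed as above): req_entries = 700
 *   extra = (700 + 511) / 512 = 2 frames = 1024 new entries;
 * even a request for a single entry costs one whole frame.  If
 * cur + extra would exceed max_nr_grant_frames(), the caller gets
 * -ENOSPC and must make do with what is already free.
 */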
505 | static int __devinit gnttab_init(void) | 506 | static int __devinit gnttab_init(void) |
506 | { | 507 | { |
507 | int i; | 508 | int i; |
508 | unsigned int max_nr_glist_frames, nr_glist_frames; | 509 | unsigned int max_nr_glist_frames, nr_glist_frames; |
509 | unsigned int nr_init_grefs; | 510 | unsigned int nr_init_grefs; |
510 | 511 | ||
511 | if (!xen_domain()) | 512 | if (!xen_domain()) |
512 | return -ENODEV; | 513 | return -ENODEV; |
513 | 514 | ||
514 | nr_grant_frames = 1; | 515 | nr_grant_frames = 1; |
515 | boot_max_nr_grant_frames = __max_nr_grant_frames(); | 516 | boot_max_nr_grant_frames = __max_nr_grant_frames(); |
516 | 517 | ||
517 | /* Determine the maximum number of frames required for the | 518 | /* Determine the maximum number of frames required for the |
518 | * grant reference free list on the current hypervisor. | 519 | * grant reference free list on the current hypervisor. |
519 | */ | 520 | */ |
520 | 	max_nr_glist_frames = (boot_max_nr_grant_frames * | 521 | 	max_nr_glist_frames = (boot_max_nr_grant_frames * |
521 | 			       GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; | 522 | 			       GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; |
522 | 523 | ||
523 | gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *), | 524 | gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *), |
524 | GFP_KERNEL); | 525 | GFP_KERNEL); |
525 | if (gnttab_list == NULL) | 526 | if (gnttab_list == NULL) |
526 | return -ENOMEM; | 527 | return -ENOMEM; |
527 | 528 | ||
528 | nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; | 529 | nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; |
529 | for (i = 0; i < nr_glist_frames; i++) { | 530 | for (i = 0; i < nr_glist_frames; i++) { |
530 | gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL); | 531 | gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL); |
531 | if (gnttab_list[i] == NULL) | 532 | if (gnttab_list[i] == NULL) |
532 | goto ini_nomem; | 533 | goto ini_nomem; |
533 | } | 534 | } |
534 | 535 | ||
535 | if (gnttab_resume() < 0) | 536 | if (gnttab_resume() < 0) |
536 | return -ENODEV; | 537 | return -ENODEV; |
537 | 538 | ||
538 | nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; | 539 | nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; |
539 | 540 | ||
540 | for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) | 541 | for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) |
541 | gnttab_entry(i) = i + 1; | 542 | gnttab_entry(i) = i + 1; |
542 | 543 | ||
543 | gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; | 544 | gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; |
544 | gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; | 545 | gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; |
545 | gnttab_free_head = NR_RESERVED_ENTRIES; | 546 | gnttab_free_head = NR_RESERVED_ENTRIES; |
546 | 547 | ||
547 | 	printk(KERN_INFO "Grant table initialized\n"); | 548 | 	printk(KERN_INFO "Grant table initialized\n"); |
548 | return 0; | 549 | return 0; |
549 | 550 | ||
550 | ini_nomem: | 551 | ini_nomem: |
551 | for (i--; i >= 0; i--) | 552 | for (i--; i >= 0; i--) |
552 | free_page((unsigned long)gnttab_list[i]); | 553 | free_page((unsigned long)gnttab_list[i]); |
553 | kfree(gnttab_list); | 554 | kfree(gnttab_list); |
554 | return -ENOMEM; | 555 | return -ENOMEM; |
555 | } | 556 | } |
556 | 557 | ||
557 | core_initcall(gnttab_init); | 558 | core_initcall(gnttab_init); |
558 | 559 |
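After a successful boot-time gnttab_init() there is a single mapped frame, and (assuming the 512-entry frames used in the examples above, plus this file's NR_RESERVED_ENTRIES of 8) the free list looks like:

/* Worked example, constants assumed from earlier in this file:
 *   nr_init_grefs     = 1 * 512 = 512
 *   gnttab_entry(8)   = 9, gnttab_entry(9) = 10, ...,
 *   gnttab_entry(511) = GNTTAB_LIST_END
 *   gnttab_free_head  = 8            (refs 0..7 remain reserved)
 *   gnttab_free_count = 512 - 8 = 504
 */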
include/xen/interface/event_channel.h
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | * event_channel.h | 2 | * event_channel.h |
3 | * | 3 | * |
4 | * Event channels between domains. | 4 | * Event channels between domains. |
5 | * | 5 | * |
6 | * Copyright (c) 2003-2004, K A Fraser. | 6 | * Copyright (c) 2003-2004, K A Fraser. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ | 9 | #ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ |
10 | #define __XEN_PUBLIC_EVENT_CHANNEL_H__ | 10 | #define __XEN_PUBLIC_EVENT_CHANNEL_H__ |
11 | 11 | ||
12 | #include <xen/interface/xen.h> | ||
13 | |||
12 | typedef uint32_t evtchn_port_t; | 14 | typedef uint32_t evtchn_port_t; |
13 | DEFINE_GUEST_HANDLE(evtchn_port_t); | 15 | DEFINE_GUEST_HANDLE(evtchn_port_t); |
14 | 16 | ||
15 | /* | 17 | /* |
16 | * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as | 18 | * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as |
17 | * accepting interdomain bindings from domain <remote_dom>. A fresh port | 19 | * accepting interdomain bindings from domain <remote_dom>. A fresh port |
18 | * is allocated in <dom> and returned as <port>. | 20 | * is allocated in <dom> and returned as <port>. |
19 | * NOTES: | 21 | * NOTES: |
20 | * 1. If the caller is unprivileged then <dom> must be DOMID_SELF. | 22 | * 1. If the caller is unprivileged then <dom> must be DOMID_SELF. |
21 |  * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections. | 23 |  * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections. |
22 | */ | 24 | */ |
23 | #define EVTCHNOP_alloc_unbound 6 | 25 | #define EVTCHNOP_alloc_unbound 6 |
24 | struct evtchn_alloc_unbound { | 26 | struct evtchn_alloc_unbound { |
25 | /* IN parameters */ | 27 | /* IN parameters */ |
26 | domid_t dom, remote_dom; | 28 | domid_t dom, remote_dom; |
27 | /* OUT parameters */ | 29 | /* OUT parameters */ |
28 | evtchn_port_t port; | 30 | evtchn_port_t port; |
29 | }; | 31 | }; |
30 | 32 | ||
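For illustration, here is roughly how kernel code allocates such a port using the two-argument HYPERVISOR_event_channel_op() wrapper from asm/xen/hypercall.h; the helper alloc_unbound_port() and its parameters are hypothetical:

#include <asm/xen/hypercall.h>
#include <xen/interface/event_channel.h>

static int alloc_unbound_port(domid_t remote_dom, evtchn_port_t *port)
{
	struct evtchn_alloc_unbound op = {
		.dom		= DOMID_SELF,	/* unprivileged callers must use self */
		.remote_dom	= remote_dom,	/* peer allowed to bind this port */
	};
	int err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op);

	if (err)
		return err;
	*port = op.port;			/* OUT parameter */
	return 0;
}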
31 | /* | 33 | /* |
32 | * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between | 34 | * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between |
33 | * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify | 35 | * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify |
34 | * a port that is unbound and marked as accepting bindings from the calling | 36 | * a port that is unbound and marked as accepting bindings from the calling |
35 | * domain. A fresh port is allocated in the calling domain and returned as | 37 | * domain. A fresh port is allocated in the calling domain and returned as |
36 | * <local_port>. | 38 | * <local_port>. |
37 | * NOTES: | 39 | * NOTES: |
38 |  * 1. <remote_dom> may be DOMID_SELF, allowing loopback connections. | 40 |  * 1. <remote_dom> may be DOMID_SELF, allowing loopback connections. |
39 | */ | 41 | */ |
40 | #define EVTCHNOP_bind_interdomain 0 | 42 | #define EVTCHNOP_bind_interdomain 0 |
41 | struct evtchn_bind_interdomain { | 43 | struct evtchn_bind_interdomain { |
42 | /* IN parameters. */ | 44 | /* IN parameters. */ |
43 | domid_t remote_dom; | 45 | domid_t remote_dom; |
44 | evtchn_port_t remote_port; | 46 | evtchn_port_t remote_port; |
45 | /* OUT parameters. */ | 47 | /* OUT parameters. */ |
46 | evtchn_port_t local_port; | 48 | evtchn_port_t local_port; |
47 | }; | 49 | }; |
48 | 50 | ||
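The matching step on the other side might look like the hypothetical sketch below; in a real split driver the remote domid and port would travel over xenstore first:

static int bind_to_peer(domid_t remote_dom, evtchn_port_t remote_port,
			evtchn_port_t *local_port)
{
	struct evtchn_bind_interdomain op = {
		.remote_dom	= remote_dom,
		.remote_port	= remote_port,	/* must be unbound, accepting us */
	};
	int err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, &op);

	if (err)
		return err;
	*local_port = op.local_port;		/* fresh port in this domain */
	return 0;
}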
49 | /* | 51 | /* |
50 | * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified | 52 | * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified |
51 | * vcpu. | 53 | * vcpu. |
52 | * NOTES: | 54 | * NOTES: |
53 | * 1. A virtual IRQ may be bound to at most one event channel per vcpu. | 55 | * 1. A virtual IRQ may be bound to at most one event channel per vcpu. |
54 | * 2. The allocated event channel is bound to the specified vcpu. The binding | 56 | * 2. The allocated event channel is bound to the specified vcpu. The binding |
55 | * may not be changed. | 57 | * may not be changed. |
56 | */ | 58 | */ |
57 | #define EVTCHNOP_bind_virq 1 | 59 | #define EVTCHNOP_bind_virq 1 |
58 | struct evtchn_bind_virq { | 60 | struct evtchn_bind_virq { |
59 | /* IN parameters. */ | 61 | /* IN parameters. */ |
60 | uint32_t virq; | 62 | uint32_t virq; |
61 | uint32_t vcpu; | 63 | uint32_t vcpu; |
62 | /* OUT parameters. */ | 64 | /* OUT parameters. */ |
63 | evtchn_port_t port; | 65 | evtchn_port_t port; |
64 | }; | 66 | }; |
65 | 67 | ||
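As an example, binding the per-vcpu timer VIRQ directly (VIRQ_TIMER comes from xen/interface/xen.h; in-tree users normally go through the higher-level bind_virq_to_irqhandler() in drivers/xen/events.c instead, and bind_timer_virq() here is hypothetical):

static int bind_timer_virq(unsigned int cpu, evtchn_port_t *port)
{
	struct evtchn_bind_virq op = {
		.virq = VIRQ_TIMER,
		.vcpu = cpu,		/* the binding stays fixed to this vcpu */
	};
	int err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &op);

	if (err)
		return err;
	*port = op.port;
	return 0;
}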
66 | /* | 68 | /* |
67 | * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>. | 69 | * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>. |
68 | * NOTES: | 70 | * NOTES: |
69 | * 1. A physical IRQ may be bound to at most one event channel per domain. | 71 | * 1. A physical IRQ may be bound to at most one event channel per domain. |
70 | * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. | 72 | * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. |
71 | */ | 73 | */ |
72 | #define EVTCHNOP_bind_pirq 2 | 74 | #define EVTCHNOP_bind_pirq 2 |
73 | struct evtchn_bind_pirq { | 75 | struct evtchn_bind_pirq { |
74 | /* IN parameters. */ | 76 | /* IN parameters. */ |
75 | uint32_t pirq; | 77 | uint32_t pirq; |
76 | #define BIND_PIRQ__WILL_SHARE 1 | 78 | #define BIND_PIRQ__WILL_SHARE 1 |
77 | uint32_t flags; /* BIND_PIRQ__* */ | 79 | uint32_t flags; /* BIND_PIRQ__* */ |
78 | /* OUT parameters. */ | 80 | /* OUT parameters. */ |
79 | evtchn_port_t port; | 81 | evtchn_port_t port; |
80 | }; | 82 | }; |
81 | 83 | ||
82 | /* | 84 | /* |
83 | * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. | 85 | * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. |
84 | * NOTES: | 86 | * NOTES: |
85 | * 1. The allocated event channel is bound to the specified vcpu. The binding | 87 | * 1. The allocated event channel is bound to the specified vcpu. The binding |
86 | * may not be changed. | 88 | * may not be changed. |
87 | */ | 89 | */ |
88 | #define EVTCHNOP_bind_ipi 7 | 90 | #define EVTCHNOP_bind_ipi 7 |
89 | struct evtchn_bind_ipi { | 91 | struct evtchn_bind_ipi { |
90 | uint32_t vcpu; | 92 | uint32_t vcpu; |
91 | /* OUT parameters. */ | 93 | /* OUT parameters. */ |
92 | evtchn_port_t port; | 94 | evtchn_port_t port; |
93 | }; | 95 | }; |
94 | 96 | ||
95 | /* | 97 | /* |
96 | * EVTCHNOP_close: Close a local event channel <port>. If the channel is | 98 | * EVTCHNOP_close: Close a local event channel <port>. If the channel is |
97 | * interdomain then the remote end is placed in the unbound state | 99 | * interdomain then the remote end is placed in the unbound state |
98 | * (EVTCHNSTAT_unbound), awaiting a new connection. | 100 | * (EVTCHNSTAT_unbound), awaiting a new connection. |
99 | */ | 101 | */ |
100 | #define EVTCHNOP_close 3 | 102 | #define EVTCHNOP_close 3 |
101 | struct evtchn_close { | 103 | struct evtchn_close { |
102 | /* IN parameters. */ | 104 | /* IN parameters. */ |
103 | evtchn_port_t port; | 105 | evtchn_port_t port; |
104 | }; | 106 | }; |
105 | 107 | ||
106 | /* | 108 | /* |
107 | * EVTCHNOP_send: Send an event to the remote end of the channel whose local | 109 | * EVTCHNOP_send: Send an event to the remote end of the channel whose local |
108 | * endpoint is <port>. | 110 | * endpoint is <port>. |
109 | */ | 111 | */ |
110 | #define EVTCHNOP_send 4 | 112 | #define EVTCHNOP_send 4 |
111 | struct evtchn_send { | 113 | struct evtchn_send { |
112 | /* IN parameters. */ | 114 | /* IN parameters. */ |
113 | evtchn_port_t port; | 115 | evtchn_port_t port; |
114 | }; | 116 | }; |
115 | 117 | ||
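Notifying the peer is a single hypercall; this sketch is essentially what the in-tree helper notify_remote_via_evtchn() in drivers/xen/events.c does (the wrapper name notify_peer is hypothetical):

static void notify_peer(evtchn_port_t port)
{
	struct evtchn_send send = { .port = port };

	/* Callers typically ignore the return value, as the in-tree
	 * helper does; the event is simply set pending at the far end. */
	(void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
}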
116 | /* | 118 | /* |
117 | * EVTCHNOP_status: Get the current status of the communication channel which | 119 | * EVTCHNOP_status: Get the current status of the communication channel which |
118 | * has an endpoint at <dom, port>. | 120 | * has an endpoint at <dom, port>. |
119 | * NOTES: | 121 | * NOTES: |
120 | * 1. <dom> may be specified as DOMID_SELF. | 122 | * 1. <dom> may be specified as DOMID_SELF. |
121 | * 2. Only a sufficiently-privileged domain may obtain the status of an event | 123 | * 2. Only a sufficiently-privileged domain may obtain the status of an event |
122 | * channel for which <dom> is not DOMID_SELF. | 124 | * channel for which <dom> is not DOMID_SELF. |
123 | */ | 125 | */ |
124 | #define EVTCHNOP_status 5 | 126 | #define EVTCHNOP_status 5 |
125 | struct evtchn_status { | 127 | struct evtchn_status { |
126 | /* IN parameters */ | 128 | /* IN parameters */ |
127 | domid_t dom; | 129 | domid_t dom; |
128 | evtchn_port_t port; | 130 | evtchn_port_t port; |
129 | /* OUT parameters */ | 131 | /* OUT parameters */ |
130 | #define EVTCHNSTAT_closed 0 /* Channel is not in use. */ | 132 | #define EVTCHNSTAT_closed 0 /* Channel is not in use. */ |
131 | #define EVTCHNSTAT_unbound     1  /* Channel is awaiting interdomain connection. */ | 133 | #define EVTCHNSTAT_unbound     1  /* Channel is awaiting interdomain connection. */ |
132 | #define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ | 134 | #define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ |
133 | #define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ | 135 | #define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ |
134 | #define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ | 136 | #define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ |
135 | #define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ | 137 | #define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ |
136 | uint32_t status; | 138 | uint32_t status; |
137 | uint32_t vcpu; /* VCPU to which this channel is bound. */ | 139 | uint32_t vcpu; /* VCPU to which this channel is bound. */ |
138 | union { | 140 | union { |
139 | struct { | 141 | struct { |
140 | domid_t dom; | 142 | domid_t dom; |
141 | } unbound; /* EVTCHNSTAT_unbound */ | 143 | } unbound; /* EVTCHNSTAT_unbound */ |
142 | struct { | 144 | struct { |
143 | domid_t dom; | 145 | domid_t dom; |
144 | evtchn_port_t port; | 146 | evtchn_port_t port; |
145 | } interdomain; /* EVTCHNSTAT_interdomain */ | 147 | } interdomain; /* EVTCHNSTAT_interdomain */ |
146 | uint32_t pirq; /* EVTCHNSTAT_pirq */ | 148 | uint32_t pirq; /* EVTCHNSTAT_pirq */ |
147 | uint32_t virq; /* EVTCHNSTAT_virq */ | 149 | uint32_t virq; /* EVTCHNSTAT_virq */ |
148 | } u; | 150 | } u; |
149 | }; | 151 | }; |
150 | 152 | ||
151 | /* | 153 | /* |
152 | * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an | 154 | * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an |
153 | * event is pending. | 155 | * event is pending. |
154 | * NOTES: | 156 | * NOTES: |
155 | * 1. IPI- and VIRQ-bound channels always notify the vcpu that initialised | 157 | * 1. IPI- and VIRQ-bound channels always notify the vcpu that initialised |
156 | * the binding. This binding cannot be changed. | 158 | * the binding. This binding cannot be changed. |
157 | * 2. All other channels notify vcpu0 by default. This default is set when | 159 | * 2. All other channels notify vcpu0 by default. This default is set when |
158 | * the channel is allocated (a port that is freed and subsequently reused | 160 | * the channel is allocated (a port that is freed and subsequently reused |
159 | * has its binding reset to vcpu0). | 161 | * has its binding reset to vcpu0). |
160 | */ | 162 | */ |
161 | #define EVTCHNOP_bind_vcpu 8 | 163 | #define EVTCHNOP_bind_vcpu 8 |
162 | struct evtchn_bind_vcpu { | 164 | struct evtchn_bind_vcpu { |
163 | /* IN parameters. */ | 165 | /* IN parameters. */ |
164 | evtchn_port_t port; | 166 | evtchn_port_t port; |
165 | uint32_t vcpu; | 167 | uint32_t vcpu; |
166 | }; | 168 | }; |
167 | 169 | ||
168 | /* | 170 | /* |
169 | * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver | 171 | * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver |
170 | * a notification to the appropriate VCPU if an event is pending. | 172 | * a notification to the appropriate VCPU if an event is pending. |
171 | */ | 173 | */ |
172 | #define EVTCHNOP_unmask 9 | 174 | #define EVTCHNOP_unmask 9 |
173 | struct evtchn_unmask { | 175 | struct evtchn_unmask { |
174 | /* IN parameters. */ | 176 | /* IN parameters. */ |
175 | evtchn_port_t port; | 177 | evtchn_port_t port; |
176 | }; | 178 | }; |
177 | 179 | ||
178 | struct evtchn_op { | 180 | struct evtchn_op { |
179 | uint32_t cmd; /* EVTCHNOP_* */ | 181 | uint32_t cmd; /* EVTCHNOP_* */ |
180 | union { | 182 | union { |
181 | struct evtchn_alloc_unbound alloc_unbound; | 183 | struct evtchn_alloc_unbound alloc_unbound; |
182 | struct evtchn_bind_interdomain bind_interdomain; | 184 | struct evtchn_bind_interdomain bind_interdomain; |
183 | struct evtchn_bind_virq bind_virq; | 185 | struct evtchn_bind_virq bind_virq; |
184 | struct evtchn_bind_pirq bind_pirq; | 186 | struct evtchn_bind_pirq bind_pirq; |
185 | struct evtchn_bind_ipi bind_ipi; | 187 | struct evtchn_bind_ipi bind_ipi; |
186 | struct evtchn_close close; | 188 | struct evtchn_close close; |
187 | struct evtchn_send send; | 189 | struct evtchn_send send; |
188 | struct evtchn_status status; | 190 | struct evtchn_status status; |
189 | struct evtchn_bind_vcpu bind_vcpu; | 191 | struct evtchn_bind_vcpu bind_vcpu; |
190 | struct evtchn_unmask unmask; | 192 | struct evtchn_unmask unmask; |
191 | } u; | 193 | } u; |
192 | }; | 194 | }; |
193 | DEFINE_GUEST_HANDLE_STRUCT(evtchn_op); | 195 | DEFINE_GUEST_HANDLE_STRUCT(evtchn_op); |
194 | 196 | ||
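struct evtchn_op is the container for the older single-argument form of the hypercall. The x86 wrapper first tries the newer two-argument form, which passes the sub-op struct directly, and falls back to the multiplexed compat form on older hypervisors. A simplified sketch of that logic (the real implementation lives in asm/xen/hypercall.h):

static inline int event_channel_op(int cmd, void *arg)
{
	/* Newer hypervisors take the sub-op struct directly ... */
	int rc = _hypercall2(int, event_channel_op, cmd, arg);

	if (unlikely(rc == -ENOSYS)) {
		/* ... older ones only understand the multiplexed struct. */
		struct evtchn_op op;

		op.cmd = cmd;
		memcpy(&op.u, arg, sizeof(op.u));
		rc = _hypercall1(int, event_channel_op_compat, &op);
		memcpy(arg, &op.u, sizeof(op.u));
	}
	return rc;
}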
195 | #endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ | 197 | #endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ |
196 | 198 |