Commit ecbf29cdb3990c83d90d0c4187c89fb2ce423367

Authored by Jeremy Fitzhardinge
Committed by Ingo Molnar
1 parent f63c2f2489

xen: clean up asm/xen/hypervisor.h

Impact: cleanup

hypervisor.h had accumulated a lot of crud, including lots of spurious
#includes.  Clean it all up, and go around fixing up everything else
accordingly.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 8 changed files with 31 additions and 33 deletions

arch/x86/include/asm/xen/hypercall.h
 /******************************************************************************
  * hypercall.h
  *
  * Linux-specific hypervisor handling.
  *
  * Copyright (c) 2002-2004, K A Fraser
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License version 2
  * as published by the Free Software Foundation; or, when distributed
  * separately from the Linux kernel or incorporated into other
  * software packages, subject to the following license:
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this source file (the "Software"), to deal in the Software without
  * restriction, including without limitation the rights to use, copy, modify,
  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  * and to permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in
  * all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */
 
 #ifndef _ASM_X86_XEN_HYPERCALL_H
 #define _ASM_X86_XEN_HYPERCALL_H
 
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
 #include <linux/errno.h>
 #include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
 
 #include <xen/interface/xen.h>
 #include <xen/interface/sched.h>
 #include <xen/interface/physdev.h>
 
 /*
  * The hypercall asms have to meet several constraints:
  * - Work on 32- and 64-bit.
  *    The two architectures put their arguments in different sets of
  *    registers.
  *
  * - Work around asm syntax quirks
  *    It isn't possible to specify one of the rNN registers in a
  *    constraint, so we use explicit register variables to get the
  *    args into the right place.
  *
  * - Mark all registers as potentially clobbered
  *    Even unused parameters can be clobbered by the hypervisor, so we
  *    need to make sure gcc knows it.
  *
  * - Avoid compiler bugs.
  *    This is the tricky part.  Because x86_32 has such a constrained
  *    register set, gcc versions below 4.3 have trouble generating
  *    code when all the arg registers and memory are trashed by the
  *    asm.  There are syntactically simpler ways of achieving the
  *    semantics below, but they cause the compiler to crash.
  *
  *    The only combination I found which works is:
  *     - assign the __argX variables first
  *     - list all actually used parameters as "+r" (__argX)
  *     - clobber the rest
  *
  * The result certainly isn't pretty, and it really shows up cpp's
  * weakness as a macro language.  Sorry.  (But let's just give thanks
  * there aren't more than 5 arguments...)
  */
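
[Editor's note: to make the macro machinery below easier to follow, here is a hand-expanded sketch of what _hypercall2(int, xen_version, cmd, arg) boils down to on x86-64. This is an approximation for illustration, not the macros' literal output; the unused-argument clobbers and the 32-byte hypercall-page entry stride come from the definitions that follow.

	/* Hand-expanded sketch of _hypercall2(int, xen_version, cmd, arg)
	 * on x86-64; an approximation of what the macros below generate. */
	static inline int xen_version_sketch(int cmd, void *arg)
	{
		register unsigned long __res  asm("rax");
		register unsigned long __arg1 asm("rdi") = (unsigned long)cmd;
		register unsigned long __arg2 asm("rsi") = (unsigned long)arg;

		asm volatile("call hypercall_page+%c[offset]"
			     : "=r" (__res), "+r" (__arg1), "+r" (__arg2)
			     : [offset] "i" (__HYPERVISOR_xen_version * 32)
			     : "memory", "rdx", "r10", "r8"); /* unused arg regs */
		return (int)__res;
	}
]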
 
 extern struct { char _entry[32]; } hypercall_page[];
 
 #define __HYPERCALL		"call hypercall_page+%c[offset]"
 #define __HYPERCALL_ENTRY(x)						\
 	[offset] "i" (__HYPERVISOR_##x * sizeof(hypercall_page[0]))
 
 #ifdef CONFIG_X86_32
 #define __HYPERCALL_RETREG	"eax"
 #define __HYPERCALL_ARG1REG	"ebx"
 #define __HYPERCALL_ARG2REG	"ecx"
 #define __HYPERCALL_ARG3REG	"edx"
 #define __HYPERCALL_ARG4REG	"esi"
 #define __HYPERCALL_ARG5REG	"edi"
 #else
 #define __HYPERCALL_RETREG	"rax"
 #define __HYPERCALL_ARG1REG	"rdi"
 #define __HYPERCALL_ARG2REG	"rsi"
 #define __HYPERCALL_ARG3REG	"rdx"
 #define __HYPERCALL_ARG4REG	"r10"
 #define __HYPERCALL_ARG5REG	"r8"
 #endif
 
 #define __HYPERCALL_DECLS						\
 	register unsigned long __res  asm(__HYPERCALL_RETREG);		\
 	register unsigned long __arg1 asm(__HYPERCALL_ARG1REG) = __arg1; \
 	register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \
 	register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \
 	register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \
 	register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5;
 
 #define __HYPERCALL_0PARAM	"=r" (__res)
 #define __HYPERCALL_1PARAM	__HYPERCALL_0PARAM, "+r" (__arg1)
 #define __HYPERCALL_2PARAM	__HYPERCALL_1PARAM, "+r" (__arg2)
 #define __HYPERCALL_3PARAM	__HYPERCALL_2PARAM, "+r" (__arg3)
 #define __HYPERCALL_4PARAM	__HYPERCALL_3PARAM, "+r" (__arg4)
 #define __HYPERCALL_5PARAM	__HYPERCALL_4PARAM, "+r" (__arg5)
 
 #define __HYPERCALL_0ARG()
 #define __HYPERCALL_1ARG(a1)						\
 	__HYPERCALL_0ARG()		__arg1 = (unsigned long)(a1);
 #define __HYPERCALL_2ARG(a1,a2)						\
 	__HYPERCALL_1ARG(a1)		__arg2 = (unsigned long)(a2);
 #define __HYPERCALL_3ARG(a1,a2,a3)					\
 	__HYPERCALL_2ARG(a1,a2)		__arg3 = (unsigned long)(a3);
 #define __HYPERCALL_4ARG(a1,a2,a3,a4)					\
 	__HYPERCALL_3ARG(a1,a2,a3)	__arg4 = (unsigned long)(a4);
 #define __HYPERCALL_5ARG(a1,a2,a3,a4,a5)				\
 	__HYPERCALL_4ARG(a1,a2,a3,a4)	__arg5 = (unsigned long)(a5);
 
 #define __HYPERCALL_CLOBBER5	"memory"
 #define __HYPERCALL_CLOBBER4	__HYPERCALL_CLOBBER5, __HYPERCALL_ARG5REG
 #define __HYPERCALL_CLOBBER3	__HYPERCALL_CLOBBER4, __HYPERCALL_ARG4REG
 #define __HYPERCALL_CLOBBER2	__HYPERCALL_CLOBBER3, __HYPERCALL_ARG3REG
 #define __HYPERCALL_CLOBBER1	__HYPERCALL_CLOBBER2, __HYPERCALL_ARG2REG
 #define __HYPERCALL_CLOBBER0	__HYPERCALL_CLOBBER1, __HYPERCALL_ARG1REG
 
 #define _hypercall0(type, name)						\
 ({									\
 	__HYPERCALL_DECLS;						\
 	__HYPERCALL_0ARG();						\
 	asm volatile (__HYPERCALL					\
 		      : __HYPERCALL_0PARAM				\
 		      : __HYPERCALL_ENTRY(name)				\
 		      : __HYPERCALL_CLOBBER0);				\
 	(type)__res;							\
 })
 
 #define _hypercall1(type, name, a1)					\
 ({									\
 	__HYPERCALL_DECLS;						\
 	__HYPERCALL_1ARG(a1);						\
 	asm volatile (__HYPERCALL					\
 		      : __HYPERCALL_1PARAM				\
 		      : __HYPERCALL_ENTRY(name)				\
 		      : __HYPERCALL_CLOBBER1);				\
 	(type)__res;							\
 })
 
 #define _hypercall2(type, name, a1, a2)					\
 ({									\
 	__HYPERCALL_DECLS;						\
 	__HYPERCALL_2ARG(a1, a2);					\
 	asm volatile (__HYPERCALL					\
 		      : __HYPERCALL_2PARAM				\
 		      : __HYPERCALL_ENTRY(name)				\
 		      : __HYPERCALL_CLOBBER2);				\
 	(type)__res;							\
 })
 
 #define _hypercall3(type, name, a1, a2, a3)				\
 ({									\
 	__HYPERCALL_DECLS;						\
 	__HYPERCALL_3ARG(a1, a2, a3);					\
 	asm volatile (__HYPERCALL					\
 		      : __HYPERCALL_3PARAM				\
 		      : __HYPERCALL_ENTRY(name)				\
 		      : __HYPERCALL_CLOBBER3);				\
 	(type)__res;							\
 })
 
 #define _hypercall4(type, name, a1, a2, a3, a4)				\
 ({									\
 	__HYPERCALL_DECLS;						\
 	__HYPERCALL_4ARG(a1, a2, a3, a4);				\
 	asm volatile (__HYPERCALL					\
 		      : __HYPERCALL_4PARAM				\
 		      : __HYPERCALL_ENTRY(name)				\
 		      : __HYPERCALL_CLOBBER4);				\
 	(type)__res;							\
 })
 
 #define _hypercall5(type, name, a1, a2, a3, a4, a5)			\
 ({									\
 	__HYPERCALL_DECLS;						\
 	__HYPERCALL_5ARG(a1, a2, a3, a4, a5);				\
 	asm volatile (__HYPERCALL					\
 		      : __HYPERCALL_5PARAM				\
 		      : __HYPERCALL_ENTRY(name)				\
 		      : __HYPERCALL_CLOBBER5);				\
 	(type)__res;							\
 })
 
 static inline int
 HYPERVISOR_set_trap_table(struct trap_info *table)
 {
 	return _hypercall1(int, set_trap_table, table);
 }
 
 static inline int
 HYPERVISOR_mmu_update(struct mmu_update *req, int count,
 		      int *success_count, domid_t domid)
 {
 	return _hypercall4(int, mmu_update, req, count, success_count, domid);
 }
 
 static inline int
 HYPERVISOR_mmuext_op(struct mmuext_op *op, int count,
 		     int *success_count, domid_t domid)
 {
 	return _hypercall4(int, mmuext_op, op, count, success_count, domid);
 }
 
 static inline int
 HYPERVISOR_set_gdt(unsigned long *frame_list, int entries)
 {
 	return _hypercall2(int, set_gdt, frame_list, entries);
 }
 
 static inline int
 HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp)
 {
 	return _hypercall2(int, stack_switch, ss, esp);
 }
 
 #ifdef CONFIG_X86_32
 static inline int
 HYPERVISOR_set_callbacks(unsigned long event_selector,
 			 unsigned long event_address,
 			 unsigned long failsafe_selector,
 			 unsigned long failsafe_address)
 {
 	return _hypercall4(int, set_callbacks,
 			   event_selector, event_address,
 			   failsafe_selector, failsafe_address);
 }
 #else  /* CONFIG_X86_64 */
 static inline int
 HYPERVISOR_set_callbacks(unsigned long event_address,
 			 unsigned long failsafe_address,
 			 unsigned long syscall_address)
 {
 	return _hypercall3(int, set_callbacks,
 			   event_address, failsafe_address,
 			   syscall_address);
 }
 #endif /* CONFIG_X86_{32,64} */
 
 static inline int
 HYPERVISOR_callback_op(int cmd, void *arg)
 {
 	return _hypercall2(int, callback_op, cmd, arg);
 }
 
 static inline int
 HYPERVISOR_fpu_taskswitch(int set)
 {
 	return _hypercall1(int, fpu_taskswitch, set);
 }
 
 static inline int
 HYPERVISOR_sched_op(int cmd, void *arg)
 {
 	return _hypercall2(int, sched_op_new, cmd, arg);
 }
 
 static inline long
 HYPERVISOR_set_timer_op(u64 timeout)
 {
 	unsigned long timeout_hi = (unsigned long)(timeout>>32);
 	unsigned long timeout_lo = (unsigned long)timeout;
 	return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi);
 }
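
[Editor's note: a quick worked example of the 64-bit split above, with an invented value, as it plays out on a 32-bit build where unsigned long is 32 bits.

	/* Worked example (invented value), 32-bit build:
	 *   timeout    = 0x0000001234567890ULL
	 *   timeout_hi = (unsigned long)(timeout >> 32) = 0x12
	 *   timeout_lo = (unsigned long)timeout         = 0x34567890
	 * The two halves travel in separate argument registers and are
	 * reassembled into one 64-bit deadline by the hypervisor. */
]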
 
 static inline int
 HYPERVISOR_set_debugreg(int reg, unsigned long value)
 {
 	return _hypercall2(int, set_debugreg, reg, value);
 }
 
 static inline unsigned long
 HYPERVISOR_get_debugreg(int reg)
 {
 	return _hypercall1(unsigned long, get_debugreg, reg);
 }
 
 static inline int
 HYPERVISOR_update_descriptor(u64 ma, u64 desc)
 {
 	return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
 }
 
 static inline int
 HYPERVISOR_memory_op(unsigned int cmd, void *arg)
 {
 	return _hypercall2(int, memory_op, cmd, arg);
 }
 
 static inline int
 HYPERVISOR_multicall(void *call_list, int nr_calls)
 {
 	return _hypercall2(int, multicall, call_list, nr_calls);
 }
 
 static inline int
 HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val,
 			     unsigned long flags)
 {
 	if (sizeof(new_val) == sizeof(long))
 		return _hypercall3(int, update_va_mapping, va,
 				   new_val.pte, flags);
 	else
 		return _hypercall4(int, update_va_mapping, va,
 				   new_val.pte, new_val.pte >> 32, flags);
 }
 
 static inline int
 HYPERVISOR_event_channel_op(int cmd, void *arg)
 {
 	int rc = _hypercall2(int, event_channel_op, cmd, arg);
 	if (unlikely(rc == -ENOSYS)) {
 		struct evtchn_op op;
 		op.cmd = cmd;
 		memcpy(&op.u, arg, sizeof(op.u));
 		rc = _hypercall1(int, event_channel_op_compat, &op);
 		memcpy(arg, &op.u, sizeof(op.u));
 	}
 	return rc;
 }
 
 static inline int
 HYPERVISOR_xen_version(int cmd, void *arg)
 {
 	return _hypercall2(int, xen_version, cmd, arg);
 }
 
 static inline int
 HYPERVISOR_console_io(int cmd, int count, char *str)
 {
 	return _hypercall3(int, console_io, cmd, count, str);
 }
 
 static inline int
 HYPERVISOR_physdev_op(int cmd, void *arg)
 {
 	int rc = _hypercall2(int, physdev_op, cmd, arg);
 	if (unlikely(rc == -ENOSYS)) {
 		struct physdev_op op;
 		op.cmd = cmd;
 		memcpy(&op.u, arg, sizeof(op.u));
 		rc = _hypercall1(int, physdev_op_compat, &op);
 		memcpy(arg, &op.u, sizeof(op.u));
 	}
 	return rc;
 }
 
 static inline int
 HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
 {
 	return _hypercall3(int, grant_table_op, cmd, uop, count);
 }
 
 static inline int
 HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, pte_t new_val,
 					 unsigned long flags, domid_t domid)
 {
 	if (sizeof(new_val) == sizeof(long))
 		return _hypercall4(int, update_va_mapping_otherdomain, va,
 				   new_val.pte, flags, domid);
 	else
 		return _hypercall5(int, update_va_mapping_otherdomain, va,
 				   new_val.pte, new_val.pte >> 32,
 				   flags, domid);
 }
 
 static inline int
 HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type)
 {
 	return _hypercall2(int, vm_assist, cmd, type);
 }
 
 static inline int
 HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args)
 {
 	return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
 }
 
 #ifdef CONFIG_X86_64
 static inline int
 HYPERVISOR_set_segment_base(int reg, unsigned long value)
 {
 	return _hypercall2(int, set_segment_base, reg, value);
 }
 #endif
 
 static inline int
 HYPERVISOR_suspend(unsigned long srec)
 {
 	return _hypercall3(int, sched_op, SCHEDOP_shutdown,
 			   SHUTDOWN_suspend, srec);
 }
 
 static inline int
 HYPERVISOR_nmi_op(unsigned long op, unsigned long arg)
 {
 	return _hypercall2(int, nmi_op, op, arg);
 }
 
 static inline void
 MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
 {
 	mcl->op = __HYPERVISOR_fpu_taskswitch;
 	mcl->args[0] = set;
 }
 
 static inline void
 MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
 			pte_t new_val, unsigned long flags)
 {
 	mcl->op = __HYPERVISOR_update_va_mapping;
 	mcl->args[0] = va;
 	if (sizeof(new_val) == sizeof(long)) {
 		mcl->args[1] = new_val.pte;
 		mcl->args[2] = flags;
 	} else {
 		mcl->args[1] = new_val.pte;
 		mcl->args[2] = new_val.pte >> 32;
 		mcl->args[3] = flags;
 	}
 }
 
 static inline void
 MULTI_grant_table_op(struct multicall_entry *mcl, unsigned int cmd,
 		     void *uop, unsigned int count)
 {
 	mcl->op = __HYPERVISOR_grant_table_op;
 	mcl->args[0] = cmd;
 	mcl->args[1] = (unsigned long)uop;
 	mcl->args[2] = count;
 }
 
 static inline void
 MULTI_update_va_mapping_otherdomain(struct multicall_entry *mcl, unsigned long va,
 				    pte_t new_val, unsigned long flags,
 				    domid_t domid)
 {
 	mcl->op = __HYPERVISOR_update_va_mapping_otherdomain;
 	mcl->args[0] = va;
 	if (sizeof(new_val) == sizeof(long)) {
 		mcl->args[1] = new_val.pte;
 		mcl->args[2] = flags;
 		mcl->args[3] = domid;
 	} else {
 		mcl->args[1] = new_val.pte;
 		mcl->args[2] = new_val.pte >> 32;
 		mcl->args[3] = flags;
 		mcl->args[4] = domid;
 	}
 }
 
 static inline void
 MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
 			struct desc_struct desc)
 {
 	mcl->op = __HYPERVISOR_update_descriptor;
 	if (sizeof(maddr) == sizeof(long)) {
 		mcl->args[0] = maddr;
 		mcl->args[1] = *(unsigned long *)&desc;
 	} else {
 		mcl->args[0] = maddr;
 		mcl->args[1] = maddr >> 32;
 		mcl->args[2] = desc.a;
 		mcl->args[3] = desc.b;
 	}
 }
 
 static inline void
 MULTI_memory_op(struct multicall_entry *mcl, unsigned int cmd, void *arg)
 {
 	mcl->op = __HYPERVISOR_memory_op;
 	mcl->args[0] = cmd;
 	mcl->args[1] = (unsigned long)arg;
 }
 
 static inline void
 MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req,
 		 int count, int *success_count, domid_t domid)
 {
 	mcl->op = __HYPERVISOR_mmu_update;
 	mcl->args[0] = (unsigned long)req;
 	mcl->args[1] = count;
 	mcl->args[2] = (unsigned long)success_count;
 	mcl->args[3] = domid;
 }
 
 static inline void
 MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count,
 		int *success_count, domid_t domid)
 {
 	mcl->op = __HYPERVISOR_mmuext_op;
 	mcl->args[0] = (unsigned long)op;
 	mcl->args[1] = count;
 	mcl->args[2] = (unsigned long)success_count;
 	mcl->args[3] = domid;
 }
 
 static inline void
 MULTI_set_gdt(struct multicall_entry *mcl, unsigned long *frames, int entries)
 {
 	mcl->op = __HYPERVISOR_set_gdt;
 	mcl->args[0] = (unsigned long)frames;
 	mcl->args[1] = entries;
 }
 
 static inline void
 MULTI_stack_switch(struct multicall_entry *mcl,
 		   unsigned long ss, unsigned long esp)
 {
 	mcl->op = __HYPERVISOR_stack_switch;
 	mcl->args[0] = ss;
 	mcl->args[1] = esp;
 }
 
 #endif /* _ASM_X86_XEN_HYPERCALL_H */
 
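[Editor's note: the MULTI_* helpers above only populate a struct multicall_entry; the saving comes from submitting a batch of them through HYPERVISOR_multicall() in a single trap into the hypervisor. A hypothetical sketch (function name and error handling invented; real callers in this tree go through the multicalls.c batching layer):

	/* Hypothetical sketch: two operations, one trap into the hypervisor. */
	static int example_batched_switch(unsigned long ss, unsigned long esp,
					  int fpu_set)
	{
		struct multicall_entry mcl[2];

		MULTI_stack_switch(&mcl[0], ss, esp);
		MULTI_fpu_taskswitch(&mcl[1], fpu_set);

		return HYPERVISOR_multicall(mcl, 2);
	}
]
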
arch/x86/include/asm/xen/hypervisor.h
 /******************************************************************************
  * hypervisor.h
  *
  * Linux-specific hypervisor handling.
  *
  * Copyright (c) 2002-2004, K A Fraser
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License version 2
  * as published by the Free Software Foundation; or, when distributed
  * separately from the Linux kernel or incorporated into other
  * software packages, subject to the following license:
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this source file (the "Software"), to deal in the Software without
  * restriction, including without limitation the rights to use, copy, modify,
  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  * and to permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in
  * all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */
 
 #ifndef _ASM_X86_XEN_HYPERVISOR_H
 #define _ASM_X86_XEN_HYPERVISOR_H
 
-#include <linux/types.h>
-#include <linux/kernel.h>
-
-#include <xen/interface/xen.h>
-#include <xen/interface/version.h>
-
-#include <asm/ptrace.h>
-#include <asm/page.h>
-#include <asm/desc.h>
-#if defined(__i386__)
-# ifdef CONFIG_X86_PAE
-#  include <asm-generic/pgtable-nopud.h>
-# else
-#  include <asm-generic/pgtable-nopmd.h>
-# endif
-#endif
-#include <asm/xen/hypercall.h>
-
 /* arch/i386/kernel/setup.c */
 extern struct shared_info *HYPERVISOR_shared_info;
 extern struct start_info *xen_start_info;
 
-/* arch/i386/mach-xen/evtchn.c */
-/* Force a proper event-channel callback from Xen. */
-extern void force_evtchn_callback(void);
-
-/* Turn jiffies into Xen system time. */
-u64 jiffies_to_st(unsigned long jiffies);
-
-
-#define MULTI_UVMFLAGS_INDEX 3
-#define MULTI_UVMDOMID_INDEX 4
-
 enum xen_domain_type {
 	XEN_NATIVE,
 	XEN_PV_DOMAIN,
 	XEN_HVM_DOMAIN,
 };
 
 extern enum xen_domain_type xen_domain_type;
 
+#ifdef CONFIG_XEN
 #define xen_domain()		(xen_domain_type != XEN_NATIVE)
-#define xen_pv_domain()		(xen_domain_type == XEN_PV_DOMAIN)
+#else
+#define xen_domain()		(0)
+#endif
+
+#define xen_pv_domain()	(xen_domain() && xen_domain_type == XEN_PV_DOMAIN)
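
[Editor's note: what the new CONFIG_XEN guard buys callers is that xen_domain() collapses to a constant 0 in non-Xen builds, so generic code can test it unconditionally and let the compiler discard the Xen-only path. A hypothetical caller (function name and messages invented for illustration):

	/* Hypothetical example; with CONFIG_XEN unset, xen_domain() is 0
	 * and both branches below compile away entirely. */
	static void example_report_platform(void)
	{
		if (xen_pv_domain())
			printk(KERN_INFO "running as a Xen PV guest\n");
		else if (xen_domain())
			printk(KERN_INFO "running as a Xen HVM guest\n");
	}
]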
arch/x86/include/asm/xen/page.h
 #ifndef _ASM_X86_XEN_PAGE_H
 #define _ASM_X86_XEN_PAGE_H
 
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
 #include <linux/pfn.h>
 
 #include <asm/uaccess.h>
+#include <asm/page.h>
 #include <asm/pgtable.h>
 
+#include <xen/interface/xen.h>
 #include <xen/features.h>
 
 /* Xen machine address */
 typedef struct xmaddr {
 	phys_addr_t maddr;
 } xmaddr_t;
 
 /* Xen pseudo-physical address */
 typedef struct xpaddr {
 	phys_addr_t paddr;
 } xpaddr_t;
 
 #define XMADDR(x)	((xmaddr_t) { .maddr = (x) })
 #define XPADDR(x)	((xpaddr_t) { .paddr = (x) })
 
 /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
 #define INVALID_P2M_ENTRY	(~0UL)
 #define FOREIGN_FRAME_BIT	(1UL<<31)
 #define FOREIGN_FRAME(m)	((m) | FOREIGN_FRAME_BIT)
 
 /* Maximum amount of memory we can handle in a domain in pages */
 #define MAX_DOMAIN_PAGES						\
     ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
 
 
 extern unsigned long get_phys_to_machine(unsigned long pfn);
 extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 
 static inline unsigned long pfn_to_mfn(unsigned long pfn)
 {
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return pfn;
 
 	return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT;
 }
 
 static inline int phys_to_machine_mapping_valid(unsigned long pfn)
 {
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return 1;
 
 	return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY;
 }
 
 static inline unsigned long mfn_to_pfn(unsigned long mfn)
 {
 	unsigned long pfn;
 
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return mfn;
 
 #if 0
 	if (unlikely((mfn >> machine_to_phys_order) != 0))
 		return max_mapnr;
 #endif
 
 	pfn = 0;
 	/*
 	 * The array access can fail (e.g., device space beyond end of RAM).
 	 * In such cases it doesn't matter what we return (we return garbage),
 	 * but we must handle the fault without crashing!
 	 */
 	__get_user(pfn, &machine_to_phys_mapping[mfn]);
 
 	return pfn;
 }
 
 static inline xmaddr_t phys_to_machine(xpaddr_t phys)
 {
 	unsigned offset = phys.paddr & ~PAGE_MASK;
 	return XMADDR(PFN_PHYS(pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset);
 }
 
 static inline xpaddr_t machine_to_phys(xmaddr_t machine)
 {
 	unsigned offset = machine.maddr & ~PAGE_MASK;
 	return XPADDR(PFN_PHYS(mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset);
 }
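
[Editor's note: a worked example of the conversion arithmetic above, with invented numbers and 4 KiB pages. Only the frame number goes through the p2m/m2p lookup; the offset within the page is carried across unchanged.

	/* Invented values: pseudo-physical address 0x12345678 is
	 * pfn 0x12345 with offset 0x678.  If the p2m map says
	 * pfn 0x12345 -> mfn 0xabcde, then
	 *   phys_to_machine(XPADDR(0x12345678)).maddr
	 *     == PFN_PHYS(0xabcde) | 0x678 == 0xabcde678,
	 * and machine_to_phys() inverts it the same way via m2p. */
]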
 
 /*
  * We detect special mappings in one of two ways:
  *  1. If the MFN is an I/O page then Xen will set the m2p entry
  *     to be outside our maximum possible pseudophys range.
  *  2. If the MFN belongs to a different domain then we will certainly
  *     not have MFN in our p2m table. Conversely, if the page is ours,
  *     then we'll have p2m(m2p(MFN))==MFN.
  * If we detect a special mapping then it doesn't have a 'struct page'.
  * We force !pfn_valid() by returning an out-of-range pointer.
  *
  * NB. These checks require that, for any MFN that is not in our reservation,
  * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
  * we are foreign-mapping the MFN, and the other domain has m2p(MFN) == PFN.
  * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
  *
  * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
  *      use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
  *      require. In all the cases we care about, the FOREIGN_FRAME bit is
  *      masked (e.g., pfn_to_mfn()) so behaviour there is correct.
  */
 static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
 {
 	extern unsigned long max_mapnr;
 	unsigned long pfn = mfn_to_pfn(mfn);
 	if ((pfn < max_mapnr)
 	    && !xen_feature(XENFEAT_auto_translated_physmap)
 	    && (get_phys_to_machine(pfn) != mfn))
 		return max_mapnr; /* force !pfn_valid() */
 	/* XXX fixme; not true with sparsemem */
 	return pfn;
 }
 
 /* VIRT <-> MACHINE conversion */
 #define virt_to_machine(v)	(phys_to_machine(XPADDR(__pa(v))))
 #define virt_to_mfn(v)		(pfn_to_mfn(PFN_DOWN(__pa(v))))
 #define mfn_to_virt(m)		(__va(mfn_to_pfn(m) << PAGE_SHIFT))
 
 static inline unsigned long pte_mfn(pte_t pte)
 {
 	return (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT;
 }
 
 static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot)
 {
 	pte_t pte;
 
 	pte.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) |
 		  (pgprot_val(pgprot) & __supported_pte_mask);
 
 	return pte;
 }
 
 static inline pteval_t pte_val_ma(pte_t pte)
 {
 	return pte.pte;
 }
 
 static inline pte_t __pte_ma(pteval_t x)
 {
 	return (pte_t) { .pte = x };
 }
 
 #define pmd_val_ma(v)	((v).pmd)
 #ifdef __PAGETABLE_PUD_FOLDED
 #define pud_val_ma(v)	((v).pgd.pgd)
 #else
 #define pud_val_ma(v)	((v).pud)
 #endif
 #define __pmd_ma(x)	((pmd_t) { (x) } )
 
 #define pgd_val_ma(x)	((x).pgd)
 
 
 xmaddr_t arbitrary_virt_to_machine(void *address);
 void make_lowmem_page_readonly(void *vaddr);
 void make_lowmem_page_readwrite(void *vaddr);
 
 #endif /* _ASM_X86_XEN_PAGE_H */
 
arch/x86/xen/enlighten.c
1 /* 1 /*
2 * Core of Xen paravirt_ops implementation. 2 * Core of Xen paravirt_ops implementation.
3 * 3 *
4 * This file contains the xen_paravirt_ops structure itself, and the 4 * This file contains the xen_paravirt_ops structure itself, and the
5 * implementations for: 5 * implementations for:
6 * - privileged instructions 6 * - privileged instructions
7 * - interrupt flags 7 * - interrupt flags
8 * - segment operations 8 * - segment operations
9 * - booting and setup 9 * - booting and setup
10 * 10 *
11 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 11 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
12 */ 12 */
13 13
14 #include <linux/kernel.h> 14 #include <linux/kernel.h>
15 #include <linux/init.h> 15 #include <linux/init.h>
16 #include <linux/smp.h> 16 #include <linux/smp.h>
17 #include <linux/preempt.h> 17 #include <linux/preempt.h>
18 #include <linux/hardirq.h> 18 #include <linux/hardirq.h>
19 #include <linux/percpu.h> 19 #include <linux/percpu.h>
20 #include <linux/delay.h> 20 #include <linux/delay.h>
21 #include <linux/start_kernel.h> 21 #include <linux/start_kernel.h>
22 #include <linux/sched.h> 22 #include <linux/sched.h>
23 #include <linux/bootmem.h> 23 #include <linux/bootmem.h>
24 #include <linux/module.h> 24 #include <linux/module.h>
25 #include <linux/mm.h> 25 #include <linux/mm.h>
26 #include <linux/page-flags.h> 26 #include <linux/page-flags.h>
27 #include <linux/highmem.h> 27 #include <linux/highmem.h>
28 #include <linux/console.h> 28 #include <linux/console.h>
29 29
30 #include <xen/interface/xen.h> 30 #include <xen/interface/xen.h>
31 #include <xen/interface/version.h>
31 #include <xen/interface/physdev.h> 32 #include <xen/interface/physdev.h>
32 #include <xen/interface/vcpu.h> 33 #include <xen/interface/vcpu.h>
33 #include <xen/features.h> 34 #include <xen/features.h>
34 #include <xen/page.h> 35 #include <xen/page.h>
35 #include <xen/hvc-console.h> 36 #include <xen/hvc-console.h>
36 37
37 #include <asm/paravirt.h> 38 #include <asm/paravirt.h>
38 #include <asm/apic.h> 39 #include <asm/apic.h>
39 #include <asm/page.h> 40 #include <asm/page.h>
40 #include <asm/xen/hypercall.h> 41 #include <asm/xen/hypercall.h>
41 #include <asm/xen/hypervisor.h> 42 #include <asm/xen/hypervisor.h>
42 #include <asm/fixmap.h> 43 #include <asm/fixmap.h>
43 #include <asm/processor.h> 44 #include <asm/processor.h>
44 #include <asm/msr-index.h> 45 #include <asm/msr-index.h>
45 #include <asm/setup.h> 46 #include <asm/setup.h>
46 #include <asm/desc.h> 47 #include <asm/desc.h>
47 #include <asm/pgtable.h> 48 #include <asm/pgtable.h>
48 #include <asm/tlbflush.h> 49 #include <asm/tlbflush.h>
49 #include <asm/reboot.h> 50 #include <asm/reboot.h>
50 51
51 #include "xen-ops.h" 52 #include "xen-ops.h"
52 #include "mmu.h" 53 #include "mmu.h"
53 #include "multicalls.h" 54 #include "multicalls.h"
54 55
55 EXPORT_SYMBOL_GPL(hypercall_page); 56 EXPORT_SYMBOL_GPL(hypercall_page);
56 57
57 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); 58 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
58 DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); 59 DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
59 60
60 enum xen_domain_type xen_domain_type = XEN_NATIVE; 61 enum xen_domain_type xen_domain_type = XEN_NATIVE;
61 EXPORT_SYMBOL_GPL(xen_domain_type); 62 EXPORT_SYMBOL_GPL(xen_domain_type);
62 63
63 /* 64 /*
64 * Identity map, in addition to plain kernel map. This needs to be 65 * Identity map, in addition to plain kernel map. This needs to be
65 * large enough to allocate page table pages to allocate the rest. 66 * large enough to allocate page table pages to allocate the rest.
66 * Each page can map 2MB. 67 * Each page can map 2MB.
67 */ 68 */
68 static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; 69 static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
69 70
70 #ifdef CONFIG_X86_64 71 #ifdef CONFIG_X86_64
71 /* l3 pud for userspace vsyscall mapping */ 72 /* l3 pud for userspace vsyscall mapping */
72 static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; 73 static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
73 #endif /* CONFIG_X86_64 */ 74 #endif /* CONFIG_X86_64 */
74 75
75 /* 76 /*
76 * Note about cr3 (pagetable base) values: 77 * Note about cr3 (pagetable base) values:
77 * 78 *
78 * xen_cr3 contains the current logical cr3 value; it contains the 79 * xen_cr3 contains the current logical cr3 value; it contains the
79 * last set cr3. This may not be the current effective cr3, because 80 * last set cr3. This may not be the current effective cr3, because
80 * its update may be being lazily deferred. However, a vcpu looking 81 * its update may be being lazily deferred. However, a vcpu looking
81 * at its own cr3 can use this value knowing that it everything will 82 * at its own cr3 can use this value knowing that it everything will
82 * be self-consistent. 83 * be self-consistent.
83 * 84 *
84 * xen_current_cr3 contains the actual vcpu cr3; it is set once the 85 * xen_current_cr3 contains the actual vcpu cr3; it is set once the
85 * hypercall to set the vcpu cr3 is complete (so it may be a little 86 * hypercall to set the vcpu cr3 is complete (so it may be a little
86 * out of date, but it will never be set early). If one vcpu is 87 * out of date, but it will never be set early). If one vcpu is
87 * looking at another vcpu's cr3 value, it should use this variable. 88 * looking at another vcpu's cr3 value, it should use this variable.
88 */ 89 */
89 DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */ 90 DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */
90 DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ 91 DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
91 92
92 struct start_info *xen_start_info; 93 struct start_info *xen_start_info;
93 EXPORT_SYMBOL_GPL(xen_start_info); 94 EXPORT_SYMBOL_GPL(xen_start_info);
94 95
95 struct shared_info xen_dummy_shared_info; 96 struct shared_info xen_dummy_shared_info;
96 97
97 /* 98 /*
98 * Point at some empty memory to start with. We map the real shared_info 99 * Point at some empty memory to start with. We map the real shared_info
99 * page as soon as fixmap is up and running. 100 * page as soon as fixmap is up and running.
100 */ 101 */
101 struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; 102 struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
102 103
103 /* 104 /*
104 * Flag to determine whether vcpu info placement is available on all 105 * Flag to determine whether vcpu info placement is available on all
105 * VCPUs. We assume it is to start with, and then set it to zero on 106 * VCPUs. We assume it is to start with, and then set it to zero on
106 * the first failure. This is because it can succeed on some VCPUs 107 * the first failure. This is because it can succeed on some VCPUs
107 * and not others, since it can involve hypervisor memory allocation, 108 * and not others, since it can involve hypervisor memory allocation,
108 * or because the guest failed to guarantee all the appropriate 109 * or because the guest failed to guarantee all the appropriate
109 * constraints on all VCPUs (ie buffer can't cross a page boundary). 110 * constraints on all VCPUs (ie buffer can't cross a page boundary).
110 * 111 *
111 * Note that any particular CPU may be using a placed vcpu structure, 112 * Note that any particular CPU may be using a placed vcpu structure,
112 * but we can only optimise if the all are. 113 * but we can only optimise if the all are.
113 * 114 *
114 * 0: not available, 1: available 115 * 0: not available, 1: available
115 */ 116 */
116 static int have_vcpu_info_placement = 117 static int have_vcpu_info_placement =
117 #ifdef CONFIG_X86_32 118 #ifdef CONFIG_X86_32
118 1 119 1
119 #else 120 #else
120 0 121 0
121 #endif 122 #endif
122 ; 123 ;
123 124
124 125
125 static void xen_vcpu_setup(int cpu) 126 static void xen_vcpu_setup(int cpu)
126 { 127 {
127 struct vcpu_register_vcpu_info info; 128 struct vcpu_register_vcpu_info info;
128 int err; 129 int err;
129 struct vcpu_info *vcpup; 130 struct vcpu_info *vcpup;
130 131
131 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); 132 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
132 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; 133 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
133 134
134 if (!have_vcpu_info_placement) 135 if (!have_vcpu_info_placement)
135 return; /* already tested, not available */ 136 return; /* already tested, not available */
136 137
137 vcpup = &per_cpu(xen_vcpu_info, cpu); 138 vcpup = &per_cpu(xen_vcpu_info, cpu);
138 139
139 info.mfn = virt_to_mfn(vcpup); 140 info.mfn = virt_to_mfn(vcpup);
140 info.offset = offset_in_page(vcpup); 141 info.offset = offset_in_page(vcpup);
141 142
142 printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n", 143 printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n",
143 cpu, vcpup, info.mfn, info.offset); 144 cpu, vcpup, info.mfn, info.offset);
144 145
145 /* Check to see if the hypervisor will put the vcpu_info 146 /* Check to see if the hypervisor will put the vcpu_info
146 structure where we want it, which allows direct access via 147 structure where we want it, which allows direct access via
147 a percpu variable. */ 148 a percpu variable. */
148 err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); 149 err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
149 150
150 if (err) { 151 if (err) {
151 printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err); 152 printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
152 have_vcpu_info_placement = 0; 153 have_vcpu_info_placement = 0;
153 } else { 154 } else {
154 /* This cpu is using the registered vcpu info, even if 155 /* This cpu is using the registered vcpu info, even if
155 later ones fail to. */ 156 later ones fail to. */
156 per_cpu(xen_vcpu, cpu) = vcpup; 157 per_cpu(xen_vcpu, cpu) = vcpup;
157 158
158 printk(KERN_DEBUG "cpu %d using vcpu_info at %p\n", 159 printk(KERN_DEBUG "cpu %d using vcpu_info at %p\n",
159 cpu, vcpup); 160 cpu, vcpup);
160 } 161 }
161 } 162 }
162 163
163 /* 164 /*
164 * On restore, set the vcpu placement up again. 165 * On restore, set the vcpu placement up again.
165 * If it fails, then we're in a bad state, since 166 * If it fails, then we're in a bad state, since
166 * we can't back out from using it... 167 * we can't back out from using it...
167 */ 168 */
168 void xen_vcpu_restore(void) 169 void xen_vcpu_restore(void)
169 { 170 {
170 if (have_vcpu_info_placement) { 171 if (have_vcpu_info_placement) {
171 int cpu; 172 int cpu;
172 173
173 for_each_online_cpu(cpu) { 174 for_each_online_cpu(cpu) {
174 bool other_cpu = (cpu != smp_processor_id()); 175 bool other_cpu = (cpu != smp_processor_id());
175 176
176 if (other_cpu && 177 if (other_cpu &&
177 HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) 178 HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
178 BUG(); 179 BUG();
179 180
180 xen_vcpu_setup(cpu); 181 xen_vcpu_setup(cpu);
181 182
182 if (other_cpu && 183 if (other_cpu &&
183 HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) 184 HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
184 BUG(); 185 BUG();
185 } 186 }
186 187
187 BUG_ON(!have_vcpu_info_placement); 188 BUG_ON(!have_vcpu_info_placement);
188 } 189 }
189 } 190 }
190 191
191 static void __init xen_banner(void) 192 static void __init xen_banner(void)
192 { 193 {
193 unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL); 194 unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL);
194 struct xen_extraversion extra; 195 struct xen_extraversion extra;
195 HYPERVISOR_xen_version(XENVER_extraversion, &extra); 196 HYPERVISOR_xen_version(XENVER_extraversion, &extra);
196 197
197 printk(KERN_INFO "Booting paravirtualized kernel on %s\n", 198 printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
198 pv_info.name); 199 pv_info.name);
199 printk(KERN_INFO "Xen version: %d.%d%s%s\n", 200 printk(KERN_INFO "Xen version: %d.%d%s%s\n",
200 version >> 16, version & 0xffff, extra.extraversion, 201 version >> 16, version & 0xffff, extra.extraversion,
201 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); 202 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
202 } 203 }
203 204
204 static void xen_cpuid(unsigned int *ax, unsigned int *bx, 205 static void xen_cpuid(unsigned int *ax, unsigned int *bx,
205 unsigned int *cx, unsigned int *dx) 206 unsigned int *cx, unsigned int *dx)
206 { 207 {
207 unsigned maskedx = ~0; 208 unsigned maskedx = ~0;
208 209
209 /* 210 /*
210 * Mask out inconvenient features, to try and disable as many 211 * Mask out inconvenient features, to try and disable as many
211 * unsupported kernel subsystems as possible. 212 * unsupported kernel subsystems as possible.
212 */ 213 */
213 if (*ax == 1) 214 if (*ax == 1)
214 maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ 215 maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */
215 (1 << X86_FEATURE_ACPI) | /* disable ACPI */ 216 (1 << X86_FEATURE_ACPI) | /* disable ACPI */
216 (1 << X86_FEATURE_MCE) | /* disable MCE */ 217 (1 << X86_FEATURE_MCE) | /* disable MCE */
217 (1 << X86_FEATURE_MCA) | /* disable MCA */ 218 (1 << X86_FEATURE_MCA) | /* disable MCA */
218 (1 << X86_FEATURE_ACC)); /* thermal monitoring */ 219 (1 << X86_FEATURE_ACC)); /* thermal monitoring */
219 220
220 asm(XEN_EMULATE_PREFIX "cpuid" 221 asm(XEN_EMULATE_PREFIX "cpuid"
221 : "=a" (*ax), 222 : "=a" (*ax),
222 "=b" (*bx), 223 "=b" (*bx),
223 "=c" (*cx), 224 "=c" (*cx),
224 "=d" (*dx) 225 "=d" (*dx)
225 : "0" (*ax), "2" (*cx)); 226 : "0" (*ax), "2" (*cx));
226 *dx &= maskedx; 227 *dx &= maskedx;
227 } 228 }
228 229
229 static void xen_set_debugreg(int reg, unsigned long val) 230 static void xen_set_debugreg(int reg, unsigned long val)
230 { 231 {
231 HYPERVISOR_set_debugreg(reg, val); 232 HYPERVISOR_set_debugreg(reg, val);
232 } 233 }
233 234
234 static unsigned long xen_get_debugreg(int reg) 235 static unsigned long xen_get_debugreg(int reg)
235 { 236 {
236 return HYPERVISOR_get_debugreg(reg); 237 return HYPERVISOR_get_debugreg(reg);
237 } 238 }
238 239
239 static void xen_leave_lazy(void) 240 static void xen_leave_lazy(void)
240 { 241 {
241 paravirt_leave_lazy(paravirt_get_lazy_mode()); 242 paravirt_leave_lazy(paravirt_get_lazy_mode());
242 xen_mc_flush(); 243 xen_mc_flush();
243 } 244 }
244 245
245 static unsigned long xen_store_tr(void) 246 static unsigned long xen_store_tr(void)
246 { 247 {
247 return 0; 248 return 0;
248 } 249 }
249 250
250 /* 251 /*
251 * Set the page permissions for a particular virtual address. If the 252 * Set the page permissions for a particular virtual address. If the
252 * address is a vmalloc mapping (or other non-linear mapping), then 253 * address is a vmalloc mapping (or other non-linear mapping), then
253 * find the linear mapping of the page and also set its protections to 254 * find the linear mapping of the page and also set its protections to
254 * match. 255 * match.
255 */ 256 */
256 static void set_aliased_prot(void *v, pgprot_t prot) 257 static void set_aliased_prot(void *v, pgprot_t prot)
257 { 258 {
258 int level; 259 int level;
259 pte_t *ptep; 260 pte_t *ptep;
260 pte_t pte; 261 pte_t pte;
261 unsigned long pfn; 262 unsigned long pfn;
262 struct page *page; 263 struct page *page;
263 264
264 ptep = lookup_address((unsigned long)v, &level); 265 ptep = lookup_address((unsigned long)v, &level);
265 BUG_ON(ptep == NULL); 266 BUG_ON(ptep == NULL);
266 267
267 pfn = pte_pfn(*ptep); 268 pfn = pte_pfn(*ptep);
268 page = pfn_to_page(pfn); 269 page = pfn_to_page(pfn);
269 270
270 pte = pfn_pte(pfn, prot); 271 pte = pfn_pte(pfn, prot);
271 272
272 if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) 273 if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
273 BUG(); 274 BUG();
274 275
275 if (!PageHighMem(page)) { 276 if (!PageHighMem(page)) {
276 void *av = __va(PFN_PHYS(pfn)); 277 void *av = __va(PFN_PHYS(pfn));
277 278
278 if (av != v) 279 if (av != v)
279 if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0)) 280 if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
280 BUG(); 281 BUG();
281 } else 282 } else
282 kmap_flush_unused(); 283 kmap_flush_unused();
283 } 284 }
284 285
285 static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) 286 static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
286 { 287 {
287 const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; 288 const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
288 int i; 289 int i;
289 290
290 for (i = 0; i < entries; i += entries_per_page) 291 for (i = 0; i < entries; i += entries_per_page)
291 set_aliased_prot(ldt + i, PAGE_KERNEL_RO); 292 set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
292 } 293 }
293 294
294 static void xen_free_ldt(struct desc_struct *ldt, unsigned entries) 295 static void xen_free_ldt(struct desc_struct *ldt, unsigned entries)
295 { 296 {
296 const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; 297 const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
297 int i; 298 int i;
298 299
299 for (i = 0; i < entries; i += entries_per_page) 300 for (i = 0; i < entries; i += entries_per_page)
300 set_aliased_prot(ldt + i, PAGE_KERNEL); 301 set_aliased_prot(ldt + i, PAGE_KERNEL);
301 } 302 }
302 303
303 static void xen_set_ldt(const void *addr, unsigned entries) 304 static void xen_set_ldt(const void *addr, unsigned entries)
304 { 305 {
305 struct mmuext_op *op; 306 struct mmuext_op *op;
306 struct multicall_space mcs = xen_mc_entry(sizeof(*op)); 307 struct multicall_space mcs = xen_mc_entry(sizeof(*op));
307 308
308 op = mcs.args; 309 op = mcs.args;
309 op->cmd = MMUEXT_SET_LDT; 310 op->cmd = MMUEXT_SET_LDT;
310 op->arg1.linear_addr = (unsigned long)addr; 311 op->arg1.linear_addr = (unsigned long)addr;
311 op->arg2.nr_ents = entries; 312 op->arg2.nr_ents = entries;
312 313
313 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 314 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
314 315
315 xen_mc_issue(PARAVIRT_LAZY_CPU); 316 xen_mc_issue(PARAVIRT_LAZY_CPU);
316 } 317 }
317 318
318 static void xen_load_gdt(const struct desc_ptr *dtr) 319 static void xen_load_gdt(const struct desc_ptr *dtr)
319 { 320 {
320 unsigned long *frames; 321 unsigned long *frames;
321 unsigned long va = dtr->address; 322 unsigned long va = dtr->address;
322 unsigned int size = dtr->size + 1; 323 unsigned int size = dtr->size + 1;
323 unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; 324 unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
324 int f; 325 int f;
325 struct multicall_space mcs; 326 struct multicall_space mcs;
326 327
327 /* A GDT can be up to 64k in size, which corresponds to 8192 328 /* A GDT can be up to 64k in size, which corresponds to 8192
328 8-byte entries, or 16 4k pages. */ 329 8-byte entries, or 16 4k pages. */
329 330
330 BUG_ON(size > 65536); 331 BUG_ON(size > 65536);
331 BUG_ON(va & ~PAGE_MASK); 332 BUG_ON(va & ~PAGE_MASK);
332 333
333 mcs = xen_mc_entry(sizeof(*frames) * pages); 334 mcs = xen_mc_entry(sizeof(*frames) * pages);
334 frames = mcs.args; 335 frames = mcs.args;
335 336
336 for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { 337 for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
337 frames[f] = virt_to_mfn(va); 338 frames[f] = virt_to_mfn(va);
338 make_lowmem_page_readonly((void *)va); 339 make_lowmem_page_readonly((void *)va);
339 } 340 }
340 341
341 MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); 342 MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct));
342 343
343 xen_mc_issue(PARAVIRT_LAZY_CPU); 344 xen_mc_issue(PARAVIRT_LAZY_CPU);
344 } 345 }
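Spelling out the arithmetic from the comment in xen_load_gdt above, a hedged worked fragment (assuming 4k pages and 8-byte descriptors):

	/* Worked numbers for the maximal case, illustration only */
	unsigned size    = 65536;				/* 64k  */
	unsigned entries = size / sizeof(struct desc_struct);	/* 8192 */
	unsigned pages   = (size + PAGE_SIZE - 1) / PAGE_SIZE;	/* 16   */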
345 346
346 static void load_TLS_descriptor(struct thread_struct *t, 347 static void load_TLS_descriptor(struct thread_struct *t,
347 unsigned int cpu, unsigned int i) 348 unsigned int cpu, unsigned int i)
348 { 349 {
349 struct desc_struct *gdt = get_cpu_gdt_table(cpu); 350 struct desc_struct *gdt = get_cpu_gdt_table(cpu);
350 xmaddr_t maddr = virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); 351 xmaddr_t maddr = virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
351 struct multicall_space mc = __xen_mc_entry(0); 352 struct multicall_space mc = __xen_mc_entry(0);
352 353
353 MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); 354 MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
354 } 355 }
355 356
356 static void xen_load_tls(struct thread_struct *t, unsigned int cpu) 357 static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
357 { 358 {
358 /* 359 /*
359 * XXX sleazy hack: If we're being called in a lazy-cpu zone, 360 * XXX sleazy hack: If we're being called in a lazy-cpu zone,
360 * it means we're in a context switch, and %gs has just been 361 * it means we're in a context switch, and %gs has just been
361 * saved. This means we can zero it out to prevent faults on 362 * saved. This means we can zero it out to prevent faults on
362 * exit from the hypervisor if the next process has no %gs. 363 * exit from the hypervisor if the next process has no %gs.
363 * Either way, it has been saved, and the new value will get 364 * Either way, it has been saved, and the new value will get
364 * loaded properly. This will go away as soon as Xen has been 365 * loaded properly. This will go away as soon as Xen has been
365 * modified to not save/restore %gs for normal hypercalls. 366 * modified to not save/restore %gs for normal hypercalls.
366 * 367 *
367 * On x86_64, this hack is not used for %gs, because gs points 368 * On x86_64, this hack is not used for %gs, because gs points
368 * to KERNEL_GS_BASE (and uses it for PDA references), so we 369 * to KERNEL_GS_BASE (and uses it for PDA references), so we
369 * must not zero %gs on x86_64 370 * must not zero %gs on x86_64
370 * 371 *
371 * For x86_64, we need to zero %fs, otherwise we may get an 372 * For x86_64, we need to zero %fs, otherwise we may get an
372 * exception between the new %fs descriptor being loaded and 373 * exception between the new %fs descriptor being loaded and
373 * %fs being effectively cleared at __switch_to(). 374 * %fs being effectively cleared at __switch_to().
374 */ 375 */
375 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { 376 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
376 #ifdef CONFIG_X86_32 377 #ifdef CONFIG_X86_32
377 loadsegment(gs, 0); 378 loadsegment(gs, 0);
378 #else 379 #else
379 loadsegment(fs, 0); 380 loadsegment(fs, 0);
380 #endif 381 #endif
381 } 382 }
382 383
383 xen_mc_batch(); 384 xen_mc_batch();
384 385
385 load_TLS_descriptor(t, cpu, 0); 386 load_TLS_descriptor(t, cpu, 0);
386 load_TLS_descriptor(t, cpu, 1); 387 load_TLS_descriptor(t, cpu, 1);
387 load_TLS_descriptor(t, cpu, 2); 388 load_TLS_descriptor(t, cpu, 2);
388 389
389 xen_mc_issue(PARAVIRT_LAZY_CPU); 390 xen_mc_issue(PARAVIRT_LAZY_CPU);
390 } 391 }
391 392
392 #ifdef CONFIG_X86_64 393 #ifdef CONFIG_X86_64
393 static void xen_load_gs_index(unsigned int idx) 394 static void xen_load_gs_index(unsigned int idx)
394 { 395 {
395 if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx)) 396 if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
396 BUG(); 397 BUG();
397 } 398 }
398 #endif 399 #endif
399 400
400 static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, 401 static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
401 const void *ptr) 402 const void *ptr)
402 { 403 {
403 xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]); 404 xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]);
404 u64 entry = *(u64 *)ptr; 405 u64 entry = *(u64 *)ptr;
405 406
406 preempt_disable(); 407 preempt_disable();
407 408
408 xen_mc_flush(); 409 xen_mc_flush();
409 if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry)) 410 if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry))
410 BUG(); 411 BUG();
411 412
412 preempt_enable(); 413 preempt_enable();
413 } 414 }
414 415
415 static int cvt_gate_to_trap(int vector, const gate_desc *val, 416 static int cvt_gate_to_trap(int vector, const gate_desc *val,
416 struct trap_info *info) 417 struct trap_info *info)
417 { 418 {
418 if (val->type != 0xf && val->type != 0xe) 419 if (val->type != 0xf && val->type != 0xe)
419 return 0; 420 return 0;
420 421
421 info->vector = vector; 422 info->vector = vector;
422 info->address = gate_offset(*val); 423 info->address = gate_offset(*val);
423 info->cs = gate_segment(*val); 424 info->cs = gate_segment(*val);
424 info->flags = val->dpl; 425 info->flags = val->dpl;
425 /* interrupt gates clear IF */ 426 /* interrupt gates clear IF */
426 if (val->type == 0xe) 427 if (val->type == 0xe)
427 info->flags |= 4; 428 info->flags |= 4;
428 429
429 return 1; 430 return 1;
430 } 431 }
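For reference, a hedged sketch of the flags encoding built above: the low two bits carry the gate's DPL, and bit 2 asks Xen to mask event delivery on entry, the paravirt analogue of an interrupt gate clearing IF.

	/* Illustration only */
	unsigned trap_flags = 3;	/* DPL-3 trap gate, type 0xf         */
	unsigned intr_flags = 0 | 4;	/* DPL-0 interrupt gate, type 0xe:
					   bit 2 set, events masked on entry */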
431 432
432 /* Locations of each CPU's IDT */ 433 /* Locations of each CPU's IDT */
433 static DEFINE_PER_CPU(struct desc_ptr, idt_desc); 434 static DEFINE_PER_CPU(struct desc_ptr, idt_desc);
434 435
435 /* Set an IDT entry. If the entry is part of the current IDT, then 436 /* Set an IDT entry. If the entry is part of the current IDT, then
436 also update Xen. */ 437 also update Xen. */
437 static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g) 438 static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
438 { 439 {
439 unsigned long p = (unsigned long)&dt[entrynum]; 440 unsigned long p = (unsigned long)&dt[entrynum];
440 unsigned long start, end; 441 unsigned long start, end;
441 442
442 preempt_disable(); 443 preempt_disable();
443 444
444 start = __get_cpu_var(idt_desc).address; 445 start = __get_cpu_var(idt_desc).address;
445 end = start + __get_cpu_var(idt_desc).size + 1; 446 end = start + __get_cpu_var(idt_desc).size + 1;
446 447
447 xen_mc_flush(); 448 xen_mc_flush();
448 449
449 native_write_idt_entry(dt, entrynum, g); 450 native_write_idt_entry(dt, entrynum, g);
450 451
451 if (p >= start && (p + 8) <= end) { 452 if (p >= start && (p + 8) <= end) {
452 struct trap_info info[2]; 453 struct trap_info info[2];
453 454
454 info[1].address = 0; 455 info[1].address = 0;
455 456
456 if (cvt_gate_to_trap(entrynum, g, &info[0])) 457 if (cvt_gate_to_trap(entrynum, g, &info[0]))
457 if (HYPERVISOR_set_trap_table(info)) 458 if (HYPERVISOR_set_trap_table(info))
458 BUG(); 459 BUG();
459 } 460 }
460 461
461 preempt_enable(); 462 preempt_enable();
462 } 463 }
463 464
464 static void xen_convert_trap_info(const struct desc_ptr *desc, 465 static void xen_convert_trap_info(const struct desc_ptr *desc,
465 struct trap_info *traps) 466 struct trap_info *traps)
466 { 467 {
467 unsigned in, out, count; 468 unsigned in, out, count;
468 469
469 count = (desc->size+1) / sizeof(gate_desc); 470 count = (desc->size+1) / sizeof(gate_desc);
470 BUG_ON(count > 256); 471 BUG_ON(count > 256);
471 472
472 for (in = out = 0; in < count; in++) { 473 for (in = out = 0; in < count; in++) {
473 gate_desc *entry = (gate_desc*)(desc->address) + in; 474 gate_desc *entry = (gate_desc*)(desc->address) + in;
474 475
475 if (cvt_gate_to_trap(in, entry, &traps[out])) 476 if (cvt_gate_to_trap(in, entry, &traps[out]))
476 out++; 477 out++;
477 } 478 }
478 traps[out].address = 0; 479 traps[out].address = 0;
479 } 480 }
480 481
481 void xen_copy_trap_info(struct trap_info *traps) 482 void xen_copy_trap_info(struct trap_info *traps)
482 { 483 {
483 const struct desc_ptr *desc = &__get_cpu_var(idt_desc); 484 const struct desc_ptr *desc = &__get_cpu_var(idt_desc);
484 485
485 xen_convert_trap_info(desc, traps); 486 xen_convert_trap_info(desc, traps);
486 } 487 }
487 488
488 /* Load a new IDT into Xen. In principle this can be per-CPU, so we 489 /* Load a new IDT into Xen. In principle this can be per-CPU, so we
489 hold a spinlock to protect the static traps[] array (static because 490 hold a spinlock to protect the static traps[] array (static because
490 it avoids allocation, and saves stack space). */ 491 it avoids allocation, and saves stack space). */
491 static void xen_load_idt(const struct desc_ptr *desc) 492 static void xen_load_idt(const struct desc_ptr *desc)
492 { 493 {
493 static DEFINE_SPINLOCK(lock); 494 static DEFINE_SPINLOCK(lock);
494 static struct trap_info traps[257]; 495 static struct trap_info traps[257];
495 496
496 spin_lock(&lock); 497 spin_lock(&lock);
497 498
498 __get_cpu_var(idt_desc) = *desc; 499 __get_cpu_var(idt_desc) = *desc;
499 500
500 xen_convert_trap_info(desc, traps); 501 xen_convert_trap_info(desc, traps);
501 502
502 xen_mc_flush(); 503 xen_mc_flush();
503 if (HYPERVISOR_set_trap_table(traps)) 504 if (HYPERVISOR_set_trap_table(traps))
504 BUG(); 505 BUG();
505 506
506 spin_unlock(&lock); 507 spin_unlock(&lock);
507 } 508 }
508 509
509 /* Write a GDT descriptor entry. Ignore LDT descriptors, since 510 /* Write a GDT descriptor entry. Ignore LDT descriptors, since
510 they're handled differently. */ 511 they're handled differently. */
511 static void xen_write_gdt_entry(struct desc_struct *dt, int entry, 512 static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
512 const void *desc, int type) 513 const void *desc, int type)
513 { 514 {
514 preempt_disable(); 515 preempt_disable();
515 516
516 switch (type) { 517 switch (type) {
517 case DESC_LDT: 518 case DESC_LDT:
518 case DESC_TSS: 519 case DESC_TSS:
519 /* ignore */ 520 /* ignore */
520 break; 521 break;
521 522
522 default: { 523 default: {
523 xmaddr_t maddr = virt_to_machine(&dt[entry]); 524 xmaddr_t maddr = virt_to_machine(&dt[entry]);
524 525
525 xen_mc_flush(); 526 xen_mc_flush();
526 if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc)) 527 if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
527 BUG(); 528 BUG();
528 } 529 }
529 530
530 } 531 }
531 532
532 preempt_enable(); 533 preempt_enable();
533 } 534 }
534 535
535 static void xen_load_sp0(struct tss_struct *tss, 536 static void xen_load_sp0(struct tss_struct *tss,
536 struct thread_struct *thread) 537 struct thread_struct *thread)
537 { 538 {
538 struct multicall_space mcs = xen_mc_entry(0); 539 struct multicall_space mcs = xen_mc_entry(0);
539 MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); 540 MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
540 xen_mc_issue(PARAVIRT_LAZY_CPU); 541 xen_mc_issue(PARAVIRT_LAZY_CPU);
541 } 542 }
542 543
543 static void xen_set_iopl_mask(unsigned mask) 544 static void xen_set_iopl_mask(unsigned mask)
544 { 545 {
545 struct physdev_set_iopl set_iopl; 546 struct physdev_set_iopl set_iopl;
546 547
547 /* Force the change at ring 0. */ 548 /* Force the change at ring 0. */
548 set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; 549 set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
549 HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); 550 HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
550 } 551 }
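The expression above extracts the two-bit IOPL field from an EFLAGS-style mask (bits 12-13). A hedged worked fragment with made-up values:

	/* Illustration only: both IOPL bits set selects ring 3 */
	unsigned mask = 0x3000;
	unsigned iopl = (mask >> 12) & 3;	/* == 3 */
	/* mask == 0 is special-cased to iopl 1, the ring the kernel runs
	   in under Xen, revoking userspace I/O port access. */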
551 552
552 static void xen_io_delay(void) 553 static void xen_io_delay(void)
553 { 554 {
554 } 555 }
555 556
556 #ifdef CONFIG_X86_LOCAL_APIC 557 #ifdef CONFIG_X86_LOCAL_APIC
557 static u32 xen_apic_read(u32 reg) 558 static u32 xen_apic_read(u32 reg)
558 { 559 {
559 return 0; 560 return 0;
560 } 561 }
561 562
562 static void xen_apic_write(u32 reg, u32 val) 563 static void xen_apic_write(u32 reg, u32 val)
563 { 564 {
564 /* Warn to see if there are any stray references */ 565 /* Warn to see if there are any stray references */
565 WARN_ON(1); 566 WARN_ON(1);
566 } 567 }
567 568
568 static u64 xen_apic_icr_read(void) 569 static u64 xen_apic_icr_read(void)
569 { 570 {
570 return 0; 571 return 0;
571 } 572 }
572 573
573 static void xen_apic_icr_write(u32 low, u32 id) 574 static void xen_apic_icr_write(u32 low, u32 id)
574 { 575 {
575 /* Warn to see if there are any stray references */ 576 /* Warn to see if there are any stray references */
576 WARN_ON(1); 577 WARN_ON(1);
577 } 578 }
578 579
579 static void xen_apic_wait_icr_idle(void) 580 static void xen_apic_wait_icr_idle(void)
580 { 581 {
581 return; 582 return;
582 } 583 }
583 584
584 static u32 xen_safe_apic_wait_icr_idle(void) 585 static u32 xen_safe_apic_wait_icr_idle(void)
585 { 586 {
586 return 0; 587 return 0;
587 } 588 }
588 589
589 static struct apic_ops xen_basic_apic_ops = { 590 static struct apic_ops xen_basic_apic_ops = {
590 .read = xen_apic_read, 591 .read = xen_apic_read,
591 .write = xen_apic_write, 592 .write = xen_apic_write,
592 .icr_read = xen_apic_icr_read, 593 .icr_read = xen_apic_icr_read,
593 .icr_write = xen_apic_icr_write, 594 .icr_write = xen_apic_icr_write,
594 .wait_icr_idle = xen_apic_wait_icr_idle, 595 .wait_icr_idle = xen_apic_wait_icr_idle,
595 .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle, 596 .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
596 }; 597 };
597 598
598 #endif 599 #endif
599 600
600 static void xen_flush_tlb(void) 601 static void xen_flush_tlb(void)
601 { 602 {
602 struct mmuext_op *op; 603 struct mmuext_op *op;
603 struct multicall_space mcs; 604 struct multicall_space mcs;
604 605
605 preempt_disable(); 606 preempt_disable();
606 607
607 mcs = xen_mc_entry(sizeof(*op)); 608 mcs = xen_mc_entry(sizeof(*op));
608 609
609 op = mcs.args; 610 op = mcs.args;
610 op->cmd = MMUEXT_TLB_FLUSH_LOCAL; 611 op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
611 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 612 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
612 613
613 xen_mc_issue(PARAVIRT_LAZY_MMU); 614 xen_mc_issue(PARAVIRT_LAZY_MMU);
614 615
615 preempt_enable(); 616 preempt_enable();
616 } 617 }
617 618
618 static void xen_flush_tlb_single(unsigned long addr) 619 static void xen_flush_tlb_single(unsigned long addr)
619 { 620 {
620 struct mmuext_op *op; 621 struct mmuext_op *op;
621 struct multicall_space mcs; 622 struct multicall_space mcs;
622 623
623 preempt_disable(); 624 preempt_disable();
624 625
625 mcs = xen_mc_entry(sizeof(*op)); 626 mcs = xen_mc_entry(sizeof(*op));
626 op = mcs.args; 627 op = mcs.args;
627 op->cmd = MMUEXT_INVLPG_LOCAL; 628 op->cmd = MMUEXT_INVLPG_LOCAL;
628 op->arg1.linear_addr = addr & PAGE_MASK; 629 op->arg1.linear_addr = addr & PAGE_MASK;
629 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 630 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
630 631
631 xen_mc_issue(PARAVIRT_LAZY_MMU); 632 xen_mc_issue(PARAVIRT_LAZY_MMU);
632 633
633 preempt_enable(); 634 preempt_enable();
634 } 635 }
635 636
636 static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, 637 static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
637 unsigned long va) 638 unsigned long va)
638 { 639 {
639 struct { 640 struct {
640 struct mmuext_op op; 641 struct mmuext_op op;
641 cpumask_t mask; 642 cpumask_t mask;
642 } *args; 643 } *args;
643 cpumask_t cpumask = *cpus; 644 cpumask_t cpumask = *cpus;
644 struct multicall_space mcs; 645 struct multicall_space mcs;
645 646
646 /* 647 /*
647 * A couple of (to be removed) sanity checks: 648 * A couple of (to be removed) sanity checks:
648 * 649 *
649 * - current CPU must not be in mask 650 * - current CPU must not be in mask
650 * - mask must exist :) 651 * - mask must exist :)
651 */ 652 */
652 BUG_ON(cpus_empty(cpumask)); 653 BUG_ON(cpus_empty(cpumask));
653 BUG_ON(cpu_isset(smp_processor_id(), cpumask)); 654 BUG_ON(cpu_isset(smp_processor_id(), cpumask));
654 BUG_ON(!mm); 655 BUG_ON(!mm);
655 656
656 /* If a CPU which we ran on has gone down, OK. */ 657 /* If a CPU which we ran on has gone down, OK. */
657 cpus_and(cpumask, cpumask, cpu_online_map); 658 cpus_and(cpumask, cpumask, cpu_online_map);
658 if (cpus_empty(cpumask)) 659 if (cpus_empty(cpumask))
659 return; 660 return;
660 661
661 mcs = xen_mc_entry(sizeof(*args)); 662 mcs = xen_mc_entry(sizeof(*args));
662 args = mcs.args; 663 args = mcs.args;
663 args->mask = cpumask; 664 args->mask = cpumask;
664 args->op.arg2.vcpumask = &args->mask; 665 args->op.arg2.vcpumask = &args->mask;
665 666
666 if (va == TLB_FLUSH_ALL) { 667 if (va == TLB_FLUSH_ALL) {
667 args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; 668 args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
668 } else { 669 } else {
669 args->op.cmd = MMUEXT_INVLPG_MULTI; 670 args->op.cmd = MMUEXT_INVLPG_MULTI;
670 args->op.arg1.linear_addr = va; 671 args->op.arg1.linear_addr = va;
671 } 672 }
672 673
673 MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); 674 MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
674 675
675 xen_mc_issue(PARAVIRT_LAZY_MMU); 676 xen_mc_issue(PARAVIRT_LAZY_MMU);
676 } 677 }
677 678
678 static void xen_clts(void) 679 static void xen_clts(void)
679 { 680 {
680 struct multicall_space mcs; 681 struct multicall_space mcs;
681 682
682 mcs = xen_mc_entry(0); 683 mcs = xen_mc_entry(0);
683 684
684 MULTI_fpu_taskswitch(mcs.mc, 0); 685 MULTI_fpu_taskswitch(mcs.mc, 0);
685 686
686 xen_mc_issue(PARAVIRT_LAZY_CPU); 687 xen_mc_issue(PARAVIRT_LAZY_CPU);
687 } 688 }
688 689
689 static void xen_write_cr0(unsigned long cr0) 690 static void xen_write_cr0(unsigned long cr0)
690 { 691 {
691 struct multicall_space mcs; 692 struct multicall_space mcs;
692 693
693 /* Only pay attention to cr0.TS; everything else is 694 /* Only pay attention to cr0.TS; everything else is
694 ignored. */ 695 ignored. */
695 mcs = xen_mc_entry(0); 696 mcs = xen_mc_entry(0);
696 697
697 MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0); 698 MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);
698 699
699 xen_mc_issue(PARAVIRT_LAZY_CPU); 700 xen_mc_issue(PARAVIRT_LAZY_CPU);
700 } 701 }
701 702
702 static void xen_write_cr2(unsigned long cr2) 703 static void xen_write_cr2(unsigned long cr2)
703 { 704 {
704 x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; 705 x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
705 } 706 }
706 707
707 static unsigned long xen_read_cr2(void) 708 static unsigned long xen_read_cr2(void)
708 { 709 {
709 return x86_read_percpu(xen_vcpu)->arch.cr2; 710 return x86_read_percpu(xen_vcpu)->arch.cr2;
710 } 711 }
711 712
712 static unsigned long xen_read_cr2_direct(void) 713 static unsigned long xen_read_cr2_direct(void)
713 { 714 {
714 return x86_read_percpu(xen_vcpu_info.arch.cr2); 715 return x86_read_percpu(xen_vcpu_info.arch.cr2);
715 } 716 }
716 717
717 static void xen_write_cr4(unsigned long cr4) 718 static void xen_write_cr4(unsigned long cr4)
718 { 719 {
719 cr4 &= ~X86_CR4_PGE; 720 cr4 &= ~X86_CR4_PGE;
720 cr4 &= ~X86_CR4_PSE; 721 cr4 &= ~X86_CR4_PSE;
721 722
722 native_write_cr4(cr4); 723 native_write_cr4(cr4);
723 } 724 }
724 725
725 static unsigned long xen_read_cr3(void) 726 static unsigned long xen_read_cr3(void)
726 { 727 {
727 return x86_read_percpu(xen_cr3); 728 return x86_read_percpu(xen_cr3);
728 } 729 }
729 730
730 static void set_current_cr3(void *v) 731 static void set_current_cr3(void *v)
731 { 732 {
732 x86_write_percpu(xen_current_cr3, (unsigned long)v); 733 x86_write_percpu(xen_current_cr3, (unsigned long)v);
733 } 734 }
734 735
735 static void __xen_write_cr3(bool kernel, unsigned long cr3) 736 static void __xen_write_cr3(bool kernel, unsigned long cr3)
736 { 737 {
737 struct mmuext_op *op; 738 struct mmuext_op *op;
738 struct multicall_space mcs; 739 struct multicall_space mcs;
739 unsigned long mfn; 740 unsigned long mfn;
740 741
741 if (cr3) 742 if (cr3)
742 mfn = pfn_to_mfn(PFN_DOWN(cr3)); 743 mfn = pfn_to_mfn(PFN_DOWN(cr3));
743 else 744 else
744 mfn = 0; 745 mfn = 0;
745 746
746 WARN_ON(mfn == 0 && kernel); 747 WARN_ON(mfn == 0 && kernel);
747 748
748 mcs = __xen_mc_entry(sizeof(*op)); 749 mcs = __xen_mc_entry(sizeof(*op));
749 750
750 op = mcs.args; 751 op = mcs.args;
751 op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; 752 op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
752 op->arg1.mfn = mfn; 753 op->arg1.mfn = mfn;
753 754
754 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 755 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
755 756
756 if (kernel) { 757 if (kernel) {
757 x86_write_percpu(xen_cr3, cr3); 758 x86_write_percpu(xen_cr3, cr3);
758 759
759 /* Update xen_current_cr3 once the batch has actually 760 /* Update xen_current_cr3 once the batch has actually
760 been submitted. */ 761 been submitted. */
761 xen_mc_callback(set_current_cr3, (void *)cr3); 762 xen_mc_callback(set_current_cr3, (void *)cr3);
762 } 763 }
763 } 764 }
764 765
765 static void xen_write_cr3(unsigned long cr3) 766 static void xen_write_cr3(unsigned long cr3)
766 { 767 {
767 BUG_ON(preemptible()); 768 BUG_ON(preemptible());
768 769
769 xen_mc_batch(); /* disables interrupts */ 770 xen_mc_batch(); /* disables interrupts */
770 771
771 /* Update while interrupts are disabled, so it's atomic with 772 /* Update while interrupts are disabled, so it's atomic with
772 respect to IPIs */ 773 respect to IPIs */
773 x86_write_percpu(xen_cr3, cr3); 774 x86_write_percpu(xen_cr3, cr3);
774 775
775 __xen_write_cr3(true, cr3); 776 __xen_write_cr3(true, cr3);
776 777
777 #ifdef CONFIG_X86_64 778 #ifdef CONFIG_X86_64
778 { 779 {
779 pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); 780 pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
780 if (user_pgd) 781 if (user_pgd)
781 __xen_write_cr3(false, __pa(user_pgd)); 782 __xen_write_cr3(false, __pa(user_pgd));
782 else 783 else
783 __xen_write_cr3(false, 0); 784 __xen_write_cr3(false, 0);
784 } 785 }
785 #endif 786 #endif
786 787
787 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ 788 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
788 } 789 }
789 790
790 static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) 791 static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
791 { 792 {
792 int ret; 793 int ret;
793 794
794 ret = 0; 795 ret = 0;
795 796
796 switch (msr) { 797 switch (msr) {
797 #ifdef CONFIG_X86_64 798 #ifdef CONFIG_X86_64
798 unsigned which; 799 unsigned which;
799 u64 base; 800 u64 base;
800 801
801 case MSR_FS_BASE: which = SEGBASE_FS; goto set; 802 case MSR_FS_BASE: which = SEGBASE_FS; goto set;
802 case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; 803 case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set;
803 case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; 804 case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set;
804 805
805 set: 806 set:
806 base = ((u64)high << 32) | low; 807 base = ((u64)high << 32) | low;
807 if (HYPERVISOR_set_segment_base(which, base) != 0) 808 if (HYPERVISOR_set_segment_base(which, base) != 0)
808 ret = -EFAULT; 809 ret = -EFAULT;
809 break; 810 break;
810 #endif 811 #endif
811 812
812 case MSR_STAR: 813 case MSR_STAR:
813 case MSR_CSTAR: 814 case MSR_CSTAR:
814 case MSR_LSTAR: 815 case MSR_LSTAR:
815 case MSR_SYSCALL_MASK: 816 case MSR_SYSCALL_MASK:
816 case MSR_IA32_SYSENTER_CS: 817 case MSR_IA32_SYSENTER_CS:
817 case MSR_IA32_SYSENTER_ESP: 818 case MSR_IA32_SYSENTER_ESP:
818 case MSR_IA32_SYSENTER_EIP: 819 case MSR_IA32_SYSENTER_EIP:
819 /* Fast syscall setup is all done in hypercalls, so 820 /* Fast syscall setup is all done in hypercalls, so
820 these are all ignored. Stub them out here to stop 821 these are all ignored. Stub them out here to stop
821 Xen console noise. */ 822 Xen console noise. */
822 break; 823 break;
823 824
824 default: 825 default:
825 ret = native_write_msr_safe(msr, low, high); 826 ret = native_write_msr_safe(msr, low, high);
826 } 827 }
827 828
828 return ret; 829 return ret;
829 } 830 }
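The reassembly in the set: branch above is plain 32/64-bit arithmetic; a hedged worked fragment with made-up register halves:

	/* Illustration only */
	unsigned low = 0x0000f000, high = 0x00007fff;
	u64 base = ((u64)high << 32) | low;	/* 0x00007fff0000f000 */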
830 831
831 /* Early in boot, while setting up the initial pagetable, assume 832 /* Early in boot, while setting up the initial pagetable, assume
832 everything is pinned. */ 833 everything is pinned. */
833 static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) 834 static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
834 { 835 {
835 #ifdef CONFIG_FLATMEM 836 #ifdef CONFIG_FLATMEM
836 BUG_ON(mem_map); /* should only be used early */ 837 BUG_ON(mem_map); /* should only be used early */
837 #endif 838 #endif
838 make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); 839 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
839 } 840 }
840 841
841 /* Early release_pte assumes that all pts are pinned, since there's 842 /* Early release_pte assumes that all pts are pinned, since there's
842 only init_mm and anything attached to that is pinned. */ 843 only init_mm and anything attached to that is pinned. */
843 static void xen_release_pte_init(unsigned long pfn) 844 static void xen_release_pte_init(unsigned long pfn)
844 { 845 {
845 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); 846 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
846 } 847 }
847 848
848 static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) 849 static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
849 { 850 {
850 struct mmuext_op op; 851 struct mmuext_op op;
851 op.cmd = cmd; 852 op.cmd = cmd;
852 op.arg1.mfn = pfn_to_mfn(pfn); 853 op.arg1.mfn = pfn_to_mfn(pfn);
853 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) 854 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
854 BUG(); 855 BUG();
855 } 856 }
856 857
857 /* This needs to make sure the new pte page is pinned iff it's being 858 /* This needs to make sure the new pte page is pinned iff it's being
858 attached to a pinned pagetable. */ 859 attached to a pinned pagetable. */
859 static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) 860 static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level)
860 { 861 {
861 struct page *page = pfn_to_page(pfn); 862 struct page *page = pfn_to_page(pfn);
862 863
863 if (PagePinned(virt_to_page(mm->pgd))) { 864 if (PagePinned(virt_to_page(mm->pgd))) {
864 SetPagePinned(page); 865 SetPagePinned(page);
865 866
866 vm_unmap_aliases(); 867 vm_unmap_aliases();
867 if (!PageHighMem(page)) { 868 if (!PageHighMem(page)) {
868 make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); 869 make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
869 if (level == PT_PTE && USE_SPLIT_PTLOCKS) 870 if (level == PT_PTE && USE_SPLIT_PTLOCKS)
870 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); 871 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
871 } else { 872 } else {
872 /* make sure there are no stray mappings of 873 /* make sure there are no stray mappings of
873 this page */ 874 this page */
874 kmap_flush_unused(); 875 kmap_flush_unused();
875 } 876 }
876 } 877 }
877 } 878 }
878 879
879 static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn) 880 static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
880 { 881 {
881 xen_alloc_ptpage(mm, pfn, PT_PTE); 882 xen_alloc_ptpage(mm, pfn, PT_PTE);
882 } 883 }
883 884
884 static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn) 885 static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
885 { 886 {
886 xen_alloc_ptpage(mm, pfn, PT_PMD); 887 xen_alloc_ptpage(mm, pfn, PT_PMD);
887 } 888 }
888 889
889 static int xen_pgd_alloc(struct mm_struct *mm) 890 static int xen_pgd_alloc(struct mm_struct *mm)
890 { 891 {
891 pgd_t *pgd = mm->pgd; 892 pgd_t *pgd = mm->pgd;
892 int ret = 0; 893 int ret = 0;
893 894
894 BUG_ON(PagePinned(virt_to_page(pgd))); 895 BUG_ON(PagePinned(virt_to_page(pgd)));
895 896
896 #ifdef CONFIG_X86_64 897 #ifdef CONFIG_X86_64
897 { 898 {
898 struct page *page = virt_to_page(pgd); 899 struct page *page = virt_to_page(pgd);
899 pgd_t *user_pgd; 900 pgd_t *user_pgd;
900 901
901 BUG_ON(page->private != 0); 902 BUG_ON(page->private != 0);
902 903
903 ret = -ENOMEM; 904 ret = -ENOMEM;
904 905
905 user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); 906 user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
906 page->private = (unsigned long)user_pgd; 907 page->private = (unsigned long)user_pgd;
907 908
908 if (user_pgd != NULL) { 909 if (user_pgd != NULL) {
909 user_pgd[pgd_index(VSYSCALL_START)] = 910 user_pgd[pgd_index(VSYSCALL_START)] =
910 __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); 911 __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
911 ret = 0; 912 ret = 0;
912 } 913 }
913 914
914 BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); 915 BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
915 } 916 }
916 #endif 917 #endif
917 918
918 return ret; 919 return ret;
919 } 920 }
920 921
921 static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) 922 static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
922 { 923 {
923 #ifdef CONFIG_X86_64 924 #ifdef CONFIG_X86_64
924 pgd_t *user_pgd = xen_get_user_pgd(pgd); 925 pgd_t *user_pgd = xen_get_user_pgd(pgd);
925 926
926 if (user_pgd) 927 if (user_pgd)
927 free_page((unsigned long)user_pgd); 928 free_page((unsigned long)user_pgd);
928 #endif 929 #endif
929 } 930 }
930 931
931 /* This should never happen until we're OK to use struct page */ 932 /* This should never happen until we're OK to use struct page */
932 static void xen_release_ptpage(unsigned long pfn, unsigned level) 933 static void xen_release_ptpage(unsigned long pfn, unsigned level)
933 { 934 {
934 struct page *page = pfn_to_page(pfn); 935 struct page *page = pfn_to_page(pfn);
935 936
936 if (PagePinned(page)) { 937 if (PagePinned(page)) {
937 if (!PageHighMem(page)) { 938 if (!PageHighMem(page)) {
938 if (level == PT_PTE && USE_SPLIT_PTLOCKS) 939 if (level == PT_PTE && USE_SPLIT_PTLOCKS)
939 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); 940 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
940 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); 941 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
941 } 942 }
942 ClearPagePinned(page); 943 ClearPagePinned(page);
943 } 944 }
944 } 945 }
945 946
946 static void xen_release_pte(unsigned long pfn) 947 static void xen_release_pte(unsigned long pfn)
947 { 948 {
948 xen_release_ptpage(pfn, PT_PTE); 949 xen_release_ptpage(pfn, PT_PTE);
949 } 950 }
950 951
951 static void xen_release_pmd(unsigned long pfn) 952 static void xen_release_pmd(unsigned long pfn)
952 { 953 {
953 xen_release_ptpage(pfn, PT_PMD); 954 xen_release_ptpage(pfn, PT_PMD);
954 } 955 }
955 956
956 #if PAGETABLE_LEVELS == 4 957 #if PAGETABLE_LEVELS == 4
957 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) 958 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
958 { 959 {
959 xen_alloc_ptpage(mm, pfn, PT_PUD); 960 xen_alloc_ptpage(mm, pfn, PT_PUD);
960 } 961 }
961 962
962 static void xen_release_pud(unsigned long pfn) 963 static void xen_release_pud(unsigned long pfn)
963 { 964 {
964 xen_release_ptpage(pfn, PT_PUD); 965 xen_release_ptpage(pfn, PT_PUD);
965 } 966 }
966 #endif 967 #endif
967 968
968 #ifdef CONFIG_HIGHPTE 969 #ifdef CONFIG_HIGHPTE
969 static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) 970 static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
970 { 971 {
971 pgprot_t prot = PAGE_KERNEL; 972 pgprot_t prot = PAGE_KERNEL;
972 973
973 if (PagePinned(page)) 974 if (PagePinned(page))
974 prot = PAGE_KERNEL_RO; 975 prot = PAGE_KERNEL_RO;
975 976
976 if (0 && PageHighMem(page)) 977 if (0 && PageHighMem(page))
977 printk("mapping highpte %lx type %d prot %s\n", 978 printk("mapping highpte %lx type %d prot %s\n",
978 page_to_pfn(page), type, 979 page_to_pfn(page), type,
979 (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ"); 980 (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ");
980 981
981 return kmap_atomic_prot(page, type, prot); 982 return kmap_atomic_prot(page, type, prot);
982 } 983 }
983 #endif 984 #endif
984 985
985 #ifdef CONFIG_X86_32 986 #ifdef CONFIG_X86_32
986 static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) 987 static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
987 { 988 {
988 /* If there's an existing pte, then don't allow _PAGE_RW to be set */ 989 /* If there's an existing pte, then don't allow _PAGE_RW to be set */
989 if (pte_val_ma(*ptep) & _PAGE_PRESENT) 990 if (pte_val_ma(*ptep) & _PAGE_PRESENT)
990 pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & 991 pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
991 pte_val_ma(pte)); 992 pte_val_ma(pte));
992 993
993 return pte; 994 return pte;
994 } 995 }
995 996
996 /* Init-time set_pte while constructing initial pagetables, which 997 /* Init-time set_pte while constructing initial pagetables, which
997 doesn't allow RO pagetable pages to be remapped RW */ 998 doesn't allow RO pagetable pages to be remapped RW */
998 static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) 999 static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
999 { 1000 {
1000 pte = mask_rw_pte(ptep, pte); 1001 pte = mask_rw_pte(ptep, pte);
1001 1002
1002 xen_set_pte(ptep, pte); 1003 xen_set_pte(ptep, pte);
1003 } 1004 }
1004 #endif 1005 #endif
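The bit trick in mask_rw_pte above builds a mask that is all-ones except possibly _PAGE_RW, so the AND clears RW exactly when the existing mapping was read-only, and is a no-op when it was writable. A hedged worked fragment with stand-in bits:

	/* Illustration only; stand-in bits, not the real pte layout */
	unsigned long RW     = 0x2;
	unsigned long oldpte = 0x1;			/* present, RW clear      */
	unsigned long mask   = (oldpte & RW) | ~RW;	/* ...11111101            */
	unsigned long newpte = 0x3;			/* present + RW requested */
	unsigned long res    = mask & newpte;		/* 0x1: RW stripped       */
	/* had oldpte been 0x3 (writable), mask would be all-ones and
	   newpte would pass through unchanged. */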
1005 1006
1006 static __init void xen_pagetable_setup_start(pgd_t *base) 1007 static __init void xen_pagetable_setup_start(pgd_t *base)
1007 { 1008 {
1008 } 1009 }
1009 1010
1010 void xen_setup_shared_info(void) 1011 void xen_setup_shared_info(void)
1011 { 1012 {
1012 if (!xen_feature(XENFEAT_auto_translated_physmap)) { 1013 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1013 set_fixmap(FIX_PARAVIRT_BOOTMAP, 1014 set_fixmap(FIX_PARAVIRT_BOOTMAP,
1014 xen_start_info->shared_info); 1015 xen_start_info->shared_info);
1015 1016
1016 HYPERVISOR_shared_info = 1017 HYPERVISOR_shared_info =
1017 (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); 1018 (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
1018 } else 1019 } else
1019 HYPERVISOR_shared_info = 1020 HYPERVISOR_shared_info =
1020 (struct shared_info *)__va(xen_start_info->shared_info); 1021 (struct shared_info *)__va(xen_start_info->shared_info);
1021 1022
1022 #ifndef CONFIG_SMP 1023 #ifndef CONFIG_SMP
1023 /* In UP this is as good a place as any to set up shared info */ 1024 /* In UP this is as good a place as any to set up shared info */
1024 xen_setup_vcpu_info_placement(); 1025 xen_setup_vcpu_info_placement();
1025 #endif 1026 #endif
1026 1027
1027 xen_setup_mfn_list_list(); 1028 xen_setup_mfn_list_list();
1028 } 1029 }
1029 1030
1030 static __init void xen_pagetable_setup_done(pgd_t *base) 1031 static __init void xen_pagetable_setup_done(pgd_t *base)
1031 { 1032 {
1032 xen_setup_shared_info(); 1033 xen_setup_shared_info();
1033 } 1034 }
1034 1035
1035 static __init void xen_post_allocator_init(void) 1036 static __init void xen_post_allocator_init(void)
1036 { 1037 {
1037 pv_mmu_ops.set_pte = xen_set_pte; 1038 pv_mmu_ops.set_pte = xen_set_pte;
1038 pv_mmu_ops.set_pmd = xen_set_pmd; 1039 pv_mmu_ops.set_pmd = xen_set_pmd;
1039 pv_mmu_ops.set_pud = xen_set_pud; 1040 pv_mmu_ops.set_pud = xen_set_pud;
1040 #if PAGETABLE_LEVELS == 4 1041 #if PAGETABLE_LEVELS == 4
1041 pv_mmu_ops.set_pgd = xen_set_pgd; 1042 pv_mmu_ops.set_pgd = xen_set_pgd;
1042 #endif 1043 #endif
1043 1044
1044 /* This will work as long as patching hasn't happened yet 1045 /* This will work as long as patching hasn't happened yet
1045 (which it hasn't) */ 1046 (which it hasn't) */
1046 pv_mmu_ops.alloc_pte = xen_alloc_pte; 1047 pv_mmu_ops.alloc_pte = xen_alloc_pte;
1047 pv_mmu_ops.alloc_pmd = xen_alloc_pmd; 1048 pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
1048 pv_mmu_ops.release_pte = xen_release_pte; 1049 pv_mmu_ops.release_pte = xen_release_pte;
1049 pv_mmu_ops.release_pmd = xen_release_pmd; 1050 pv_mmu_ops.release_pmd = xen_release_pmd;
1050 #if PAGETABLE_LEVELS == 4 1051 #if PAGETABLE_LEVELS == 4
1051 pv_mmu_ops.alloc_pud = xen_alloc_pud; 1052 pv_mmu_ops.alloc_pud = xen_alloc_pud;
1052 pv_mmu_ops.release_pud = xen_release_pud; 1053 pv_mmu_ops.release_pud = xen_release_pud;
1053 #endif 1054 #endif
1054 1055
1055 #ifdef CONFIG_X86_64 1056 #ifdef CONFIG_X86_64
1056 SetPagePinned(virt_to_page(level3_user_vsyscall)); 1057 SetPagePinned(virt_to_page(level3_user_vsyscall));
1057 #endif 1058 #endif
1058 xen_mark_init_mm_pinned(); 1059 xen_mark_init_mm_pinned();
1059 } 1060 }
1060 1061
1061 /* This is called once we have the cpu_possible_map */ 1062 /* This is called once we have the cpu_possible_map */
1062 void xen_setup_vcpu_info_placement(void) 1063 void xen_setup_vcpu_info_placement(void)
1063 { 1064 {
1064 int cpu; 1065 int cpu;
1065 1066
1066 for_each_possible_cpu(cpu) 1067 for_each_possible_cpu(cpu)
1067 xen_vcpu_setup(cpu); 1068 xen_vcpu_setup(cpu);
1068 1069
1069 /* xen_vcpu_setup managed to place the vcpu_info within the 1070 /* xen_vcpu_setup managed to place the vcpu_info within the
1070 percpu area for all cpus, so make use of it */ 1071 percpu area for all cpus, so make use of it */
1071 if (have_vcpu_info_placement) { 1072 if (have_vcpu_info_placement) {
1072 printk(KERN_INFO "Xen: using vcpu_info placement\n"); 1073 printk(KERN_INFO "Xen: using vcpu_info placement\n");
1073 1074
1074 pv_irq_ops.save_fl = xen_save_fl_direct; 1075 pv_irq_ops.save_fl = xen_save_fl_direct;
1075 pv_irq_ops.restore_fl = xen_restore_fl_direct; 1076 pv_irq_ops.restore_fl = xen_restore_fl_direct;
1076 pv_irq_ops.irq_disable = xen_irq_disable_direct; 1077 pv_irq_ops.irq_disable = xen_irq_disable_direct;
1077 pv_irq_ops.irq_enable = xen_irq_enable_direct; 1078 pv_irq_ops.irq_enable = xen_irq_enable_direct;
1078 pv_mmu_ops.read_cr2 = xen_read_cr2_direct; 1079 pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
1079 } 1080 }
1080 } 1081 }
1081 1082
1082 static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, 1083 static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
1083 unsigned long addr, unsigned len) 1084 unsigned long addr, unsigned len)
1084 { 1085 {
1085 char *start, *end, *reloc; 1086 char *start, *end, *reloc;
1086 unsigned ret; 1087 unsigned ret;
1087 1088
1088 start = end = reloc = NULL; 1089 start = end = reloc = NULL;
1089 1090
1090 #define SITE(op, x) \ 1091 #define SITE(op, x) \
1091 case PARAVIRT_PATCH(op.x): \ 1092 case PARAVIRT_PATCH(op.x): \
1092 if (have_vcpu_info_placement) { \ 1093 if (have_vcpu_info_placement) { \
1093 start = (char *)xen_##x##_direct; \ 1094 start = (char *)xen_##x##_direct; \
1094 end = xen_##x##_direct_end; \ 1095 end = xen_##x##_direct_end; \
1095 reloc = xen_##x##_direct_reloc; \ 1096 reloc = xen_##x##_direct_reloc; \
1096 } \ 1097 } \
1097 goto patch_site 1098 goto patch_site
1098 1099
1099 switch (type) { 1100 switch (type) {
1100 SITE(pv_irq_ops, irq_enable); 1101 SITE(pv_irq_ops, irq_enable);
1101 SITE(pv_irq_ops, irq_disable); 1102 SITE(pv_irq_ops, irq_disable);
1102 SITE(pv_irq_ops, save_fl); 1103 SITE(pv_irq_ops, save_fl);
1103 SITE(pv_irq_ops, restore_fl); 1104 SITE(pv_irq_ops, restore_fl);
1104 #undef SITE 1105 #undef SITE
1105 1106
1106 patch_site: 1107 patch_site:
1107 if (start == NULL || (end-start) > len) 1108 if (start == NULL || (end-start) > len)
1108 goto default_patch; 1109 goto default_patch;
1109 1110
1110 ret = paravirt_patch_insns(insnbuf, len, start, end); 1111 ret = paravirt_patch_insns(insnbuf, len, start, end);
1111 1112
1112 /* Note: because reloc is assigned from something that 1113 /* Note: because reloc is assigned from something that
1113 appears to be an array, gcc assumes it's non-null, 1114 appears to be an array, gcc assumes it's non-null,
1114 but doesn't know its relationship with start and 1115 but doesn't know its relationship with start and
1115 end. */ 1116 end. */
1116 if (reloc > start && reloc < end) { 1117 if (reloc > start && reloc < end) {
1117 int reloc_off = reloc - start; 1118 int reloc_off = reloc - start;
1118 long *relocp = (long *)(insnbuf + reloc_off); 1119 long *relocp = (long *)(insnbuf + reloc_off);
1119 long delta = start - (char *)addr; 1120 long delta = start - (char *)addr;
1120 1121
1121 *relocp += delta; 1122 *relocp += delta;
1122 } 1123 }
1123 break; 1124 break;
1124 1125
1125 default_patch: 1126 default_patch:
1126 default: 1127 default:
1127 ret = paravirt_patch_default(type, clobbers, insnbuf, 1128 ret = paravirt_patch_default(type, clobbers, insnbuf,
1128 addr, len); 1129 addr, len);
1129 break; 1130 break;
1130 } 1131 }
1131 1132
1132 return ret; 1133 return ret;
1133 } 1134 }
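A hedged numeric check of the fix-up above (made-up addresses, and assuming the relocated field is pc-relative, which is what the delta arithmetic implies): an operand assembled for the template at start must be adjusted by the distance the code moves when it is copied to addr, which is exactly delta = start - addr.

	/* Illustration only, made-up addresses */
	long start = 0x1000, addr = 0x9000, target = 0x2000, off = 5;
	long rel_at_start = target - (start + off);	/* 0x0ffb                */
	long delta = start - addr;			/* -0x8000               */
	long rel_at_addr = rel_at_start + delta;	/* == target-(addr+off)  */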
1134 1135
1135 static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) 1136 static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
1136 { 1137 {
1137 pte_t pte; 1138 pte_t pte;
1138 1139
1139 phys >>= PAGE_SHIFT; 1140 phys >>= PAGE_SHIFT;
1140 1141
1141 switch (idx) { 1142 switch (idx) {
1142 case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: 1143 case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
1143 #ifdef CONFIG_X86_F00F_BUG 1144 #ifdef CONFIG_X86_F00F_BUG
1144 case FIX_F00F_IDT: 1145 case FIX_F00F_IDT:
1145 #endif 1146 #endif
1146 #ifdef CONFIG_X86_32 1147 #ifdef CONFIG_X86_32
1147 case FIX_WP_TEST: 1148 case FIX_WP_TEST:
1148 case FIX_VDSO: 1149 case FIX_VDSO:
1149 # ifdef CONFIG_HIGHMEM 1150 # ifdef CONFIG_HIGHMEM
1150 case FIX_KMAP_BEGIN ... FIX_KMAP_END: 1151 case FIX_KMAP_BEGIN ... FIX_KMAP_END:
1151 # endif 1152 # endif
1152 #else 1153 #else
1153 case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: 1154 case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
1154 #endif 1155 #endif
1155 #ifdef CONFIG_X86_LOCAL_APIC 1156 #ifdef CONFIG_X86_LOCAL_APIC
1156 case FIX_APIC_BASE: /* maps dummy local APIC */ 1157 case FIX_APIC_BASE: /* maps dummy local APIC */
1157 #endif 1158 #endif
1158 pte = pfn_pte(phys, prot); 1159 pte = pfn_pte(phys, prot);
1159 break; 1160 break;
1160 1161
1161 default: 1162 default:
1162 pte = mfn_pte(phys, prot); 1163 pte = mfn_pte(phys, prot);
1163 break; 1164 break;
1164 } 1165 }
1165 1166
1166 __native_set_fixmap(idx, pte); 1167 __native_set_fixmap(idx, pte);
1167 1168
1168 #ifdef CONFIG_X86_64 1169 #ifdef CONFIG_X86_64
1169 /* Replicate changes to map the vsyscall page into the user 1170 /* Replicate changes to map the vsyscall page into the user
1170 pagetable vsyscall mapping. */ 1171 pagetable vsyscall mapping. */
1171 if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { 1172 if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
1172 unsigned long vaddr = __fix_to_virt(idx); 1173 unsigned long vaddr = __fix_to_virt(idx);
1173 set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); 1174 set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
1174 } 1175 }
1175 #endif 1176 #endif
1176 } 1177 }
1177 1178
1178 static const struct pv_info xen_info __initdata = { 1179 static const struct pv_info xen_info __initdata = {
1179 .paravirt_enabled = 1, 1180 .paravirt_enabled = 1,
1180 .shared_kernel_pmd = 0, 1181 .shared_kernel_pmd = 0,
1181 1182
1182 .name = "Xen", 1183 .name = "Xen",
1183 }; 1184 };
1184 1185
1185 static const struct pv_init_ops xen_init_ops __initdata = { 1186 static const struct pv_init_ops xen_init_ops __initdata = {
1186 .patch = xen_patch, 1187 .patch = xen_patch,
1187 1188
1188 .banner = xen_banner, 1189 .banner = xen_banner,
1189 .memory_setup = xen_memory_setup, 1190 .memory_setup = xen_memory_setup,
1190 .arch_setup = xen_arch_setup, 1191 .arch_setup = xen_arch_setup,
1191 .post_allocator_init = xen_post_allocator_init, 1192 .post_allocator_init = xen_post_allocator_init,
1192 }; 1193 };
1193 1194
1194 static const struct pv_time_ops xen_time_ops __initdata = { 1195 static const struct pv_time_ops xen_time_ops __initdata = {
1195 .time_init = xen_time_init, 1196 .time_init = xen_time_init,
1196 1197
1197 .set_wallclock = xen_set_wallclock, 1198 .set_wallclock = xen_set_wallclock,
1198 .get_wallclock = xen_get_wallclock, 1199 .get_wallclock = xen_get_wallclock,
1199 .get_tsc_khz = xen_tsc_khz, 1200 .get_tsc_khz = xen_tsc_khz,
1200 .sched_clock = xen_sched_clock, 1201 .sched_clock = xen_sched_clock,
1201 }; 1202 };
1202 1203
1203 static const struct pv_cpu_ops xen_cpu_ops __initdata = { 1204 static const struct pv_cpu_ops xen_cpu_ops __initdata = {
1204 .cpuid = xen_cpuid, 1205 .cpuid = xen_cpuid,
1205 1206
1206 .set_debugreg = xen_set_debugreg, 1207 .set_debugreg = xen_set_debugreg,
1207 .get_debugreg = xen_get_debugreg, 1208 .get_debugreg = xen_get_debugreg,
1208 1209
1209 .clts = xen_clts, 1210 .clts = xen_clts,
1210 1211
1211 .read_cr0 = native_read_cr0, 1212 .read_cr0 = native_read_cr0,
1212 .write_cr0 = xen_write_cr0, 1213 .write_cr0 = xen_write_cr0,
1213 1214
1214 .read_cr4 = native_read_cr4, 1215 .read_cr4 = native_read_cr4,
1215 .read_cr4_safe = native_read_cr4_safe, 1216 .read_cr4_safe = native_read_cr4_safe,
1216 .write_cr4 = xen_write_cr4, 1217 .write_cr4 = xen_write_cr4,
1217 1218
1218 .wbinvd = native_wbinvd, 1219 .wbinvd = native_wbinvd,
1219 1220
1220 .read_msr = native_read_msr_safe, 1221 .read_msr = native_read_msr_safe,
1221 .write_msr = xen_write_msr_safe, 1222 .write_msr = xen_write_msr_safe,
1222 .read_tsc = native_read_tsc, 1223 .read_tsc = native_read_tsc,
1223 .read_pmc = native_read_pmc, 1224 .read_pmc = native_read_pmc,
1224 1225
1225 .iret = xen_iret, 1226 .iret = xen_iret,
1226 .irq_enable_sysexit = xen_sysexit, 1227 .irq_enable_sysexit = xen_sysexit,
1227 #ifdef CONFIG_X86_64 1228 #ifdef CONFIG_X86_64
1228 .usergs_sysret32 = xen_sysret32, 1229 .usergs_sysret32 = xen_sysret32,
1229 .usergs_sysret64 = xen_sysret64, 1230 .usergs_sysret64 = xen_sysret64,
1230 #endif 1231 #endif
1231 1232
1232 .load_tr_desc = paravirt_nop, 1233 .load_tr_desc = paravirt_nop,
1233 .set_ldt = xen_set_ldt, 1234 .set_ldt = xen_set_ldt,
1234 .load_gdt = xen_load_gdt, 1235 .load_gdt = xen_load_gdt,
1235 .load_idt = xen_load_idt, 1236 .load_idt = xen_load_idt,
1236 .load_tls = xen_load_tls, 1237 .load_tls = xen_load_tls,
1237 #ifdef CONFIG_X86_64 1238 #ifdef CONFIG_X86_64
1238 .load_gs_index = xen_load_gs_index, 1239 .load_gs_index = xen_load_gs_index,
1239 #endif 1240 #endif
1240 1241
1241 .alloc_ldt = xen_alloc_ldt, 1242 .alloc_ldt = xen_alloc_ldt,
1242 .free_ldt = xen_free_ldt, 1243 .free_ldt = xen_free_ldt,
1243 1244
1244 .store_gdt = native_store_gdt, 1245 .store_gdt = native_store_gdt,
1245 .store_idt = native_store_idt, 1246 .store_idt = native_store_idt,
1246 .store_tr = xen_store_tr, 1247 .store_tr = xen_store_tr,
1247 1248
1248 .write_ldt_entry = xen_write_ldt_entry, 1249 .write_ldt_entry = xen_write_ldt_entry,
1249 .write_gdt_entry = xen_write_gdt_entry, 1250 .write_gdt_entry = xen_write_gdt_entry,
1250 .write_idt_entry = xen_write_idt_entry, 1251 .write_idt_entry = xen_write_idt_entry,
1251 .load_sp0 = xen_load_sp0, 1252 .load_sp0 = xen_load_sp0,
1252 1253
1253 .set_iopl_mask = xen_set_iopl_mask, 1254 .set_iopl_mask = xen_set_iopl_mask,
1254 .io_delay = xen_io_delay, 1255 .io_delay = xen_io_delay,
1255 1256
1256 /* Xen takes care of %gs when switching to usermode for us */ 1257 /* Xen takes care of %gs when switching to usermode for us */
1257 .swapgs = paravirt_nop, 1258 .swapgs = paravirt_nop,
1258 1259
1259 .lazy_mode = { 1260 .lazy_mode = {
1260 .enter = paravirt_enter_lazy_cpu, 1261 .enter = paravirt_enter_lazy_cpu,
1261 .leave = xen_leave_lazy, 1262 .leave = xen_leave_lazy,
1262 }, 1263 },
1263 }; 1264 };
1264 1265
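The pv_*_ops tables above and below are plain structs of function pointers: xen_start_kernel() later copies them over the native defaults, and every paravirt call site then dispatches through the table (or gets patched inline via xen_patch). A minimal sketch of that override pattern, with hypothetical names (demo_ops, xen_halt_demo) standing in for the real paravirt machinery:

    /* Sketch of the ops-table override pattern (hypothetical names). */
    #include <stdio.h>

    struct demo_ops {
            const char *name;       /* like .name in pv_info */
            void (*halt)(void);     /* like an entry in pv_cpu_ops */
    };

    static void native_halt_demo(void) { puts("native hlt"); }
    static void xen_halt_demo(void)    { puts("halt via hypercall"); }

    static struct demo_ops ops = { .name = "native", .halt = native_halt_demo };

    int main(void)
    {
            /* Boot-time override, as done below with pv_cpu_ops = xen_cpu_ops. */
            ops.name = "Xen";
            ops.halt = xen_halt_demo;
            ops.halt();             /* indirect call through the table */
            return 0;
    }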
1265 static const struct pv_apic_ops xen_apic_ops __initdata = { 1266 static const struct pv_apic_ops xen_apic_ops __initdata = {
1266 #ifdef CONFIG_X86_LOCAL_APIC 1267 #ifdef CONFIG_X86_LOCAL_APIC
1267 .setup_boot_clock = paravirt_nop, 1268 .setup_boot_clock = paravirt_nop,
1268 .setup_secondary_clock = paravirt_nop, 1269 .setup_secondary_clock = paravirt_nop,
1269 .startup_ipi_hook = paravirt_nop, 1270 .startup_ipi_hook = paravirt_nop,
1270 #endif 1271 #endif
1271 }; 1272 };
1272 1273
1273 static const struct pv_mmu_ops xen_mmu_ops __initdata = { 1274 static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1274 .pagetable_setup_start = xen_pagetable_setup_start, 1275 .pagetable_setup_start = xen_pagetable_setup_start,
1275 .pagetable_setup_done = xen_pagetable_setup_done, 1276 .pagetable_setup_done = xen_pagetable_setup_done,
1276 1277
1277 .read_cr2 = xen_read_cr2, 1278 .read_cr2 = xen_read_cr2,
1278 .write_cr2 = xen_write_cr2, 1279 .write_cr2 = xen_write_cr2,
1279 1280
1280 .read_cr3 = xen_read_cr3, 1281 .read_cr3 = xen_read_cr3,
1281 .write_cr3 = xen_write_cr3, 1282 .write_cr3 = xen_write_cr3,
1282 1283
1283 .flush_tlb_user = xen_flush_tlb, 1284 .flush_tlb_user = xen_flush_tlb,
1284 .flush_tlb_kernel = xen_flush_tlb, 1285 .flush_tlb_kernel = xen_flush_tlb,
1285 .flush_tlb_single = xen_flush_tlb_single, 1286 .flush_tlb_single = xen_flush_tlb_single,
1286 .flush_tlb_others = xen_flush_tlb_others, 1287 .flush_tlb_others = xen_flush_tlb_others,
1287 1288
1288 .pte_update = paravirt_nop, 1289 .pte_update = paravirt_nop,
1289 .pte_update_defer = paravirt_nop, 1290 .pte_update_defer = paravirt_nop,
1290 1291
1291 .pgd_alloc = xen_pgd_alloc, 1292 .pgd_alloc = xen_pgd_alloc,
1292 .pgd_free = xen_pgd_free, 1293 .pgd_free = xen_pgd_free,
1293 1294
1294 .alloc_pte = xen_alloc_pte_init, 1295 .alloc_pte = xen_alloc_pte_init,
1295 .release_pte = xen_release_pte_init, 1296 .release_pte = xen_release_pte_init,
1296 .alloc_pmd = xen_alloc_pte_init, 1297 .alloc_pmd = xen_alloc_pte_init,
1297 .alloc_pmd_clone = paravirt_nop, 1298 .alloc_pmd_clone = paravirt_nop,
1298 .release_pmd = xen_release_pte_init, 1299 .release_pmd = xen_release_pte_init,
1299 1300
1300 #ifdef CONFIG_HIGHPTE 1301 #ifdef CONFIG_HIGHPTE
1301 .kmap_atomic_pte = xen_kmap_atomic_pte, 1302 .kmap_atomic_pte = xen_kmap_atomic_pte,
1302 #endif 1303 #endif
1303 1304
1304 #ifdef CONFIG_X86_64 1305 #ifdef CONFIG_X86_64
1305 .set_pte = xen_set_pte, 1306 .set_pte = xen_set_pte,
1306 #else 1307 #else
1307 .set_pte = xen_set_pte_init, 1308 .set_pte = xen_set_pte_init,
1308 #endif 1309 #endif
1309 .set_pte_at = xen_set_pte_at, 1310 .set_pte_at = xen_set_pte_at,
1310 .set_pmd = xen_set_pmd_hyper, 1311 .set_pmd = xen_set_pmd_hyper,
1311 1312
1312 .ptep_modify_prot_start = __ptep_modify_prot_start, 1313 .ptep_modify_prot_start = __ptep_modify_prot_start,
1313 .ptep_modify_prot_commit = __ptep_modify_prot_commit, 1314 .ptep_modify_prot_commit = __ptep_modify_prot_commit,
1314 1315
1315 .pte_val = xen_pte_val, 1316 .pte_val = xen_pte_val,
1316 .pte_flags = native_pte_flags, 1317 .pte_flags = native_pte_flags,
1317 .pgd_val = xen_pgd_val, 1318 .pgd_val = xen_pgd_val,
1318 1319
1319 .make_pte = xen_make_pte, 1320 .make_pte = xen_make_pte,
1320 .make_pgd = xen_make_pgd, 1321 .make_pgd = xen_make_pgd,
1321 1322
1322 #ifdef CONFIG_X86_PAE 1323 #ifdef CONFIG_X86_PAE
1323 .set_pte_atomic = xen_set_pte_atomic, 1324 .set_pte_atomic = xen_set_pte_atomic,
1324 .set_pte_present = xen_set_pte_at, 1325 .set_pte_present = xen_set_pte_at,
1325 .pte_clear = xen_pte_clear, 1326 .pte_clear = xen_pte_clear,
1326 .pmd_clear = xen_pmd_clear, 1327 .pmd_clear = xen_pmd_clear,
1327 #endif /* CONFIG_X86_PAE */ 1328 #endif /* CONFIG_X86_PAE */
1328 .set_pud = xen_set_pud_hyper, 1329 .set_pud = xen_set_pud_hyper,
1329 1330
1330 .make_pmd = xen_make_pmd, 1331 .make_pmd = xen_make_pmd,
1331 .pmd_val = xen_pmd_val, 1332 .pmd_val = xen_pmd_val,
1332 1333
1333 #if PAGETABLE_LEVELS == 4 1334 #if PAGETABLE_LEVELS == 4
1334 .pud_val = xen_pud_val, 1335 .pud_val = xen_pud_val,
1335 .make_pud = xen_make_pud, 1336 .make_pud = xen_make_pud,
1336 .set_pgd = xen_set_pgd_hyper, 1337 .set_pgd = xen_set_pgd_hyper,
1337 1338
1338 .alloc_pud = xen_alloc_pte_init, 1339 .alloc_pud = xen_alloc_pte_init,
1339 .release_pud = xen_release_pte_init, 1340 .release_pud = xen_release_pte_init,
1340 #endif /* PAGETABLE_LEVELS == 4 */ 1341 #endif /* PAGETABLE_LEVELS == 4 */
1341 1342
1342 .activate_mm = xen_activate_mm, 1343 .activate_mm = xen_activate_mm,
1343 .dup_mmap = xen_dup_mmap, 1344 .dup_mmap = xen_dup_mmap,
1344 .exit_mmap = xen_exit_mmap, 1345 .exit_mmap = xen_exit_mmap,
1345 1346
1346 .lazy_mode = { 1347 .lazy_mode = {
1347 .enter = paravirt_enter_lazy_mmu, 1348 .enter = paravirt_enter_lazy_mmu,
1348 .leave = xen_leave_lazy, 1349 .leave = xen_leave_lazy,
1349 }, 1350 },
1350 1351
1351 .set_fixmap = xen_set_fixmap, 1352 .set_fixmap = xen_set_fixmap,
1352 }; 1353 };
1353 1354
1354 static void xen_reboot(int reason) 1355 static void xen_reboot(int reason)
1355 { 1356 {
1356 struct sched_shutdown r = { .reason = reason }; 1357 struct sched_shutdown r = { .reason = reason };
1357 1358
1358 #ifdef CONFIG_SMP 1359 #ifdef CONFIG_SMP
1359 smp_send_stop(); 1360 smp_send_stop();
1360 #endif 1361 #endif
1361 1362
1362 if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) 1363 if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
1363 BUG(); 1364 BUG();
1364 } 1365 }
1365 1366
1366 static void xen_restart(char *msg) 1367 static void xen_restart(char *msg)
1367 { 1368 {
1368 xen_reboot(SHUTDOWN_reboot); 1369 xen_reboot(SHUTDOWN_reboot);
1369 } 1370 }
1370 1371
1371 static void xen_emergency_restart(void) 1372 static void xen_emergency_restart(void)
1372 { 1373 {
1373 xen_reboot(SHUTDOWN_reboot); 1374 xen_reboot(SHUTDOWN_reboot);
1374 } 1375 }
1375 1376
1376 static void xen_machine_halt(void) 1377 static void xen_machine_halt(void)
1377 { 1378 {
1378 xen_reboot(SHUTDOWN_poweroff); 1379 xen_reboot(SHUTDOWN_poweroff);
1379 } 1380 }
1380 1381
1381 static void xen_crash_shutdown(struct pt_regs *regs) 1382 static void xen_crash_shutdown(struct pt_regs *regs)
1382 { 1383 {
1383 xen_reboot(SHUTDOWN_crash); 1384 xen_reboot(SHUTDOWN_crash);
1384 } 1385 }
1385 1386
1386 static const struct machine_ops __initdata xen_machine_ops = { 1387 static const struct machine_ops __initdata xen_machine_ops = {
1387 .restart = xen_restart, 1388 .restart = xen_restart,
1388 .halt = xen_machine_halt, 1389 .halt = xen_machine_halt,
1389 .power_off = xen_machine_halt, 1390 .power_off = xen_machine_halt,
1390 .shutdown = xen_machine_halt, 1391 .shutdown = xen_machine_halt,
1391 .crash_shutdown = xen_crash_shutdown, 1392 .crash_shutdown = xen_crash_shutdown,
1392 .emergency_restart = xen_emergency_restart, 1393 .emergency_restart = xen_emergency_restart,
1393 }; 1394 };
1394 1395
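Every entry in xen_machine_ops funnels into xen_reboot() above, so the whole table reduces to choosing one SCHEDOP_shutdown reason code. A small sketch of the implied mapping; the demo enum is hypothetical, standing in for the SHUTDOWN_* constants used above:

    /* Sketch: the op -> shutdown-reason mapping implied by xen_machine_ops. */
    #include <stdio.h>

    enum demo_reason { DEMO_REBOOT, DEMO_POWEROFF, DEMO_CRASH };

    static const struct { const char *op; enum demo_reason reason; } map[] = {
            { "restart",           DEMO_REBOOT   },
            { "emergency_restart", DEMO_REBOOT   },
            { "halt",              DEMO_POWEROFF },
            { "power_off",         DEMO_POWEROFF },
            { "shutdown",          DEMO_POWEROFF },
            { "crash_shutdown",    DEMO_CRASH    },
    };

    int main(void)
    {
            for (unsigned i = 0; i < sizeof(map) / sizeof(map[0]); i++)
                    printf("%-17s -> reason %d\n", map[i].op, map[i].reason);
            return 0;
    }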
1395 1396
1396 static void __init xen_reserve_top(void) 1397 static void __init xen_reserve_top(void)
1397 { 1398 {
1398 #ifdef CONFIG_X86_32 1399 #ifdef CONFIG_X86_32
1399 unsigned long top = HYPERVISOR_VIRT_START; 1400 unsigned long top = HYPERVISOR_VIRT_START;
1400 struct xen_platform_parameters pp; 1401 struct xen_platform_parameters pp;
1401 1402
1402 if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) 1403 if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
1403 top = pp.virt_start; 1404 top = pp.virt_start;
1404 1405
1405 reserve_top_address(-top); 1406 reserve_top_address(-top);
1406 #endif /* CONFIG_X86_32 */ 1407 #endif /* CONFIG_X86_32 */
1407 } 1408 }
1408 1409
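On 32-bit, Xen owns the virtual address range from HYPERVISOR_VIRT_START (or pp.virt_start, when the hypervisor reports one) up to 4 GiB, so xen_reserve_top() passes the negated value to reserve_top_address(): by unsigned wraparound, -top is exactly the size of that reserved top region. A sketch of the arithmetic with an illustrative address:

    /* Sketch of the -top wraparound arithmetic (illustrative address). */
    #include <stdio.h>

    int main(void)
    {
            unsigned int top = 0xF5800000u;  /* a plausible 32-bit virt_start */
            unsigned int reserve = -top;     /* == 2^32 - top */

            printf("reserve %#x bytes (%u MiB) below 4 GiB\n",
                   reserve, reserve >> 20);  /* 0xa800000, 168 MiB */
            return 0;
    }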
1409 /* 1410 /*
1410 * Like __va(), but returns an address in the kernel mapping (which is 1411 * Like __va(), but returns an address in the kernel mapping (which is
1411 * all we have until the physical memory mapping has been set up). 1412 * all we have until the physical memory mapping has been set up).
1412 */ 1413 */
1413 static void *__ka(phys_addr_t paddr) 1414 static void *__ka(phys_addr_t paddr)
1414 { 1415 {
1415 #ifdef CONFIG_X86_64 1416 #ifdef CONFIG_X86_64
1416 return (void *)(paddr + __START_KERNEL_map); 1417 return (void *)(paddr + __START_KERNEL_map);
1417 #else 1418 #else
1418 return __va(paddr); 1419 return __va(paddr);
1419 #endif 1420 #endif
1420 } 1421 }
1421 1422
1422 /* Convert a machine address to physical address */ 1423 /* Convert a machine address to physical address */
1423 static unsigned long m2p(phys_addr_t maddr) 1424 static unsigned long m2p(phys_addr_t maddr)
1424 { 1425 {
1425 phys_addr_t paddr; 1426 phys_addr_t paddr;
1426 1427
1427 maddr &= PTE_PFN_MASK; 1428 maddr &= PTE_PFN_MASK;
1428 paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; 1429 paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
1429 1430
1430 return paddr; 1431 return paddr;
1431 } 1432 }
1432 1433
1433 /* Convert a machine address to kernel virtual */ 1434 /* Convert a machine address to kernel virtual */
1434 static void *m2v(phys_addr_t maddr) 1435 static void *m2v(phys_addr_t maddr)
1435 { 1436 {
1436 return __ka(m2p(maddr)); 1437 return __ka(m2p(maddr));
1437 } 1438 }
1438 1439
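m2p() masks off the PTE flag bits, shifts the machine address down to a machine frame number, looks up the matching pseudo-physical frame, and shifts back up; m2v() then lifts the result into the kernel mapping via __ka(). A sketch with the lookup stubbed out, since the real mfn_to_pfn() reads the shared machine-to-physical table:

    /* Sketch: machine address -> pseudo-physical address, M2P lookup stubbed. */
    #include <stdio.h>

    #define PAGE_SHIFT 12

    /* Stand-in for mfn_to_pfn(): pretend mfn 0x1234 maps to pfn 0x42. */
    static unsigned long stub_mfn_to_pfn(unsigned long mfn)
    {
            return mfn == 0x1234 ? 0x42 : 0;
    }

    int main(void)
    {
            unsigned long maddr = (0x1234UL << PAGE_SHIFT) | 0xabc;
            unsigned long paddr = stub_mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;

            /* Like m2p() above, this drops the in-page offset. */
            printf("maddr %#lx -> paddr %#lx\n", maddr, paddr);
            return 0;
    }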
1439 static void set_page_prot(void *addr, pgprot_t prot) 1440 static void set_page_prot(void *addr, pgprot_t prot)
1440 { 1441 {
1441 unsigned long pfn = __pa(addr) >> PAGE_SHIFT; 1442 unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
1442 pte_t pte = pfn_pte(pfn, prot); 1443 pte_t pte = pfn_pte(pfn, prot);
1443 1444
1444 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) 1445 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
1445 BUG(); 1446 BUG();
1446 } 1447 }
1447 1448
1448 static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) 1449 static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1449 { 1450 {
1450 unsigned pmdidx, pteidx; 1451 unsigned pmdidx, pteidx;
1451 unsigned ident_pte; 1452 unsigned ident_pte;
1452 unsigned long pfn; 1453 unsigned long pfn;
1453 1454
1454 ident_pte = 0; 1455 ident_pte = 0;
1455 pfn = 0; 1456 pfn = 0;
1456 for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { 1457 for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
1457 pte_t *pte_page; 1458 pte_t *pte_page;
1458 1459
1459 /* Reuse or allocate a page of ptes */ 1460 /* Reuse or allocate a page of ptes */
1460 if (pmd_present(pmd[pmdidx])) 1461 if (pmd_present(pmd[pmdidx]))
1461 pte_page = m2v(pmd[pmdidx].pmd); 1462 pte_page = m2v(pmd[pmdidx].pmd);
1462 else { 1463 else {
1463 /* Check for free pte pages */ 1464 /* Check for free pte pages */
1464 if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) 1465 if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
1465 break; 1466 break;
1466 1467
1467 pte_page = &level1_ident_pgt[ident_pte]; 1468 pte_page = &level1_ident_pgt[ident_pte];
1468 ident_pte += PTRS_PER_PTE; 1469 ident_pte += PTRS_PER_PTE;
1469 1470
1470 pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); 1471 pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
1471 } 1472 }
1472 1473
1473 /* Install mappings */ 1474 /* Install mappings */
1474 for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { 1475 for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
1475 pte_t pte; 1476 pte_t pte;
1476 1477
1477 if (pfn > max_pfn_mapped) 1478 if (pfn > max_pfn_mapped)
1478 max_pfn_mapped = pfn; 1479 max_pfn_mapped = pfn;
1479 1480
1480 if (!pte_none(pte_page[pteidx])) 1481 if (!pte_none(pte_page[pteidx]))
1481 continue; 1482 continue;
1482 1483
1483 pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); 1484 pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
1484 pte_page[pteidx] = pte; 1485 pte_page[pteidx] = pte;
1485 } 1486 }
1486 } 1487 }
1487 1488
1488 for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) 1489 for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
1489 set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); 1490 set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
1490 1491
1491 set_page_prot(pmd, PAGE_KERNEL_RO); 1492 set_page_prot(pmd, PAGE_KERNEL_RO);
1492 } 1493 }
1493 1494
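xen_map_identity_early() installs one page of ptes per pmd slot until it either covers max_pfn or exhausts the static level1_ident_pgt pool, so its reach is bounded by simple page-count arithmetic. Worked out with x86-64 constants for illustration:

    /* Sketch of the coverage arithmetic in xen_map_identity_early(). */
    #include <stdio.h>

    #define PTRS_PER_PTE 512   /* x86-64 values, for illustration */
    #define PTRS_PER_PMD 512
    #define PAGE_SHIFT   12

    int main(void)
    {
            unsigned long per_pmd = PTRS_PER_PTE;                  /* pages */
            unsigned long max_pages = (unsigned long)PTRS_PER_PMD * PTRS_PER_PTE;

            printf("one pmd entry maps %lu MiB\n",
                   (per_pmd << PAGE_SHIFT) >> 20);                 /* 2 MiB */
            printf("a full pmd page maps %lu GiB\n",
                   (max_pages << PAGE_SHIFT) >> 30);               /* 1 GiB */
            return 0;
    }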
1494 #ifdef CONFIG_X86_64 1495 #ifdef CONFIG_X86_64
1495 static void convert_pfn_mfn(void *v) 1496 static void convert_pfn_mfn(void *v)
1496 { 1497 {
1497 pte_t *pte = v; 1498 pte_t *pte = v;
1498 int i; 1499 int i;
1499 1500
1500 /* All levels are converted the same way, so just treat them 1501 /* All levels are converted the same way, so just treat them
1501 as ptes. */ 1502 as ptes. */
1502 for (i = 0; i < PTRS_PER_PTE; i++) 1503 for (i = 0; i < PTRS_PER_PTE; i++)
1503 pte[i] = xen_make_pte(pte[i].pte); 1504 pte[i] = xen_make_pte(pte[i].pte);
1504 } 1505 }
1505 1506
1506 /* 1507 /*
1507 * Set up the initial kernel pagetable. 1508 * Set up the initial kernel pagetable.
1508 * 1509 *
1509 * We can construct this by grafting the Xen provided pagetable into 1510 * We can construct this by grafting the Xen provided pagetable into
1510 * head_64.S's preconstructed pagetables. We copy the Xen L2's into 1511 * head_64.S's preconstructed pagetables. We copy the Xen L2's into
1511 * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This 1512 * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This
1512 * means that only the kernel has a physical mapping to start with - 1513 * means that only the kernel has a physical mapping to start with -
1513 * but that's enough to get __va working. We need to fill in the rest 1514 * but that's enough to get __va working. We need to fill in the rest
1514 * of the physical mapping once some sort of allocator has been set 1515 * of the physical mapping once some sort of allocator has been set
1515 * up. 1516 * up.
1516 */ 1517 */
1517 static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, 1518 static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
1518 unsigned long max_pfn) 1519 unsigned long max_pfn)
1519 { 1520 {
1520 pud_t *l3; 1521 pud_t *l3;
1521 pmd_t *l2; 1522 pmd_t *l2;
1522 1523
1523 /* Zap identity mapping */ 1524 /* Zap identity mapping */
1524 init_level4_pgt[0] = __pgd(0); 1525 init_level4_pgt[0] = __pgd(0);
1525 1526
1526 /* Pre-constructed entries are in pfn, so convert to mfn */ 1527 /* Pre-constructed entries are in pfn, so convert to mfn */
1527 convert_pfn_mfn(init_level4_pgt); 1528 convert_pfn_mfn(init_level4_pgt);
1528 convert_pfn_mfn(level3_ident_pgt); 1529 convert_pfn_mfn(level3_ident_pgt);
1529 convert_pfn_mfn(level3_kernel_pgt); 1530 convert_pfn_mfn(level3_kernel_pgt);
1530 1531
1531 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); 1532 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
1532 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); 1533 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
1533 1534
1534 memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); 1535 memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
1535 memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); 1536 memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
1536 1537
1537 l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); 1538 l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
1538 l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); 1539 l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
1539 memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); 1540 memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
1540 1541
1541 /* Set up identity map */ 1542 /* Set up identity map */
1542 xen_map_identity_early(level2_ident_pgt, max_pfn); 1543 xen_map_identity_early(level2_ident_pgt, max_pfn);
1543 1544
1544 /* Make pagetable pieces RO */ 1545 /* Make pagetable pieces RO */
1545 set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); 1546 set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
1546 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); 1547 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
1547 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); 1548 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
1548 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); 1549 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
1549 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); 1550 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
1550 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); 1551 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
1551 1552
1552 /* Pin down new L4 */ 1553 /* Pin down new L4 */
1553 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, 1554 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
1554 PFN_DOWN(__pa_symbol(init_level4_pgt))); 1555 PFN_DOWN(__pa_symbol(init_level4_pgt)));
1555 1556
1556 /* Unpin Xen-provided one */ 1557 /* Unpin Xen-provided one */
1557 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); 1558 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1558 1559
1559 /* Switch over */ 1560 /* Switch over */
1560 pgd = init_level4_pgt; 1561 pgd = init_level4_pgt;
1561 1562
1562 /* 1563 /*
1563 * At this stage there can be no user pgd, and no page 1564 * At this stage there can be no user pgd, and no page
1564 * structure to attach it to, so make sure we just set kernel 1565 * structure to attach it to, so make sure we just set kernel
1565 * pgd. 1566 * pgd.
1566 */ 1567 */
1567 xen_mc_batch(); 1568 xen_mc_batch();
1568 __xen_write_cr3(true, __pa(pgd)); 1569 __xen_write_cr3(true, __pa(pgd));
1569 xen_mc_issue(PARAVIRT_LAZY_CPU); 1570 xen_mc_issue(PARAVIRT_LAZY_CPU);
1570 1571
1571 reserve_early(__pa(xen_start_info->pt_base), 1572 reserve_early(__pa(xen_start_info->pt_base),
1572 __pa(xen_start_info->pt_base + 1573 __pa(xen_start_info->pt_base +
1573 xen_start_info->nr_pt_frames * PAGE_SIZE), 1574 xen_start_info->nr_pt_frames * PAGE_SIZE),
1574 "XEN PAGETABLES"); 1575 "XEN PAGETABLES");
1575 1576
1576 return pgd; 1577 return pgd;
1577 } 1578 }
1578 #else /* !CONFIG_X86_64 */ 1579 #else /* !CONFIG_X86_64 */
1579 static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; 1580 static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
1580 1581
1581 static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, 1582 static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
1582 unsigned long max_pfn) 1583 unsigned long max_pfn)
1583 { 1584 {
1584 pmd_t *kernel_pmd; 1585 pmd_t *kernel_pmd;
1585 1586
1586 init_pg_tables_start = __pa(pgd); 1587 init_pg_tables_start = __pa(pgd);
1587 init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; 1588 init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
1588 max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); 1589 max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
1589 1590
1590 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); 1591 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
1591 memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); 1592 memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
1592 1593
1593 xen_map_identity_early(level2_kernel_pgt, max_pfn); 1594 xen_map_identity_early(level2_kernel_pgt, max_pfn);
1594 1595
1595 memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); 1596 memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
1596 set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], 1597 set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
1597 __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); 1598 __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
1598 1599
1599 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); 1600 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
1600 set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); 1601 set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
1601 set_page_prot(empty_zero_page, PAGE_KERNEL_RO); 1602 set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
1602 1603
1603 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); 1604 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1604 1605
1605 xen_write_cr3(__pa(swapper_pg_dir)); 1606 xen_write_cr3(__pa(swapper_pg_dir));
1606 1607
1607 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); 1608 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
1608 1609
1609 return swapper_pg_dir; 1610 return swapper_pg_dir;
1610 } 1611 }
1611 #endif /* CONFIG_X86_64 */ 1612 #endif /* CONFIG_X86_64 */
1612 1613
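Both the 64-bit and 32-bit paths above pin and unpin pagetables by physical frame number, and PFN_DOWN() is nothing more than a right shift of the physical address by PAGE_SHIFT. A one-liner sketch of that rounding:

    /* Sketch of PFN_DOWN() as used with pin_pagetable_pfn() above. */
    #include <stdio.h>

    #define PAGE_SHIFT  12
    #define PFN_DOWN(x) ((x) >> PAGE_SHIFT)

    int main(void)
    {
            unsigned long pa = 0x12345678UL;                   /* illustrative */
            printf("pa %#lx -> pfn %#lx\n", pa, PFN_DOWN(pa)); /* pfn 0x12345 */
            return 0;
    }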
1613 /* First C function to be called on Xen boot */ 1614 /* First C function to be called on Xen boot */
1614 asmlinkage void __init xen_start_kernel(void) 1615 asmlinkage void __init xen_start_kernel(void)
1615 { 1616 {
1616 pgd_t *pgd; 1617 pgd_t *pgd;
1617 1618
1618 if (!xen_start_info) 1619 if (!xen_start_info)
1619 return; 1620 return;
1620 1621
1621 xen_domain_type = XEN_PV_DOMAIN; 1622 xen_domain_type = XEN_PV_DOMAIN;
1622 1623
1623 BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); 1624 BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0);
1624 1625
1625 xen_setup_features(); 1626 xen_setup_features();
1626 1627
1627 /* Install Xen paravirt ops */ 1628 /* Install Xen paravirt ops */
1628 pv_info = xen_info; 1629 pv_info = xen_info;
1629 pv_init_ops = xen_init_ops; 1630 pv_init_ops = xen_init_ops;
1630 pv_time_ops = xen_time_ops; 1631 pv_time_ops = xen_time_ops;
1631 pv_cpu_ops = xen_cpu_ops; 1632 pv_cpu_ops = xen_cpu_ops;
1632 pv_apic_ops = xen_apic_ops; 1633 pv_apic_ops = xen_apic_ops;
1633 pv_mmu_ops = xen_mmu_ops; 1634 pv_mmu_ops = xen_mmu_ops;
1634 1635
1635 xen_init_irq_ops(); 1636 xen_init_irq_ops();
1636 1637
1637 #ifdef CONFIG_X86_LOCAL_APIC 1638 #ifdef CONFIG_X86_LOCAL_APIC
1638 /* 1639 /*
1639 * set up the basic apic ops. 1640 * set up the basic apic ops.
1640 */ 1641 */
1641 apic_ops = &xen_basic_apic_ops; 1642 apic_ops = &xen_basic_apic_ops;
1642 #endif 1643 #endif
1643 1644
1644 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { 1645 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
1645 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; 1646 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
1646 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; 1647 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
1647 } 1648 }
1648 1649
1649 machine_ops = xen_machine_ops; 1650 machine_ops = xen_machine_ops;
1650 1651
1651 #ifdef CONFIG_X86_64 1652 #ifdef CONFIG_X86_64
1652 /* Disable until we have direct per-cpu data access. */ 1653 /* Disable until we have direct per-cpu data access. */
1653 have_vcpu_info_placement = 0; 1654 have_vcpu_info_placement = 0;
1654 x86_64_init_pda(); 1655 x86_64_init_pda();
1655 #endif 1656 #endif
1656 1657
1657 xen_smp_init(); 1658 xen_smp_init();
1658 1659
1659 /* Get mfn list */ 1660 /* Get mfn list */
1660 if (!xen_feature(XENFEAT_auto_translated_physmap)) 1661 if (!xen_feature(XENFEAT_auto_translated_physmap))
1661 xen_build_dynamic_phys_to_machine(); 1662 xen_build_dynamic_phys_to_machine();
1662 1663
1663 pgd = (pgd_t *)xen_start_info->pt_base; 1664 pgd = (pgd_t *)xen_start_info->pt_base;
1664 1665
1665 /* Prevent unwanted bits from being set in PTEs. */ 1666 /* Prevent unwanted bits from being set in PTEs. */
1666 __supported_pte_mask &= ~_PAGE_GLOBAL; 1667 __supported_pte_mask &= ~_PAGE_GLOBAL;
1667 if (!xen_initial_domain()) 1668 if (!xen_initial_domain())
1668 __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); 1669 __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
1669 1670
1670 /* Don't do the full vcpu_info placement stuff until we have a 1671 /* Don't do the full vcpu_info placement stuff until we have a
1671 possible map and a non-dummy shared_info. */ 1672 possible map and a non-dummy shared_info. */
1672 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; 1673 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
1673 1674
1674 xen_raw_console_write("mapping kernel into physical memory\n"); 1675 xen_raw_console_write("mapping kernel into physical memory\n");
1675 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); 1676 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
1676 1677
1677 init_mm.pgd = pgd; 1678 init_mm.pgd = pgd;
1678 1679
1679 /* keep using Xen gdt for now; no urgent need to change it */ 1680 /* keep using Xen gdt for now; no urgent need to change it */
1680 1681
1681 pv_info.kernel_rpl = 1; 1682 pv_info.kernel_rpl = 1;
1682 if (xen_feature(XENFEAT_supervisor_mode_kernel)) 1683 if (xen_feature(XENFEAT_supervisor_mode_kernel))
1683 pv_info.kernel_rpl = 0; 1684 pv_info.kernel_rpl = 0;
1684 1685
1685 /* set the limit of our address space */ 1686 /* set the limit of our address space */
1686 xen_reserve_top(); 1687 xen_reserve_top();
1687 1688
1688 #ifdef CONFIG_X86_32 1689 #ifdef CONFIG_X86_32
1689 /* set up basic CPUID stuff */ 1690 /* set up basic CPUID stuff */
1690 cpu_detect(&new_cpu_data); 1691 cpu_detect(&new_cpu_data);
1691 new_cpu_data.hard_math = 1; 1692 new_cpu_data.hard_math = 1;
1692 new_cpu_data.x86_capability[0] = cpuid_edx(1); 1693 new_cpu_data.x86_capability[0] = cpuid_edx(1);
1693 #endif 1694 #endif
1694 1695
1695 /* Poke various useful things into boot_params */ 1696 /* Poke various useful things into boot_params */
1696 boot_params.hdr.type_of_loader = (9 << 4) | 0; 1697 boot_params.hdr.type_of_loader = (9 << 4) | 0;
1697 boot_params.hdr.ramdisk_image = xen_start_info->mod_start 1698 boot_params.hdr.ramdisk_image = xen_start_info->mod_start
1698 ? __pa(xen_start_info->mod_start) : 0; 1699 ? __pa(xen_start_info->mod_start) : 0;
1699 boot_params.hdr.ramdisk_size = xen_start_info->mod_len; 1700 boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
1700 boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); 1701 boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
1701 1702
1702 if (!xen_initial_domain()) { 1703 if (!xen_initial_domain()) {
1703 add_preferred_console("xenboot", 0, NULL); 1704 add_preferred_console("xenboot", 0, NULL);
1704 add_preferred_console("tty", 0, NULL); 1705 add_preferred_console("tty", 0, NULL);
1705 add_preferred_console("hvc", 0, NULL); 1706 add_preferred_console("hvc", 0, NULL);
1706 } 1707 }
1707 1708
1708 xen_raw_console_write("about to get started...\n"); 1709 xen_raw_console_write("about to get started...\n");
1709 1710
1710 /* Start the world */ 1711 /* Start the world */
1711 #ifdef CONFIG_X86_32 1712 #ifdef CONFIG_X86_32
1712 i386_start_kernel(); 1713 i386_start_kernel();
1713 #else 1714 #else
1714 x86_64_start_reservations((char *)__pa_symbol(&boot_params)); 1715 x86_64_start_reservations((char *)__pa_symbol(&boot_params));
1715 #endif 1716 #endif
1716 } 1717 }
1717 1718
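The type_of_loader byte poked into boot_params encodes a loader ID in the high nibble and a loader version in the low nibble; (9 << 4) | 0 identifies loader type 9 (listed as Xen in the x86 boot protocol documentation), version 0. A decoding sketch:

    /* Sketch: decoding the type_of_loader byte set above. */
    #include <stdio.h>

    int main(void)
    {
            unsigned char type_of_loader = (9 << 4) | 0;

            printf("loader id %u, version %u\n",
                   type_of_loader >> 4, type_of_loader & 0xf);  /* 9, 0 */
            return 0;
    }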
drivers/xen/balloon.c
1 /****************************************************************************** 1 /******************************************************************************
2 * balloon.c 2 * balloon.c
3 * 3 *
4 * Xen balloon driver - enables returning/claiming memory to/from Xen. 4 * Xen balloon driver - enables returning/claiming memory to/from Xen.
5 * 5 *
6 * Copyright (c) 2003, B Dragovic 6 * Copyright (c) 2003, B Dragovic
7 * Copyright (c) 2003-2004, M Williamson, K Fraser 7 * Copyright (c) 2003-2004, M Williamson, K Fraser
8 * Copyright (c) 2005 Dan M. Smith, IBM Corporation 8 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
9 * 9 *
10 * This program is free software; you can redistribute it and/or 10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License version 2 11 * modify it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation; or, when distributed 12 * as published by the Free Software Foundation; or, when distributed
13 * separately from the Linux kernel or incorporated into other 13 * separately from the Linux kernel or incorporated into other
14 * software packages, subject to the following license: 14 * software packages, subject to the following license:
15 * 15 *
16 * Permission is hereby granted, free of charge, to any person obtaining a copy 16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this source file (the "Software"), to deal in the Software without 17 * of this source file (the "Software"), to deal in the Software without
18 * restriction, including without limitation the rights to use, copy, modify, 18 * restriction, including without limitation the rights to use, copy, modify,
19 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 19 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
20 * and to permit persons to whom the Software is furnished to do so, subject to 20 * and to permit persons to whom the Software is furnished to do so, subject to
21 * the following conditions: 21 * the following conditions:
22 * 22 *
23 * The above copyright notice and this permission notice shall be included in 23 * The above copyright notice and this permission notice shall be included in
24 * all copies or substantial portions of the Software. 24 * all copies or substantial portions of the Software.
25 * 25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
32 * IN THE SOFTWARE. 32 * IN THE SOFTWARE.
33 */ 33 */
34 34
35 #include <linux/kernel.h> 35 #include <linux/kernel.h>
36 #include <linux/module.h> 36 #include <linux/module.h>
37 #include <linux/sched.h> 37 #include <linux/sched.h>
38 #include <linux/errno.h> 38 #include <linux/errno.h>
39 #include <linux/mm.h> 39 #include <linux/mm.h>
40 #include <linux/bootmem.h> 40 #include <linux/bootmem.h>
41 #include <linux/pagemap.h> 41 #include <linux/pagemap.h>
42 #include <linux/highmem.h> 42 #include <linux/highmem.h>
43 #include <linux/mutex.h> 43 #include <linux/mutex.h>
44 #include <linux/list.h> 44 #include <linux/list.h>
45 #include <linux/sysdev.h> 45 #include <linux/sysdev.h>
46 46
47 #include <asm/xen/hypervisor.h>
48 #include <asm/page.h> 47 #include <asm/page.h>
49 #include <asm/pgalloc.h> 48 #include <asm/pgalloc.h>
50 #include <asm/pgtable.h> 49 #include <asm/pgtable.h>
51 #include <asm/uaccess.h> 50 #include <asm/uaccess.h>
52 #include <asm/tlb.h> 51 #include <asm/tlb.h>
53 52
53 #include <asm/xen/hypervisor.h>
54 #include <asm/xen/hypercall.h>
55 #include <xen/interface/xen.h>
54 #include <xen/interface/memory.h> 56 #include <xen/interface/memory.h>
55 #include <xen/xenbus.h> 57 #include <xen/xenbus.h>
56 #include <xen/features.h> 58 #include <xen/features.h>
57 #include <xen/page.h> 59 #include <xen/page.h>
58 60
59 #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) 61 #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
60 62
61 #define BALLOON_CLASS_NAME "xen_memory" 63 #define BALLOON_CLASS_NAME "xen_memory"
62 64
63 struct balloon_stats { 65 struct balloon_stats {
64 /* We aim for 'current allocation' == 'target allocation'. */ 66 /* We aim for 'current allocation' == 'target allocation'. */
65 unsigned long current_pages; 67 unsigned long current_pages;
66 unsigned long target_pages; 68 unsigned long target_pages;
67 /* We may hit the hard limit in Xen. If we do then we remember it. */ 69 /* We may hit the hard limit in Xen. If we do then we remember it. */
68 unsigned long hard_limit; 70 unsigned long hard_limit;
69 /* 71 /*
70 * Drivers may alter the memory reservation independently, but they 72 * Drivers may alter the memory reservation independently, but they
71 * must inform the balloon driver so we avoid hitting the hard limit. 73 * must inform the balloon driver so we avoid hitting the hard limit.
72 */ 74 */
73 unsigned long driver_pages; 75 unsigned long driver_pages;
74 /* Number of pages in high- and low-memory balloons. */ 76 /* Number of pages in high- and low-memory balloons. */
75 unsigned long balloon_low; 77 unsigned long balloon_low;
76 unsigned long balloon_high; 78 unsigned long balloon_high;
77 }; 79 };
78 80
79 static DEFINE_MUTEX(balloon_mutex); 81 static DEFINE_MUTEX(balloon_mutex);
80 82
81 static struct sys_device balloon_sysdev; 83 static struct sys_device balloon_sysdev;
82 84
83 static int register_balloon(struct sys_device *sysdev); 85 static int register_balloon(struct sys_device *sysdev);
84 86
85 /* 87 /*
86 * Protects atomic reservation decrease/increase against concurrent increases. 88 * Protects atomic reservation decrease/increase against concurrent increases.
87 * Also protects non-atomic updates of current_pages and driver_pages, and 89 * Also protects non-atomic updates of current_pages and driver_pages, and
88 * balloon lists. 90 * balloon lists.
89 */ 91 */
90 static DEFINE_SPINLOCK(balloon_lock); 92 static DEFINE_SPINLOCK(balloon_lock);
91 93
92 static struct balloon_stats balloon_stats; 94 static struct balloon_stats balloon_stats;
93 95
94 /* We increase/decrease in batches which fit in a page */ 96 /* We increase/decrease in batches which fit in a page */
95 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; 97 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
96 98
97 /* VM /proc information for memory */ 99 /* VM /proc information for memory */
98 extern unsigned long totalram_pages; 100 extern unsigned long totalram_pages;
99 101
100 #ifdef CONFIG_HIGHMEM 102 #ifdef CONFIG_HIGHMEM
101 extern unsigned long totalhigh_pages; 103 extern unsigned long totalhigh_pages;
102 #define inc_totalhigh_pages() (totalhigh_pages++) 104 #define inc_totalhigh_pages() (totalhigh_pages++)
103 #define dec_totalhigh_pages() (totalhigh_pages--) 105 #define dec_totalhigh_pages() (totalhigh_pages--)
104 #else 106 #else
105 #define inc_totalhigh_pages() do {} while(0) 107 #define inc_totalhigh_pages() do {} while(0)
106 #define dec_totalhigh_pages() do {} while(0) 108 #define dec_totalhigh_pages() do {} while(0)
107 #endif 109 #endif
108 110
109 /* List of ballooned pages, threaded through the mem_map array. */ 111 /* List of ballooned pages, threaded through the mem_map array. */
110 static LIST_HEAD(ballooned_pages); 112 static LIST_HEAD(ballooned_pages);
111 113
112 /* Main work function, always executed in process context. */ 114 /* Main work function, always executed in process context. */
113 static void balloon_process(struct work_struct *work); 115 static void balloon_process(struct work_struct *work);
114 static DECLARE_WORK(balloon_worker, balloon_process); 116 static DECLARE_WORK(balloon_worker, balloon_process);
115 static struct timer_list balloon_timer; 117 static struct timer_list balloon_timer;
116 118
117 /* When ballooning out (allocating memory to return to Xen) we don't really 119 /* When ballooning out (allocating memory to return to Xen) we don't really
118 want the kernel to try too hard since that can trigger the oom killer. */ 120 want the kernel to try too hard since that can trigger the oom killer. */
119 #define GFP_BALLOON \ 121 #define GFP_BALLOON \
120 (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC) 122 (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
121 123
122 static void scrub_page(struct page *page) 124 static void scrub_page(struct page *page)
123 { 125 {
124 #ifdef CONFIG_XEN_SCRUB_PAGES 126 #ifdef CONFIG_XEN_SCRUB_PAGES
125 clear_highpage(page); 127 clear_highpage(page);
126 #endif 128 #endif
127 } 129 }
128 130
129 /* balloon_append: add the given page to the balloon. */ 131 /* balloon_append: add the given page to the balloon. */
130 static void balloon_append(struct page *page) 132 static void balloon_append(struct page *page)
131 { 133 {
132 /* Lowmem is re-populated first, so highmem pages go at list tail. */ 134 /* Lowmem is re-populated first, so highmem pages go at list tail. */
133 if (PageHighMem(page)) { 135 if (PageHighMem(page)) {
134 list_add_tail(&page->lru, &ballooned_pages); 136 list_add_tail(&page->lru, &ballooned_pages);
135 balloon_stats.balloon_high++; 137 balloon_stats.balloon_high++;
136 dec_totalhigh_pages(); 138 dec_totalhigh_pages();
137 } else { 139 } else {
138 list_add(&page->lru, &ballooned_pages); 140 list_add(&page->lru, &ballooned_pages);
139 balloon_stats.balloon_low++; 141 balloon_stats.balloon_low++;
140 } 142 }
141 } 143 }
142 144
143 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ 145 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
144 static struct page *balloon_retrieve(void) 146 static struct page *balloon_retrieve(void)
145 { 147 {
146 struct page *page; 148 struct page *page;
147 149
148 if (list_empty(&ballooned_pages)) 150 if (list_empty(&ballooned_pages))
149 return NULL; 151 return NULL;
150 152
151 page = list_entry(ballooned_pages.next, struct page, lru); 153 page = list_entry(ballooned_pages.next, struct page, lru);
152 list_del(&page->lru); 154 list_del(&page->lru);
153 155
154 if (PageHighMem(page)) { 156 if (PageHighMem(page)) {
155 balloon_stats.balloon_high--; 157 balloon_stats.balloon_high--;
156 inc_totalhigh_pages(); 158 inc_totalhigh_pages();
157 } 159 }
158 else 160 else
159 balloon_stats.balloon_low--; 161 balloon_stats.balloon_low--;
160 162
161 return page; 163 return page;
162 } 164 }
163 165
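balloon_append() parks highmem pages at the tail and lowmem pages at the head, while balloon_retrieve() always pops the head, so lowmem is re-populated first, as the comment says. A sketch of that head/tail discipline, modelling the list with a small array rather than list.h (no bounds checking, sketch only):

    /* Sketch of the head/tail discipline in balloon_append()/balloon_retrieve(). */
    #include <stdio.h>
    #include <string.h>

    static const char *list[8];
    static int n;

    static void add_head(const char *s)   /* like list_add() */
    {
            memmove(&list[1], &list[0], n * sizeof(list[0]));
            list[0] = s;
            n++;
    }

    static void add_tail(const char *s)   /* like list_add_tail() */
    {
            list[n++] = s;
    }

    int main(void)
    {
            add_tail("highmem page");     /* highmem goes to the tail */
            add_head("lowmem page");      /* lowmem goes to the head */

            /* balloon_retrieve() pops the head, so lowmem comes back first. */
            printf("retrieved: %s\n", list[0]);
            return 0;
    }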
164 static struct page *balloon_first_page(void) 166 static struct page *balloon_first_page(void)
165 { 167 {
166 if (list_empty(&ballooned_pages)) 168 if (list_empty(&ballooned_pages))
167 return NULL; 169 return NULL;
168 return list_entry(ballooned_pages.next, struct page, lru); 170 return list_entry(ballooned_pages.next, struct page, lru);
169 } 171 }
170 172
171 static struct page *balloon_next_page(struct page *page) 173 static struct page *balloon_next_page(struct page *page)
172 { 174 {
173 struct list_head *next = page->lru.next; 175 struct list_head *next = page->lru.next;
174 if (next == &ballooned_pages) 176 if (next == &ballooned_pages)
175 return NULL; 177 return NULL;
176 return list_entry(next, struct page, lru); 178 return list_entry(next, struct page, lru);
177 } 179 }
178 180
179 static void balloon_alarm(unsigned long unused) 181 static void balloon_alarm(unsigned long unused)
180 { 182 {
181 schedule_work(&balloon_worker); 183 schedule_work(&balloon_worker);
182 } 184 }
183 185
184 static unsigned long current_target(void) 186 static unsigned long current_target(void)
185 { 187 {
186 unsigned long target = min(balloon_stats.target_pages, balloon_stats.hard_limit); 188 unsigned long target = min(balloon_stats.target_pages, balloon_stats.hard_limit);
187 189
188 target = min(target, 190 target = min(target,
189 balloon_stats.current_pages + 191 balloon_stats.current_pages +
190 balloon_stats.balloon_low + 192 balloon_stats.balloon_low +
191 balloon_stats.balloon_high); 193 balloon_stats.balloon_high);
192 194
193 return target; 195 return target;
194 } 196 }
195 197
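current_target() clamps the requested target twice: first against the Xen hard limit, then against the most we could ever reach, i.e. what we hold now plus everything parked in the balloon. Worked through with hypothetical numbers:

    /* Sketch of the two-stage clamp in current_target() (hypothetical numbers). */
    #include <stdio.h>

    static unsigned long min_ul(unsigned long a, unsigned long b)
    {
            return a < b ? a : b;
    }

    int main(void)
    {
            unsigned long target_pages = 300000, hard_limit = 250000;
            unsigned long current_pages = 200000;
            unsigned long balloon_low = 30000, balloon_high = 10000;

            unsigned long target = min_ul(target_pages, hard_limit);
            target = min_ul(target, current_pages + balloon_low + balloon_high);

            printf("effective target: %lu pages\n", target);  /* 240000 */
            return 0;
    }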
196 static int increase_reservation(unsigned long nr_pages) 198 static int increase_reservation(unsigned long nr_pages)
197 { 199 {
198 unsigned long pfn, i, flags; 200 unsigned long pfn, i, flags;
199 struct page *page; 201 struct page *page;
200 long rc; 202 long rc;
201 struct xen_memory_reservation reservation = { 203 struct xen_memory_reservation reservation = {
202 .address_bits = 0, 204 .address_bits = 0,
203 .extent_order = 0, 205 .extent_order = 0,
204 .domid = DOMID_SELF 206 .domid = DOMID_SELF
205 }; 207 };
206 208
207 if (nr_pages > ARRAY_SIZE(frame_list)) 209 if (nr_pages > ARRAY_SIZE(frame_list))
208 nr_pages = ARRAY_SIZE(frame_list); 210 nr_pages = ARRAY_SIZE(frame_list);
209 211
210 spin_lock_irqsave(&balloon_lock, flags); 212 spin_lock_irqsave(&balloon_lock, flags);
211 213
212 page = balloon_first_page(); 214 page = balloon_first_page();
213 for (i = 0; i < nr_pages; i++) { 215 for (i = 0; i < nr_pages; i++) {
214 BUG_ON(page == NULL); 216 BUG_ON(page == NULL);
215 frame_list[i] = page_to_pfn(page); 217 frame_list[i] = page_to_pfn(page);
216 page = balloon_next_page(page); 218 page = balloon_next_page(page);
217 } 219 }
218 220
219 set_xen_guest_handle(reservation.extent_start, frame_list); 221 set_xen_guest_handle(reservation.extent_start, frame_list);
220 reservation.nr_extents = nr_pages; 222 reservation.nr_extents = nr_pages;
221 rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); 223 rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
222 if (rc < nr_pages) { 224 if (rc < nr_pages) {
223 if (rc > 0) { 225 if (rc > 0) {
224 int ret; 226 int ret;
225 227
226 /* We hit the Xen hard limit: reprobe. */ 228 /* We hit the Xen hard limit: reprobe. */
227 reservation.nr_extents = rc; 229 reservation.nr_extents = rc;
228 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, 230 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
229 &reservation); 231 &reservation);
230 BUG_ON(ret != rc); 232 BUG_ON(ret != rc);
231 } 233 }
232 if (rc >= 0) 234 if (rc >= 0)
233 balloon_stats.hard_limit = (balloon_stats.current_pages + rc - 235 balloon_stats.hard_limit = (balloon_stats.current_pages + rc -
234 balloon_stats.driver_pages); 236 balloon_stats.driver_pages);
235 goto out; 237 goto out;
236 } 238 }
237 239
238 for (i = 0; i < nr_pages; i++) { 240 for (i = 0; i < nr_pages; i++) {
239 page = balloon_retrieve(); 241 page = balloon_retrieve();
240 BUG_ON(page == NULL); 242 BUG_ON(page == NULL);
241 243
242 pfn = page_to_pfn(page); 244 pfn = page_to_pfn(page);
243 BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && 245 BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
244 phys_to_machine_mapping_valid(pfn)); 246 phys_to_machine_mapping_valid(pfn));
245 247
246 set_phys_to_machine(pfn, frame_list[i]); 248 set_phys_to_machine(pfn, frame_list[i]);
247 249
248 /* Link back into the page tables if not highmem. */ 250 /* Link back into the page tables if not highmem. */
249 if (pfn < max_low_pfn) { 251 if (pfn < max_low_pfn) {
250 int ret; 252 int ret;
251 ret = HYPERVISOR_update_va_mapping( 253 ret = HYPERVISOR_update_va_mapping(
252 (unsigned long)__va(pfn << PAGE_SHIFT), 254 (unsigned long)__va(pfn << PAGE_SHIFT),
253 mfn_pte(frame_list[i], PAGE_KERNEL), 255 mfn_pte(frame_list[i], PAGE_KERNEL),
254 0); 256 0);
255 BUG_ON(ret); 257 BUG_ON(ret);
256 } 258 }
257 259
258 /* Relinquish the page back to the allocator. */ 260 /* Relinquish the page back to the allocator. */
259 ClearPageReserved(page); 261 ClearPageReserved(page);
260 init_page_count(page); 262 init_page_count(page);
261 __free_page(page); 263 __free_page(page);
262 } 264 }
263 265
264 balloon_stats.current_pages += nr_pages; 266 balloon_stats.current_pages += nr_pages;
265 totalram_pages = balloon_stats.current_pages; 267 totalram_pages = balloon_stats.current_pages;
266 268
267 out: 269 out:
268 spin_unlock_irqrestore(&balloon_lock, flags); 270 spin_unlock_irqrestore(&balloon_lock, flags);
269 271
270 return 0; 272 return 0;
271 } 273 }
272 274
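XENMEM_populate_physmap reports how many extents it actually populated; on a partial success, increase_reservation() hands the rc pages it did get straight back to Xen and records the implied hard limit. The limit arithmetic, with hypothetical numbers:

    /* Sketch of the hard-limit accounting in increase_reservation(). */
    #include <stdio.h>

    int main(void)
    {
            long nr_pages = 64, rc = 40;  /* hypothetical hypercall result */
            unsigned long current_pages = 100000, driver_pages = 500;

            if (rc >= 0 && rc < nr_pages) {
                    /* Remember where Xen stopped us, as the code above does. */
                    unsigned long hard_limit = current_pages + rc - driver_pages;
                    printf("hard limit: %lu pages\n", hard_limit);  /* 99540 */
            }
            return 0;
    }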
273 static int decrease_reservation(unsigned long nr_pages) 275 static int decrease_reservation(unsigned long nr_pages)
274 { 276 {
275 unsigned long pfn, i, flags; 277 unsigned long pfn, i, flags;
276 struct page *page; 278 struct page *page;
277 int need_sleep = 0; 279 int need_sleep = 0;
278 int ret; 280 int ret;
279 struct xen_memory_reservation reservation = { 281 struct xen_memory_reservation reservation = {
280 .address_bits = 0, 282 .address_bits = 0,
281 .extent_order = 0, 283 .extent_order = 0,
282 .domid = DOMID_SELF 284 .domid = DOMID_SELF
283 }; 285 };
284 286
285 if (nr_pages > ARRAY_SIZE(frame_list)) 287 if (nr_pages > ARRAY_SIZE(frame_list))
286 nr_pages = ARRAY_SIZE(frame_list); 288 nr_pages = ARRAY_SIZE(frame_list);
287 289
288 for (i = 0; i < nr_pages; i++) { 290 for (i = 0; i < nr_pages; i++) {
289 if ((page = alloc_page(GFP_BALLOON)) == NULL) { 291 if ((page = alloc_page(GFP_BALLOON)) == NULL) {
290 nr_pages = i; 292 nr_pages = i;
291 need_sleep = 1; 293 need_sleep = 1;
292 break; 294 break;
293 } 295 }
294 296
295 pfn = page_to_pfn(page); 297 pfn = page_to_pfn(page);
296 frame_list[i] = pfn_to_mfn(pfn); 298 frame_list[i] = pfn_to_mfn(pfn);
297 299
298 scrub_page(page); 300 scrub_page(page);
299 } 301 }
300 302
301 /* Ensure that ballooned highmem pages don't have kmaps. */ 303 /* Ensure that ballooned highmem pages don't have kmaps. */
302 kmap_flush_unused(); 304 kmap_flush_unused();
303 flush_tlb_all(); 305 flush_tlb_all();
304 306
305 spin_lock_irqsave(&balloon_lock, flags); 307 spin_lock_irqsave(&balloon_lock, flags);
306 308
307 /* No more mappings: invalidate P2M and add to balloon. */ 309 /* No more mappings: invalidate P2M and add to balloon. */
308 for (i = 0; i < nr_pages; i++) { 310 for (i = 0; i < nr_pages; i++) {
309 pfn = mfn_to_pfn(frame_list[i]); 311 pfn = mfn_to_pfn(frame_list[i]);
310 set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 312 set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
311 balloon_append(pfn_to_page(pfn)); 313 balloon_append(pfn_to_page(pfn));
312 } 314 }
313 315
314 set_xen_guest_handle(reservation.extent_start, frame_list); 316 set_xen_guest_handle(reservation.extent_start, frame_list);
315 reservation.nr_extents = nr_pages; 317 reservation.nr_extents = nr_pages;
316 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); 318 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
317 BUG_ON(ret != nr_pages); 319 BUG_ON(ret != nr_pages);
318 320
319 balloon_stats.current_pages -= nr_pages; 321 balloon_stats.current_pages -= nr_pages;
320 totalram_pages = balloon_stats.current_pages; 322 totalram_pages = balloon_stats.current_pages;
321 323
322 spin_unlock_irqrestore(&balloon_lock, flags); 324 spin_unlock_irqrestore(&balloon_lock, flags);
323 325
324 return need_sleep; 326 return need_sleep;
325 } 327 }
326 328
327 /* 329 /*
328 * We avoid multiple worker processes conflicting via the balloon mutex. 330 * We avoid multiple worker processes conflicting via the balloon mutex.
329 * We may of course race with updates of the target counts (which are protected 331 * We may of course race with updates of the target counts (which are protected
330 * by the balloon lock), or with changes to the Xen hard limit, but we will 332 * by the balloon lock), or with changes to the Xen hard limit, but we will
331 * recover from these in time. 333 * recover from these in time.
332 */ 334 */
333 static void balloon_process(struct work_struct *work) 335 static void balloon_process(struct work_struct *work)
334 { 336 {
335 int need_sleep = 0; 337 int need_sleep = 0;
336 long credit; 338 long credit;
337 339
338 mutex_lock(&balloon_mutex); 340 mutex_lock(&balloon_mutex);
339 341
340 do { 342 do {
341 credit = current_target() - balloon_stats.current_pages; 343 credit = current_target() - balloon_stats.current_pages;
342 if (credit > 0) 344 if (credit > 0)
343 need_sleep = (increase_reservation(credit) != 0); 345 need_sleep = (increase_reservation(credit) != 0);
344 if (credit < 0) 346 if (credit < 0)
345 need_sleep = (decrease_reservation(-credit) != 0); 347 need_sleep = (decrease_reservation(-credit) != 0);
346 348
347 #ifndef CONFIG_PREEMPT 349 #ifndef CONFIG_PREEMPT
348 if (need_resched()) 350 if (need_resched())
349 schedule(); 351 schedule();
350 #endif 352 #endif
351 } while ((credit != 0) && !need_sleep); 353 } while ((credit != 0) && !need_sleep);
352 354
353 /* Schedule more work if there is some still to be done. */ 355 /* Schedule more work if there is some still to be done. */
354 if (current_target() != balloon_stats.current_pages) 356 if (current_target() != balloon_stats.current_pages)
355 mod_timer(&balloon_timer, jiffies + HZ); 357 mod_timer(&balloon_timer, jiffies + HZ);
356 358
357 mutex_unlock(&balloon_mutex); 359 mutex_unlock(&balloon_mutex);
358 } 360 }
359 361
360 /* Resets the Xen limit, sets new target, and kicks off processing. */ 362 /* Resets the Xen limit, sets new target, and kicks off processing. */
361 static void balloon_set_new_target(unsigned long target) 363 static void balloon_set_new_target(unsigned long target)
362 { 364 {
363 /* No need for lock. Not read-modify-write updates. */ 365 /* No need for lock. Not read-modify-write updates. */
364 balloon_stats.hard_limit = ~0UL; 366 balloon_stats.hard_limit = ~0UL;
365 balloon_stats.target_pages = target; 367 balloon_stats.target_pages = target;
366 schedule_work(&balloon_worker); 368 schedule_work(&balloon_worker);
367 } 369 }
368 370
369 static struct xenbus_watch target_watch = 371 static struct xenbus_watch target_watch =
370 { 372 {
371 .node = "memory/target" 373 .node = "memory/target"
372 }; 374 };
373 375
374 /* React to a change in the target key */ 376 /* React to a change in the target key */
375 static void watch_target(struct xenbus_watch *watch, 377 static void watch_target(struct xenbus_watch *watch,
376 const char **vec, unsigned int len) 378 const char **vec, unsigned int len)
377 { 379 {
378 unsigned long long new_target; 380 unsigned long long new_target;
379 int err; 381 int err;
380 382
381 err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); 383 err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
382 if (err != 1) { 384 if (err != 1) {
383 /* This is ok (for domain0 at least) - so just return */ 385 /* This is ok (for domain0 at least) - so just return */
384 return; 386 return;
385 } 387 }
386 388
387 /* The given memory/target value is in KiB, so it needs converting to 389 /* The given memory/target value is in KiB, so it needs converting to
388 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. 390 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
389 */ 391 */
390 balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); 392 balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
391 } 393 }
392 394
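memory/target is written in KiB, so watch_target() shifts by PAGE_SHIFT - 10: PAGE_SHIFT converts bytes to pages, and the 10 accounts for a KiB already being 2^10 bytes. It is the exact inverse of the PAGES2KB() macro near the top of the file. Worked example with 4 KiB pages:

    /* Sketch: the KiB-to-pages conversion in watch_target() / PAGES2KB(). */
    #include <stdio.h>

    #define PAGE_SHIFT 12   /* 4 KiB pages, for illustration */

    int main(void)
    {
            unsigned long long target_kib = 524288;  /* 512 MiB in memory/target */
            unsigned long pages = target_kib >> (PAGE_SHIFT - 10);

            printf("%llu KiB -> %lu pages\n", target_kib, pages);  /* 131072 */
            printf("round trip: %lu KiB\n", pages << (PAGE_SHIFT - 10));
            return 0;
    }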
393 static int balloon_init_watcher(struct notifier_block *notifier, 395 static int balloon_init_watcher(struct notifier_block *notifier,
394 unsigned long event, 396 unsigned long event,
395 void *data) 397 void *data)
396 { 398 {
397 int err; 399 int err;
398 400
399 err = register_xenbus_watch(&target_watch); 401 err = register_xenbus_watch(&target_watch);
400 if (err) 402 if (err)
401 printk(KERN_ERR "Failed to set balloon watcher\n"); 403 printk(KERN_ERR "Failed to set balloon watcher\n");
402 404
403 return NOTIFY_DONE; 405 return NOTIFY_DONE;
404 } 406 }
405 407
406 static struct notifier_block xenstore_notifier; 408 static struct notifier_block xenstore_notifier;
407 409
408 static int __init balloon_init(void) 410 static int __init balloon_init(void)
409 { 411 {
410 unsigned long pfn; 412 unsigned long pfn;
411 struct page *page; 413 struct page *page;
412 414
413 if (!xen_pv_domain()) 415 if (!xen_pv_domain())
414 return -ENODEV; 416 return -ENODEV;
415 417
416 pr_info("xen_balloon: Initialising balloon driver.\n"); 418 pr_info("xen_balloon: Initialising balloon driver.\n");
417 419
418 balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn); 420 balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
419 totalram_pages = balloon_stats.current_pages; 421 totalram_pages = balloon_stats.current_pages;
420 balloon_stats.target_pages = balloon_stats.current_pages; 422 balloon_stats.target_pages = balloon_stats.current_pages;
421 balloon_stats.balloon_low = 0; 423 balloon_stats.balloon_low = 0;
422 balloon_stats.balloon_high = 0; 424 balloon_stats.balloon_high = 0;
423 balloon_stats.driver_pages = 0UL; 425 balloon_stats.driver_pages = 0UL;
424 balloon_stats.hard_limit = ~0UL; 426 balloon_stats.hard_limit = ~0UL;
425 427
426 init_timer(&balloon_timer); 428 init_timer(&balloon_timer);
427 balloon_timer.data = 0; 429 balloon_timer.data = 0;
428 balloon_timer.function = balloon_alarm; 430 balloon_timer.function = balloon_alarm;
429 431
430 register_balloon(&balloon_sysdev); 432 register_balloon(&balloon_sysdev);
431 433
432 /* Initialise the balloon with excess memory space. */ 434 /* Initialise the balloon with excess memory space. */
433 for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { 435 for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
434 page = pfn_to_page(pfn); 436 page = pfn_to_page(pfn);
435 if (!PageReserved(page)) 437 if (!PageReserved(page))
436 balloon_append(page); 438 balloon_append(page);
437 } 439 }
438 440
439 target_watch.callback = watch_target; 441 target_watch.callback = watch_target;
440 xenstore_notifier.notifier_call = balloon_init_watcher; 442 xenstore_notifier.notifier_call = balloon_init_watcher;
441 443
442 register_xenstore_notifier(&xenstore_notifier); 444 register_xenstore_notifier(&xenstore_notifier);
443 445
444 return 0; 446 return 0;
445 } 447 }
446 448
447 subsys_initcall(balloon_init); 449 subsys_initcall(balloon_init);
448 450
449 static void balloon_exit(void) 451 static void balloon_exit(void)
450 { 452 {
451 /* XXX - release balloon here */ 453 /* XXX - release balloon here */
452 return; 454 return;
453 } 455 }
454 456
455 module_exit(balloon_exit); 457 module_exit(balloon_exit);
456 458
457 #define BALLOON_SHOW(name, format, args...) \ 459 #define BALLOON_SHOW(name, format, args...) \
458 static ssize_t show_##name(struct sys_device *dev, \ 460 static ssize_t show_##name(struct sys_device *dev, \
459 struct sysdev_attribute *attr, \ 461 struct sysdev_attribute *attr, \
460 char *buf) \ 462 char *buf) \
461 { \ 463 { \
462 return sprintf(buf, format, ##args); \ 464 return sprintf(buf, format, ##args); \
463 } \ 465 } \
464 static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) 466 static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
465 467
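BALLOON_SHOW() stamps out one read-only sysdev attribute per statistic. The current_kb instance below, for example, expands to roughly:

    /* Rough expansion of BALLOON_SHOW(current_kb, "%lu\n", ...) below. */
    static ssize_t show_current_kb(struct sys_device *dev,
                                   struct sysdev_attribute *attr,
                                   char *buf)
    {
            return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.current_pages));
    }
    static SYSDEV_ATTR(current_kb, S_IRUGO, show_current_kb, NULL);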
466 BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); 468 BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
467 BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low)); 469 BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
468 BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); 470 BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
469 BALLOON_SHOW(hard_limit_kb, 471 BALLOON_SHOW(hard_limit_kb,
470 (balloon_stats.hard_limit!=~0UL) ? "%lu\n" : "???\n", 472 (balloon_stats.hard_limit!=~0UL) ? "%lu\n" : "???\n",
471 (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0); 473 (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0);
472 BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); 474 BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));
473 475
474 static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr, 476 static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
475 char *buf) 477 char *buf)
476 { 478 {
477 return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); 479 return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
478 } 480 }
479 481
480 static ssize_t store_target_kb(struct sys_device *dev, 482 static ssize_t store_target_kb(struct sys_device *dev,
481 struct sysdev_attribute *attr, 483 struct sysdev_attribute *attr,
482 const char *buf, 484 const char *buf,
483 size_t count) 485 size_t count)
484 { 486 {
485 char *endchar; 487 char *endchar;
486 unsigned long long target_bytes; 488 unsigned long long target_bytes;
487 489
488 if (!capable(CAP_SYS_ADMIN)) 490 if (!capable(CAP_SYS_ADMIN))
489 return -EPERM; 491 return -EPERM;
490 492
491 target_bytes = memparse(buf, &endchar); 493 target_bytes = memparse(buf, &endchar);
492 494
493 balloon_set_new_target(target_bytes >> PAGE_SHIFT); 495 balloon_set_new_target(target_bytes >> PAGE_SHIFT);
494 496
495 return count; 497 return count;
496 } 498 }
497 499
498 static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, 500 static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
499 show_target_kb, store_target_kb); 501 show_target_kb, store_target_kb);
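For orientation: memparse() accepts K/M/G suffixes and returns a byte count, so a bare number written to this attribute is interpreted as bytes despite the _kb in its name. Assuming BALLOON_CLASS_NAME is "xen_memory" (defined earlier in this file), setting a 512 MB target from inside the guest would look like:

    echo 512M > /sys/devices/system/xen_memory/xen_memory0/target_kb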
500 502
501 static struct sysdev_attribute *balloon_attrs[] = { 503 static struct sysdev_attribute *balloon_attrs[] = {
502 &attr_target_kb, 504 &attr_target_kb,
503 }; 505 };
504 506
505 static struct attribute *balloon_info_attrs[] = { 507 static struct attribute *balloon_info_attrs[] = {
506 &attr_current_kb.attr, 508 &attr_current_kb.attr,
507 &attr_low_kb.attr, 509 &attr_low_kb.attr,
508 &attr_high_kb.attr, 510 &attr_high_kb.attr,
509 &attr_hard_limit_kb.attr, 511 &attr_hard_limit_kb.attr,
510 &attr_driver_kb.attr, 512 &attr_driver_kb.attr,
511 NULL 513 NULL
512 }; 514 };
513 515
514 static struct attribute_group balloon_info_group = { 516 static struct attribute_group balloon_info_group = {
515 .name = "info", 517 .name = "info",
516 .attrs = balloon_info_attrs, 518 .attrs = balloon_info_attrs,
517 }; 519 };
518 520
519 static struct sysdev_class balloon_sysdev_class = { 521 static struct sysdev_class balloon_sysdev_class = {
520 .name = BALLOON_CLASS_NAME, 522 .name = BALLOON_CLASS_NAME,
521 }; 523 };
522 524
523 static int register_balloon(struct sys_device *sysdev) 525 static int register_balloon(struct sys_device *sysdev)
524 { 526 {
525 int i, error; 527 int i, error;
526 528
527 error = sysdev_class_register(&balloon_sysdev_class); 529 error = sysdev_class_register(&balloon_sysdev_class);
528 if (error) 530 if (error)
529 return error; 531 return error;
530 532
531 sysdev->id = 0; 533 sysdev->id = 0;
532 sysdev->cls = &balloon_sysdev_class; 534 sysdev->cls = &balloon_sysdev_class;
533 535
534 error = sysdev_register(sysdev); 536 error = sysdev_register(sysdev);
535 if (error) { 537 if (error) {
536 sysdev_class_unregister(&balloon_sysdev_class); 538 sysdev_class_unregister(&balloon_sysdev_class);
537 return error; 539 return error;
538 } 540 }
539 541
540 for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { 542 for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
541 error = sysdev_create_file(sysdev, balloon_attrs[i]); 543 error = sysdev_create_file(sysdev, balloon_attrs[i]);
542 if (error) 544 if (error)
543 goto fail; 545 goto fail;
544 } 546 }
545 547
546 error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); 548 error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
547 if (error) 549 if (error)
548 goto fail; 550 goto fail;
549 551
550 return 0; 552 return 0;
551 553
552 fail: 554 fail:
553 while (--i >= 0) 555 while (--i >= 0)
554 sysdev_remove_file(sysdev, balloon_attrs[i]); 556 sysdev_remove_file(sysdev, balloon_attrs[i]);
555 sysdev_unregister(sysdev); 557 sysdev_unregister(sysdev);
556 sysdev_class_unregister(&balloon_sysdev_class); 558 sysdev_class_unregister(&balloon_sysdev_class);
557 return error; 559 return error;
558 } 560 }
559 561
560 MODULE_LICENSE("GPL"); 562 MODULE_LICENSE("GPL");
drivers/xen/features.c
1 /****************************************************************************** 1 /******************************************************************************
2 * features.c 2 * features.c
3 * 3 *
4 * Xen feature flags. 4 * Xen feature flags.
5 * 5 *
6 * Copyright (c) 2006, Ian Campbell, XenSource Inc. 6 * Copyright (c) 2006, Ian Campbell, XenSource Inc.
7 */ 7 */
8 #include <linux/types.h> 8 #include <linux/types.h>
9 #include <linux/cache.h> 9 #include <linux/cache.h>
10 #include <linux/module.h> 10 #include <linux/module.h>
11 #include <asm/xen/hypervisor.h> 11
12 #include <asm/xen/hypercall.h>
13
14 #include <xen/interface/xen.h>
15 #include <xen/interface/version.h>
12 #include <xen/features.h> 16 #include <xen/features.h>
13 17
14 u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly; 18 u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
15 EXPORT_SYMBOL_GPL(xen_features); 19 EXPORT_SYMBOL_GPL(xen_features);
16 20
17 void xen_setup_features(void) 21 void xen_setup_features(void)
18 { 22 {
19 struct xen_feature_info fi; 23 struct xen_feature_info fi;
20 int i, j; 24 int i, j;
21 25
22 for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) { 26 for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
23 fi.submap_idx = i; 27 fi.submap_idx = i;
24 if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) 28 if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
25 break; 29 break;
26 for (j = 0; j < 32; j++) 30 for (j = 0; j < 32; j++)
27 xen_features[i * 32 + j] = !!(fi.submap & (1 << j)); 31 xen_features[i * 32 + j] = !!(fi.submap & (1 << j));
28 } 32 }
29 } 33 }
30 34
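Once xen_setup_features() has filled in the map, callers test individual flags through the xen_feature() accessor from xen/features.h; a minimal sketch:

    #include <xen/features.h>

    if (xen_feature(XENFEAT_auto_translated_physmap)) {
            /* the hypervisor translates pseudo-physical addresses for us */
    }

(XENFEAT_auto_translated_physmap is one of the submap bits defined in xen/interface/features.h.)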
drivers/xen/grant-table.c
1 /****************************************************************************** 1 /******************************************************************************
2 * grant_table.c 2 * grant_table.c
3 * 3 *
4 * Granting foreign access to our memory reservation. 4 * Granting foreign access to our memory reservation.
5 * 5 *
6 * Copyright (c) 2005-2006, Christopher Clark 6 * Copyright (c) 2005-2006, Christopher Clark
7 * Copyright (c) 2004-2005, K A Fraser 7 * Copyright (c) 2004-2005, K A Fraser
8 * 8 *
9 * This program is free software; you can redistribute it and/or 9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License version 2 10 * modify it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation; or, when distributed 11 * as published by the Free Software Foundation; or, when distributed
12 * separately from the Linux kernel or incorporated into other 12 * separately from the Linux kernel or incorporated into other
13 * software packages, subject to the following license: 13 * software packages, subject to the following license:
14 * 14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a copy 15 * Permission is hereby granted, free of charge, to any person obtaining a copy
16 * of this source file (the "Software"), to deal in the Software without 16 * of this source file (the "Software"), to deal in the Software without
17 * restriction, including without limitation the rights to use, copy, modify, 17 * restriction, including without limitation the rights to use, copy, modify,
18 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19 * and to permit persons to whom the Software is furnished to do so, subject to 19 * and to permit persons to whom the Software is furnished to do so, subject to
20 * the following conditions: 20 * the following conditions:
21 * 21 *
22 * The above copyright notice and this permission notice shall be included in 22 * The above copyright notice and this permission notice shall be included in
23 * all copies or substantial portions of the Software. 23 * all copies or substantial portions of the Software.
24 * 24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31 * IN THE SOFTWARE. 31 * IN THE SOFTWARE.
32 */ 32 */
33 33
34 #include <linux/module.h> 34 #include <linux/module.h>
35 #include <linux/sched.h> 35 #include <linux/sched.h>
36 #include <linux/mm.h> 36 #include <linux/mm.h>
37 #include <linux/vmalloc.h> 37 #include <linux/vmalloc.h>
38 #include <linux/uaccess.h> 38 #include <linux/uaccess.h>
39 39
40 #include <xen/interface/xen.h> 40 #include <xen/interface/xen.h>
41 #include <xen/page.h> 41 #include <xen/page.h>
42 #include <xen/grant_table.h> 42 #include <xen/grant_table.h>
43 #include <asm/xen/hypercall.h>
43 44
44 #include <asm/pgtable.h> 45 #include <asm/pgtable.h>
45 #include <asm/sync_bitops.h> 46 #include <asm/sync_bitops.h>
46 47
47 48
48 /* External tools reserve the first few grant table entries. */ 49 /* External tools reserve the first few grant table entries. */
49 #define NR_RESERVED_ENTRIES 8 50 #define NR_RESERVED_ENTRIES 8
50 #define GNTTAB_LIST_END 0xffffffff 51 #define GNTTAB_LIST_END 0xffffffff
51 #define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry)) 52 #define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry))
52 53
53 static grant_ref_t **gnttab_list; 54 static grant_ref_t **gnttab_list;
54 static unsigned int nr_grant_frames; 55 static unsigned int nr_grant_frames;
55 static unsigned int boot_max_nr_grant_frames; 56 static unsigned int boot_max_nr_grant_frames;
56 static int gnttab_free_count; 57 static int gnttab_free_count;
57 static grant_ref_t gnttab_free_head; 58 static grant_ref_t gnttab_free_head;
58 static DEFINE_SPINLOCK(gnttab_list_lock); 59 static DEFINE_SPINLOCK(gnttab_list_lock);
59 60
60 static struct grant_entry *shared; 61 static struct grant_entry *shared;
61 62
62 static struct gnttab_free_callback *gnttab_free_callback_list; 63 static struct gnttab_free_callback *gnttab_free_callback_list;
63 64
64 static int gnttab_expand(unsigned int req_entries); 65 static int gnttab_expand(unsigned int req_entries);
65 66
66 #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) 67 #define RPP (PAGE_SIZE / sizeof(grant_ref_t))
67 68
68 static inline grant_ref_t *__gnttab_entry(grant_ref_t entry) 69 static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
69 { 70 {
70 return &gnttab_list[(entry) / RPP][(entry) % RPP]; 71 return &gnttab_list[(entry) / RPP][(entry) % RPP];
71 } 72 }
72 /* This can be used as an l-value */ 73 /* This can be used as an l-value */
73 #define gnttab_entry(entry) (*__gnttab_entry(entry)) 74 #define gnttab_entry(entry) (*__gnttab_entry(entry))
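A worked example of the two-level layout, assuming 4 KB pages and a 32-bit grant_ref_t: RPP is 4096 / 4 = 1024, so grant reference 2500 lives in the third list page, at gnttab_list[2][452] (2500 / 1024 = 2, 2500 % 1024 = 452).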
74 75
75 static int get_free_entries(unsigned count) 76 static int get_free_entries(unsigned count)
76 { 77 {
77 unsigned long flags; 78 unsigned long flags;
78 int ref, rc; 79 int ref, rc;
79 grant_ref_t head; 80 grant_ref_t head;
80 81
81 spin_lock_irqsave(&gnttab_list_lock, flags); 82 spin_lock_irqsave(&gnttab_list_lock, flags);
82 83
83 if ((gnttab_free_count < count) && 84 if ((gnttab_free_count < count) &&
84 ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) { 85 ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
85 spin_unlock_irqrestore(&gnttab_list_lock, flags); 86 spin_unlock_irqrestore(&gnttab_list_lock, flags);
86 return rc; 87 return rc;
87 } 88 }
88 89
89 ref = head = gnttab_free_head; 90 ref = head = gnttab_free_head;
90 gnttab_free_count -= count; 91 gnttab_free_count -= count;
91 while (count-- > 1) 92 while (count-- > 1)
92 head = gnttab_entry(head); 93 head = gnttab_entry(head);
93 gnttab_free_head = gnttab_entry(head); 94 gnttab_free_head = gnttab_entry(head);
94 gnttab_entry(head) = GNTTAB_LIST_END; 95 gnttab_entry(head) = GNTTAB_LIST_END;
95 96
96 spin_unlock_irqrestore(&gnttab_list_lock, flags); 97 spin_unlock_irqrestore(&gnttab_list_lock, flags);
97 98
98 return ref; 99 return ref;
99 } 100 }
100 101
101 static void do_free_callbacks(void) 102 static void do_free_callbacks(void)
102 { 103 {
103 struct gnttab_free_callback *callback, *next; 104 struct gnttab_free_callback *callback, *next;
104 105
105 callback = gnttab_free_callback_list; 106 callback = gnttab_free_callback_list;
106 gnttab_free_callback_list = NULL; 107 gnttab_free_callback_list = NULL;
107 108
108 while (callback != NULL) { 109 while (callback != NULL) {
109 next = callback->next; 110 next = callback->next;
110 if (gnttab_free_count >= callback->count) { 111 if (gnttab_free_count >= callback->count) {
111 callback->next = NULL; 112 callback->next = NULL;
112 callback->fn(callback->arg); 113 callback->fn(callback->arg);
113 } else { 114 } else {
114 callback->next = gnttab_free_callback_list; 115 callback->next = gnttab_free_callback_list;
115 gnttab_free_callback_list = callback; 116 gnttab_free_callback_list = callback;
116 } 117 }
117 callback = next; 118 callback = next;
118 } 119 }
119 } 120 }
120 121
121 static inline void check_free_callbacks(void) 122 static inline void check_free_callbacks(void)
122 { 123 {
123 if (unlikely(gnttab_free_callback_list)) 124 if (unlikely(gnttab_free_callback_list))
124 do_free_callbacks(); 125 do_free_callbacks();
125 } 126 }
126 127
127 static void put_free_entry(grant_ref_t ref) 128 static void put_free_entry(grant_ref_t ref)
128 { 129 {
129 unsigned long flags; 130 unsigned long flags;
130 spin_lock_irqsave(&gnttab_list_lock, flags); 131 spin_lock_irqsave(&gnttab_list_lock, flags);
131 gnttab_entry(ref) = gnttab_free_head; 132 gnttab_entry(ref) = gnttab_free_head;
132 gnttab_free_head = ref; 133 gnttab_free_head = ref;
133 gnttab_free_count++; 134 gnttab_free_count++;
134 check_free_callbacks(); 135 check_free_callbacks();
135 spin_unlock_irqrestore(&gnttab_list_lock, flags); 136 spin_unlock_irqrestore(&gnttab_list_lock, flags);
136 } 137 }
137 138
138 static void update_grant_entry(grant_ref_t ref, domid_t domid, 139 static void update_grant_entry(grant_ref_t ref, domid_t domid,
139 unsigned long frame, unsigned flags) 140 unsigned long frame, unsigned flags)
140 { 141 {
141 /* 142 /*
142 * Introducing a valid entry into the grant table: 143 * Introducing a valid entry into the grant table:
143 * 1. Write ent->domid. 144 * 1. Write ent->domid.
144 * 2. Write ent->frame: 145 * 2. Write ent->frame:
145 * GTF_permit_access: Frame to which access is permitted. 146 * GTF_permit_access: Frame to which access is permitted.
146 * GTF_accept_transfer: Pseudo-phys frame slot being filled by new 147 * GTF_accept_transfer: Pseudo-phys frame slot being filled by new
147 * frame, or zero if none. 148 * frame, or zero if none.
148 * 3. Write memory barrier (WMB). 149 * 3. Write memory barrier (WMB).
149 * 4. Write ent->flags, inc. valid type. 150 * 4. Write ent->flags, inc. valid type.
150 */ 151 */
151 shared[ref].frame = frame; 152 shared[ref].frame = frame;
152 shared[ref].domid = domid; 153 shared[ref].domid = domid;
153 wmb(); 154 wmb();
154 shared[ref].flags = flags; 155 shared[ref].flags = flags;
155 } 156 }
156 157
157 /* 158 /*
158 * Public grant-issuing interface functions 159 * Public grant-issuing interface functions
159 */ 160 */
160 void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, 161 void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
161 unsigned long frame, int readonly) 162 unsigned long frame, int readonly)
162 { 163 {
163 update_grant_entry(ref, domid, frame, 164 update_grant_entry(ref, domid, frame,
164 GTF_permit_access | (readonly ? GTF_readonly : 0)); 165 GTF_permit_access | (readonly ? GTF_readonly : 0));
165 } 166 }
166 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref); 167 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
167 168
168 int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, 169 int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
169 int readonly) 170 int readonly)
170 { 171 {
171 int ref; 172 int ref;
172 173
173 ref = get_free_entries(1); 174 ref = get_free_entries(1);
174 if (unlikely(ref < 0)) 175 if (unlikely(ref < 0))
175 return -ENOSPC; 176 return -ENOSPC;
176 177
177 gnttab_grant_foreign_access_ref(ref, domid, frame, readonly); 178 gnttab_grant_foreign_access_ref(ref, domid, frame, readonly);
178 179
179 return ref; 180 return ref;
180 } 181 }
181 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); 182 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
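A minimal frontend-style sketch of the round trip, assuming a backend_domid obtained from xenbus and the virt_to_mfn() helper from asm/xen/page.h:

    unsigned long vaddr = __get_free_page(GFP_KERNEL);
    int ref;

    if (!vaddr)
            return -ENOMEM;

    ref = gnttab_grant_foreign_access(backend_domid,
                                      virt_to_mfn((void *)vaddr),
                                      0 /* read-write */);
    if (ref < 0)
            return ref;             /* -ENOSPC: no free entries */

    /* ... advertise 'ref' to the backend (e.g. via xenstore) ... */

    /* Later: this frees the page on success, or warns and leaks it
     * if the remote end still holds a mapping (see below). */
    gnttab_end_foreign_access(ref, 0, vaddr);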
182 183
183 int gnttab_query_foreign_access(grant_ref_t ref) 184 int gnttab_query_foreign_access(grant_ref_t ref)
184 { 185 {
185 u16 nflags; 186 u16 nflags;
186 187
187 nflags = shared[ref].flags; 188 nflags = shared[ref].flags;
188 189
189 return (nflags & (GTF_reading|GTF_writing)); 190 return (nflags & (GTF_reading|GTF_writing));
190 } 191 }
191 EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); 192 EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
192 193
193 int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) 194 int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
194 { 195 {
195 u16 flags, nflags; 196 u16 flags, nflags;
196 197
197 nflags = shared[ref].flags; 198 nflags = shared[ref].flags;
198 do { 199 do {
199 flags = nflags; 200 flags = nflags;
200 if (flags & (GTF_reading|GTF_writing)) { 201 if (flags & (GTF_reading|GTF_writing)) {
201 printk(KERN_ALERT "WARNING: g.e. still in use!\n"); 202 printk(KERN_ALERT "WARNING: g.e. still in use!\n");
202 return 0; 203 return 0;
203 } 204 }
204 } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags); 205 } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags);
205 206
206 return 1; 207 return 1;
207 } 208 }
208 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); 209 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
209 210
210 void gnttab_end_foreign_access(grant_ref_t ref, int readonly, 211 void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
211 unsigned long page) 212 unsigned long page)
212 { 213 {
213 if (gnttab_end_foreign_access_ref(ref, readonly)) { 214 if (gnttab_end_foreign_access_ref(ref, readonly)) {
214 put_free_entry(ref); 215 put_free_entry(ref);
215 if (page != 0) 216 if (page != 0)
216 free_page(page); 217 free_page(page);
217 } else { 218 } else {
218 /* XXX This needs to be fixed so that the ref and page are 219 /* XXX This needs to be fixed so that the ref and page are
219 placed on a list to be freed up later. */ 220 placed on a list to be freed up later. */
220 printk(KERN_WARNING 221 printk(KERN_WARNING
221 "WARNING: leaking g.e. and page still in use!\n"); 222 "WARNING: leaking g.e. and page still in use!\n");
222 } 223 }
223 } 224 }
224 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access); 225 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
225 226
226 int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn) 227 int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
227 { 228 {
228 int ref; 229 int ref;
229 230
230 ref = get_free_entries(1); 231 ref = get_free_entries(1);
231 if (unlikely(ref < 0)) 232 if (unlikely(ref < 0))
232 return -ENOSPC; 233 return -ENOSPC;
233 gnttab_grant_foreign_transfer_ref(ref, domid, pfn); 234 gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
234 235
235 return ref; 236 return ref;
236 } 237 }
237 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer); 238 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
238 239
239 void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, 240 void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
240 unsigned long pfn) 241 unsigned long pfn)
241 { 242 {
242 update_grant_entry(ref, domid, pfn, GTF_accept_transfer); 243 update_grant_entry(ref, domid, pfn, GTF_accept_transfer);
243 } 244 }
244 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref); 245 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
245 246
246 unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) 247 unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
247 { 248 {
248 unsigned long frame; 249 unsigned long frame;
249 u16 flags; 250 u16 flags;
250 251
251 /* 252 /*
252 * If a transfer has not yet started, try to reclaim the grant 253 * If a transfer has not yet started, try to reclaim the grant
253 * reference and return failure (== 0). 254 * reference and return failure (== 0).
254 */ 255 */
255 while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { 256 while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
256 if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags) 257 if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags)
257 return 0; 258 return 0;
258 cpu_relax(); 259 cpu_relax();
259 } 260 }
260 261
261 /* If a transfer is in progress then wait until it is completed. */ 262 /* If a transfer is in progress then wait until it is completed. */
262 while (!(flags & GTF_transfer_completed)) { 263 while (!(flags & GTF_transfer_completed)) {
263 flags = shared[ref].flags; 264 flags = shared[ref].flags;
264 cpu_relax(); 265 cpu_relax();
265 } 266 }
266 267
267 rmb(); /* Read the frame number /after/ reading completion status. */ 268 rmb(); /* Read the frame number /after/ reading completion status. */
268 frame = shared[ref].frame; 269 frame = shared[ref].frame;
269 BUG_ON(frame == 0); 270 BUG_ON(frame == 0);
270 271
271 return frame; 272 return frame;
272 } 273 }
273 EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref); 274 EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
274 275
275 unsigned long gnttab_end_foreign_transfer(grant_ref_t ref) 276 unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
276 { 277 {
277 unsigned long frame = gnttab_end_foreign_transfer_ref(ref); 278 unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
278 put_free_entry(ref); 279 put_free_entry(ref);
279 return frame; 280 return frame;
280 } 281 }
281 EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer); 282 EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer);
282 283
283 void gnttab_free_grant_reference(grant_ref_t ref) 284 void gnttab_free_grant_reference(grant_ref_t ref)
284 { 285 {
285 put_free_entry(ref); 286 put_free_entry(ref);
286 } 287 }
287 EXPORT_SYMBOL_GPL(gnttab_free_grant_reference); 288 EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);
288 289
289 void gnttab_free_grant_references(grant_ref_t head) 290 void gnttab_free_grant_references(grant_ref_t head)
290 { 291 {
291 grant_ref_t ref; 292 grant_ref_t ref;
292 unsigned long flags; 293 unsigned long flags;
293 int count = 1; 294 int count = 1;
294 if (head == GNTTAB_LIST_END) 295 if (head == GNTTAB_LIST_END)
295 return; 296 return;
296 spin_lock_irqsave(&gnttab_list_lock, flags); 297 spin_lock_irqsave(&gnttab_list_lock, flags);
297 ref = head; 298 ref = head;
298 while (gnttab_entry(ref) != GNTTAB_LIST_END) { 299 while (gnttab_entry(ref) != GNTTAB_LIST_END) {
299 ref = gnttab_entry(ref); 300 ref = gnttab_entry(ref);
300 count++; 301 count++;
301 } 302 }
302 gnttab_entry(ref) = gnttab_free_head; 303 gnttab_entry(ref) = gnttab_free_head;
303 gnttab_free_head = head; 304 gnttab_free_head = head;
304 gnttab_free_count += count; 305 gnttab_free_count += count;
305 check_free_callbacks(); 306 check_free_callbacks();
306 spin_unlock_irqrestore(&gnttab_list_lock, flags); 307 spin_unlock_irqrestore(&gnttab_list_lock, flags);
307 } 308 }
308 EXPORT_SYMBOL_GPL(gnttab_free_grant_references); 309 EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
309 310
310 int gnttab_alloc_grant_references(u16 count, grant_ref_t *head) 311 int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
311 { 312 {
312 int h = get_free_entries(count); 313 int h = get_free_entries(count);
313 314
314 if (h < 0) 315 if (h < 0)
315 return -ENOSPC; 316 return -ENOSPC;
316 317
317 *head = h; 318 *head = h;
318 319
319 return 0; 320 return 0;
320 } 321 }
321 EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references); 322 EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
322 323
323 int gnttab_empty_grant_references(const grant_ref_t *private_head) 324 int gnttab_empty_grant_references(const grant_ref_t *private_head)
324 { 325 {
325 return (*private_head == GNTTAB_LIST_END); 326 return (*private_head == GNTTAB_LIST_END);
326 } 327 }
327 EXPORT_SYMBOL_GPL(gnttab_empty_grant_references); 328 EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);
328 329
329 int gnttab_claim_grant_reference(grant_ref_t *private_head) 330 int gnttab_claim_grant_reference(grant_ref_t *private_head)
330 { 331 {
331 grant_ref_t g = *private_head; 332 grant_ref_t g = *private_head;
332 if (unlikely(g == GNTTAB_LIST_END)) 333 if (unlikely(g == GNTTAB_LIST_END))
333 return -ENOSPC; 334 return -ENOSPC;
334 *private_head = gnttab_entry(g); 335 *private_head = gnttab_entry(g);
335 return g; 336 return g;
336 } 337 }
337 EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference); 338 EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
338 339
339 void gnttab_release_grant_reference(grant_ref_t *private_head, 340 void gnttab_release_grant_reference(grant_ref_t *private_head,
340 grant_ref_t release) 341 grant_ref_t release)
341 { 342 {
342 gnttab_entry(release) = *private_head; 343 gnttab_entry(release) = *private_head;
343 *private_head = release; 344 *private_head = release;
344 } 345 }
345 EXPORT_SYMBOL_GPL(gnttab_release_grant_reference); 346 EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
346 347
347 void gnttab_request_free_callback(struct gnttab_free_callback *callback, 348 void gnttab_request_free_callback(struct gnttab_free_callback *callback,
348 void (*fn)(void *), void *arg, u16 count) 349 void (*fn)(void *), void *arg, u16 count)
349 { 350 {
350 unsigned long flags; 351 unsigned long flags;
351 spin_lock_irqsave(&gnttab_list_lock, flags); 352 spin_lock_irqsave(&gnttab_list_lock, flags);
352 if (callback->next) 353 if (callback->next)
353 goto out; 354 goto out;
354 callback->fn = fn; 355 callback->fn = fn;
355 callback->arg = arg; 356 callback->arg = arg;
356 callback->count = count; 357 callback->count = count;
357 callback->next = gnttab_free_callback_list; 358 callback->next = gnttab_free_callback_list;
358 gnttab_free_callback_list = callback; 359 gnttab_free_callback_list = callback;
359 check_free_callbacks(); 360 check_free_callbacks();
360 out: 361 out:
361 spin_unlock_irqrestore(&gnttab_list_lock, flags); 362 spin_unlock_irqrestore(&gnttab_list_lock, flags);
362 } 363 }
363 EXPORT_SYMBOL_GPL(gnttab_request_free_callback); 364 EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
364 365
365 void gnttab_cancel_free_callback(struct gnttab_free_callback *callback) 366 void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
366 { 367 {
367 struct gnttab_free_callback **pcb; 368 struct gnttab_free_callback **pcb;
368 unsigned long flags; 369 unsigned long flags;
369 370
370 spin_lock_irqsave(&gnttab_list_lock, flags); 371 spin_lock_irqsave(&gnttab_list_lock, flags);
371 for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { 372 for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
372 if (*pcb == callback) { 373 if (*pcb == callback) {
373 *pcb = callback->next; 374 *pcb = callback->next;
374 break; 375 break;
375 } 376 }
376 } 377 }
377 spin_unlock_irqrestore(&gnttab_list_lock, flags); 378 spin_unlock_irqrestore(&gnttab_list_lock, flags);
378 } 379 }
379 EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback); 380 EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
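A sketch of how a driver might use this pair when get_free_entries() has failed with -ENOSPC (the retry function and my_dev pointer are hypothetical):

    static struct gnttab_free_callback gref_cb;

    static void grants_available(void *arg)
    {
            /* re-issue the gnttab_alloc_grant_references() call that failed */
    }

    /* on allocation failure: ask to be called once >= 16 refs are free */
    gnttab_request_free_callback(&gref_cb, grants_available, my_dev, 16);

    /* on teardown, if the callback may still be pending: */
    gnttab_cancel_free_callback(&gref_cb);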
380 381
381 static int grow_gnttab_list(unsigned int more_frames) 382 static int grow_gnttab_list(unsigned int more_frames)
382 { 383 {
383 unsigned int new_nr_grant_frames, extra_entries, i; 384 unsigned int new_nr_grant_frames, extra_entries, i;
384 unsigned int nr_glist_frames, new_nr_glist_frames; 385 unsigned int nr_glist_frames, new_nr_glist_frames;
385 386
386 new_nr_grant_frames = nr_grant_frames + more_frames; 387 new_nr_grant_frames = nr_grant_frames + more_frames;
387 extra_entries = more_frames * GREFS_PER_GRANT_FRAME; 388 extra_entries = more_frames * GREFS_PER_GRANT_FRAME;
388 389
389 nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; 390 nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
390 new_nr_glist_frames = 391 new_nr_glist_frames =
391 (new_nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; 392 (new_nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
392 for (i = nr_glist_frames; i < new_nr_glist_frames; i++) { 393 for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
393 gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC); 394 gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
394 if (!gnttab_list[i]) 395 if (!gnttab_list[i])
395 goto grow_nomem; 396 goto grow_nomem;
396 } 397 }
397 398
398 399
399 for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; 400 for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
400 i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) 401 i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
401 gnttab_entry(i) = i + 1; 402 gnttab_entry(i) = i + 1;
402 403
403 gnttab_entry(i) = gnttab_free_head; 404 gnttab_entry(i) = gnttab_free_head;
404 gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; 405 gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
405 gnttab_free_count += extra_entries; 406 gnttab_free_count += extra_entries;
406 407
407 nr_grant_frames = new_nr_grant_frames; 408 nr_grant_frames = new_nr_grant_frames;
408 409
409 check_free_callbacks(); 410 check_free_callbacks();
410 411
411 return 0; 412 return 0;
412 413
413 grow_nomem: 414 grow_nomem:
414 for ( ; i >= nr_glist_frames; i--) 415 for ( ; i >= nr_glist_frames; i--)
415 free_page((unsigned long) gnttab_list[i]); 416 free_page((unsigned long) gnttab_list[i]);
416 return -ENOMEM; 417 return -ENOMEM;
417 } 418 }
418 419
419 static unsigned int __max_nr_grant_frames(void) 420 static unsigned int __max_nr_grant_frames(void)
420 { 421 {
421 struct gnttab_query_size query; 422 struct gnttab_query_size query;
422 int rc; 423 int rc;
423 424
424 query.dom = DOMID_SELF; 425 query.dom = DOMID_SELF;
425 426
426 rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); 427 rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
427 if ((rc < 0) || (query.status != GNTST_okay)) 428 if ((rc < 0) || (query.status != GNTST_okay))
428 return 4; /* Legacy max supported number of frames */ 429 return 4; /* Legacy max supported number of frames */
429 430
430 return query.max_nr_frames; 431 return query.max_nr_frames;
431 } 432 }
432 433
433 static inline unsigned int max_nr_grant_frames(void) 434 static inline unsigned int max_nr_grant_frames(void)
434 { 435 {
435 unsigned int xen_max = __max_nr_grant_frames(); 436 unsigned int xen_max = __max_nr_grant_frames();
436 437
437 if (xen_max > boot_max_nr_grant_frames) 438 if (xen_max > boot_max_nr_grant_frames)
438 return boot_max_nr_grant_frames; 439 return boot_max_nr_grant_frames;
439 return xen_max; 440 return xen_max;
440 } 441 }
441 442
442 static int gnttab_map(unsigned int start_idx, unsigned int end_idx) 443 static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
443 { 444 {
444 struct gnttab_setup_table setup; 445 struct gnttab_setup_table setup;
445 unsigned long *frames; 446 unsigned long *frames;
446 unsigned int nr_gframes = end_idx + 1; 447 unsigned int nr_gframes = end_idx + 1;
447 int rc; 448 int rc;
448 449
449 frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC); 450 frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
450 if (!frames) 451 if (!frames)
451 return -ENOMEM; 452 return -ENOMEM;
452 453
453 setup.dom = DOMID_SELF; 454 setup.dom = DOMID_SELF;
454 setup.nr_frames = nr_gframes; 455 setup.nr_frames = nr_gframes;
455 set_xen_guest_handle(setup.frame_list, frames); 456 set_xen_guest_handle(setup.frame_list, frames);
456 457
457 rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); 458 rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
458 if (rc == -ENOSYS) { 459 if (rc == -ENOSYS) {
459 kfree(frames); 460 kfree(frames);
460 return -ENOSYS; 461 return -ENOSYS;
461 } 462 }
462 463
463 BUG_ON(rc || setup.status); 464 BUG_ON(rc || setup.status);
464 465
465 rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(), 466 rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(),
466 &shared); 467 &shared);
467 BUG_ON(rc); 468 BUG_ON(rc);
468 469
469 kfree(frames); 470 kfree(frames);
470 471
471 return 0; 472 return 0;
472 } 473 }
473 474
474 int gnttab_resume(void) 475 int gnttab_resume(void)
475 { 476 {
476 if (max_nr_grant_frames() < nr_grant_frames) 477 if (max_nr_grant_frames() < nr_grant_frames)
477 return -ENOSYS; 478 return -ENOSYS;
478 return gnttab_map(0, nr_grant_frames - 1); 479 return gnttab_map(0, nr_grant_frames - 1);
479 } 480 }
480 481
481 int gnttab_suspend(void) 482 int gnttab_suspend(void)
482 { 483 {
483 arch_gnttab_unmap_shared(shared, nr_grant_frames); 484 arch_gnttab_unmap_shared(shared, nr_grant_frames);
484 return 0; 485 return 0;
485 } 486 }
486 487
487 static int gnttab_expand(unsigned int req_entries) 488 static int gnttab_expand(unsigned int req_entries)
488 { 489 {
489 int rc; 490 int rc;
490 unsigned int cur, extra; 491 unsigned int cur, extra;
491 492
492 cur = nr_grant_frames; 493 cur = nr_grant_frames;
493 extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) / 494 extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
494 GREFS_PER_GRANT_FRAME); 495 GREFS_PER_GRANT_FRAME);
495 if (cur + extra > max_nr_grant_frames()) 496 if (cur + extra > max_nr_grant_frames())
496 return -ENOSPC; 497 return -ENOSPC;
497 498
498 rc = gnttab_map(cur, cur + extra - 1); 499 rc = gnttab_map(cur, cur + extra - 1);
499 if (rc == 0) 500 if (rc == 0)
500 rc = grow_gnttab_list(extra); 501 rc = grow_gnttab_list(extra);
501 502
502 return rc; 503 return rc;
503 } 504 }
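Worked example, assuming 4 KB pages: struct grant_entry is 8 bytes (two u16 fields plus a u32 frame), so GREFS_PER_GRANT_FRAME is 512, and a request for 600 more entries rounds up to (600 + 511) / 512 = 2 extra frames.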
504 505
505 static int __devinit gnttab_init(void) 506 static int __devinit gnttab_init(void)
506 { 507 {
507 int i; 508 int i;
508 unsigned int max_nr_glist_frames, nr_glist_frames; 509 unsigned int max_nr_glist_frames, nr_glist_frames;
509 unsigned int nr_init_grefs; 510 unsigned int nr_init_grefs;
510 511
511 if (!xen_domain()) 512 if (!xen_domain())
512 return -ENODEV; 513 return -ENODEV;
513 514
514 nr_grant_frames = 1; 515 nr_grant_frames = 1;
515 boot_max_nr_grant_frames = __max_nr_grant_frames(); 516 boot_max_nr_grant_frames = __max_nr_grant_frames();
516 517
517 /* Determine the maximum number of frames required for the 518 /* Determine the maximum number of frames required for the
518 * grant reference free list on the current hypervisor. 519 * grant reference free list on the current hypervisor.
519 */ 520 */
520 max_nr_glist_frames = (boot_max_nr_grant_frames * 521 max_nr_glist_frames = (boot_max_nr_grant_frames *
521 GREFS_PER_GRANT_FRAME / RPP); 522 GREFS_PER_GRANT_FRAME / RPP);
522 523
523 gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *), 524 gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
524 GFP_KERNEL); 525 GFP_KERNEL);
525 if (gnttab_list == NULL) 526 if (gnttab_list == NULL)
526 return -ENOMEM; 527 return -ENOMEM;
527 528
528 nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; 529 nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
529 for (i = 0; i < nr_glist_frames; i++) { 530 for (i = 0; i < nr_glist_frames; i++) {
530 gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL); 531 gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
531 if (gnttab_list[i] == NULL) 532 if (gnttab_list[i] == NULL)
532 goto ini_nomem; 533 goto ini_nomem;
533 } 534 }
534 535
535 if (gnttab_resume() < 0) 536 if (gnttab_resume() < 0)
536 return -ENODEV; 537 return -ENODEV;
537 538
538 nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; 539 nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
539 540
540 for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) 541 for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
541 gnttab_entry(i) = i + 1; 542 gnttab_entry(i) = i + 1;
542 543
543 gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; 544 gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
544 gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; 545 gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
545 gnttab_free_head = NR_RESERVED_ENTRIES; 546 gnttab_free_head = NR_RESERVED_ENTRIES;
546 547
547 printk(KERN_INFO "Grant table initialized\n"); 548 printk(KERN_INFO "Grant table initialized\n");
548 return 0; 549 return 0;
549 550
550 ini_nomem: 551 ini_nomem:
551 for (i--; i >= 0; i--) 552 for (i--; i >= 0; i--)
552 free_page((unsigned long)gnttab_list[i]); 553 free_page((unsigned long)gnttab_list[i]);
553 kfree(gnttab_list); 554 kfree(gnttab_list);
554 return -ENOMEM; 555 return -ENOMEM;
555 } 556 }
556 557
557 core_initcall(gnttab_init); 558 core_initcall(gnttab_init);
558 559
include/xen/interface/event_channel.h
1 /****************************************************************************** 1 /******************************************************************************
2 * event_channel.h 2 * event_channel.h
3 * 3 *
4 * Event channels between domains. 4 * Event channels between domains.
5 * 5 *
6 * Copyright (c) 2003-2004, K A Fraser. 6 * Copyright (c) 2003-2004, K A Fraser.
7 */ 7 */
8 8
9 #ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ 9 #ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__
10 #define __XEN_PUBLIC_EVENT_CHANNEL_H__ 10 #define __XEN_PUBLIC_EVENT_CHANNEL_H__
11 11
12 #include <xen/interface/xen.h>
13
12 typedef uint32_t evtchn_port_t; 14 typedef uint32_t evtchn_port_t;
13 DEFINE_GUEST_HANDLE(evtchn_port_t); 15 DEFINE_GUEST_HANDLE(evtchn_port_t);
14 16
15 /* 17 /*
16 * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as 18 * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as
17 * accepting interdomain bindings from domain <remote_dom>. A fresh port 19 * accepting interdomain bindings from domain <remote_dom>. A fresh port
18 * is allocated in <dom> and returned as <port>. 20 * is allocated in <dom> and returned as <port>.
19 * NOTES: 21 * NOTES:
20 * 1. If the caller is unprivileged then <dom> must be DOMID_SELF. 22 * 1. If the caller is unprivileged then <dom> must be DOMID_SELF.
21 * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections. 23 * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections.
22 */ 24 */
23 #define EVTCHNOP_alloc_unbound 6 25 #define EVTCHNOP_alloc_unbound 6
24 struct evtchn_alloc_unbound { 26 struct evtchn_alloc_unbound {
25 /* IN parameters */ 27 /* IN parameters */
26 domid_t dom, remote_dom; 28 domid_t dom, remote_dom;
27 /* OUT parameters */ 29 /* OUT parameters */
28 evtchn_port_t port; 30 evtchn_port_t port;
29 }; 31 };
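A sketch of driving this op from a Linux guest through the HYPERVISOR_event_channel_op() wrapper in asm/xen/hypercall.h (remote_domid is assumed):

    struct evtchn_alloc_unbound alloc = {
            .dom        = DOMID_SELF,
            .remote_dom = remote_domid,
    };
    int err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &alloc);
    if (err == 0)
            /* advertise alloc.port to the peer, e.g. through xenstore */;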
30 32
31 /* 33 /*
32 * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between 34 * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between
33 * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify 35 * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify
34 * a port that is unbound and marked as accepting bindings from the calling 36 * a port that is unbound and marked as accepting bindings from the calling
35 * domain. A fresh port is allocated in the calling domain and returned as 37 * domain. A fresh port is allocated in the calling domain and returned as
36 * <local_port>. 38 * <local_port>.
37 * NOTES: 39 * NOTES:
38 * 1. <remote_dom> may be DOMID_SELF, allowing loopback connections. 40 * 1. <remote_dom> may be DOMID_SELF, allowing loopback connections.
39 */ 41 */
40 #define EVTCHNOP_bind_interdomain 0 42 #define EVTCHNOP_bind_interdomain 0
41 struct evtchn_bind_interdomain { 43 struct evtchn_bind_interdomain {
42 /* IN parameters. */ 44 /* IN parameters. */
43 domid_t remote_dom; 45 domid_t remote_dom;
44 evtchn_port_t remote_port; 46 evtchn_port_t remote_port;
45 /* OUT parameters. */ 47 /* OUT parameters. */
46 evtchn_port_t local_port; 48 evtchn_port_t local_port;
47 }; 49 };
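The peer completes the connection with the matching op; a sketch under the same assumptions, with peer_domid and peer_port learned out of band (e.g. from xenstore):

    struct evtchn_bind_interdomain bind = {
            .remote_dom  = peer_domid,
            .remote_port = peer_port,
    };
    if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, &bind) == 0)
            /* events now arrive on the local port bind.local_port */;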
48 50
49 /* 51 /*
50 * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified 52 * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified
51 * vcpu. 53 * vcpu.
52 * NOTES: 54 * NOTES:
53 * 1. A virtual IRQ may be bound to at most one event channel per vcpu. 55 * 1. A virtual IRQ may be bound to at most one event channel per vcpu.
54 * 2. The allocated event channel is bound to the specified vcpu. The binding 56 * 2. The allocated event channel is bound to the specified vcpu. The binding
55 * may not be changed. 57 * may not be changed.
56 */ 58 */
57 #define EVTCHNOP_bind_virq 1 59 #define EVTCHNOP_bind_virq 1
58 struct evtchn_bind_virq { 60 struct evtchn_bind_virq {
59 /* IN parameters. */ 61 /* IN parameters. */
60 uint32_t virq; 62 uint32_t virq;
61 uint32_t vcpu; 63 uint32_t vcpu;
62 /* OUT parameters. */ 64 /* OUT parameters. */
63 evtchn_port_t port; 65 evtchn_port_t port;
64 }; 66 };
65 67
66 /* 68 /*
67 * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>. 69 * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>.
68 * NOTES: 70 * NOTES:
69 * 1. A physical IRQ may be bound to at most one event channel per domain. 71 * 1. A physical IRQ may be bound to at most one event channel per domain.
70 * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. 72 * 2. Only a sufficiently-privileged domain may bind to a physical IRQ.
71 */ 73 */
72 #define EVTCHNOP_bind_pirq 2 74 #define EVTCHNOP_bind_pirq 2
73 struct evtchn_bind_pirq { 75 struct evtchn_bind_pirq {
74 /* IN parameters. */ 76 /* IN parameters. */
75 uint32_t pirq; 77 uint32_t pirq;
76 #define BIND_PIRQ__WILL_SHARE 1 78 #define BIND_PIRQ__WILL_SHARE 1
77 uint32_t flags; /* BIND_PIRQ__* */ 79 uint32_t flags; /* BIND_PIRQ__* */
78 /* OUT parameters. */ 80 /* OUT parameters. */
79 evtchn_port_t port; 81 evtchn_port_t port;
80 }; 82 };
81 83
82 /* 84 /*
83 * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. 85 * EVTCHNOP_bind_ipi: Bind a local event channel to receive events.
84 * NOTES: 86 * NOTES:
85 * 1. The allocated event channel is bound to the specified vcpu. The binding 87 * 1. The allocated event channel is bound to the specified vcpu. The binding
86 * may not be changed. 88 * may not be changed.
87 */ 89 */
88 #define EVTCHNOP_bind_ipi 7 90 #define EVTCHNOP_bind_ipi 7
89 struct evtchn_bind_ipi { 91 struct evtchn_bind_ipi {
90 uint32_t vcpu; 92 uint32_t vcpu;
91 /* OUT parameters. */ 93 /* OUT parameters. */
92 evtchn_port_t port; 94 evtchn_port_t port;
93 }; 95 };
94 96
95 /* 97 /*
96 * EVTCHNOP_close: Close a local event channel <port>. If the channel is 98 * EVTCHNOP_close: Close a local event channel <port>. If the channel is
97 * interdomain then the remote end is placed in the unbound state 99 * interdomain then the remote end is placed in the unbound state
98 * (EVTCHNSTAT_unbound), awaiting a new connection. 100 * (EVTCHNSTAT_unbound), awaiting a new connection.
99 */ 101 */
100 #define EVTCHNOP_close 3 102 #define EVTCHNOP_close 3
101 struct evtchn_close { 103 struct evtchn_close {
102 /* IN parameters. */ 104 /* IN parameters. */
103 evtchn_port_t port; 105 evtchn_port_t port;
104 }; 106 };
105 107
106 /* 108 /*
107 * EVTCHNOP_send: Send an event to the remote end of the channel whose local 109 * EVTCHNOP_send: Send an event to the remote end of the channel whose local
108 * endpoint is <port>. 110 * endpoint is <port>.
109 */ 111 */
110 #define EVTCHNOP_send 4 112 #define EVTCHNOP_send 4
111 struct evtchn_send { 113 struct evtchn_send {
112 /* IN parameters. */ 114 /* IN parameters. */
113 evtchn_port_t port; 115 evtchn_port_t port;
114 }; 116 };
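Notifying the remote end is then a single op; a sketch (in-kernel users normally go through helpers such as notify_remote_via_irq() in drivers/xen/events.c rather than issuing this directly):

    struct evtchn_send send = { .port = local_port };
    (void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);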
115 117
116 /* 118 /*
117 * EVTCHNOP_status: Get the current status of the communication channel which 119 * EVTCHNOP_status: Get the current status of the communication channel which
118 * has an endpoint at <dom, port>. 120 * has an endpoint at <dom, port>.
119 * NOTES: 121 * NOTES:
120 * 1. <dom> may be specified as DOMID_SELF. 122 * 1. <dom> may be specified as DOMID_SELF.
121 * 2. Only a sufficiently-privileged domain may obtain the status of an event 123 * 2. Only a sufficiently-privileged domain may obtain the status of an event
122 * channel for which <dom> is not DOMID_SELF. 124 * channel for which <dom> is not DOMID_SELF.
123 */ 125 */
124 #define EVTCHNOP_status 5 126 #define EVTCHNOP_status 5
125 struct evtchn_status { 127 struct evtchn_status {
126 /* IN parameters */ 128 /* IN parameters */
127 domid_t dom; 129 domid_t dom;
128 evtchn_port_t port; 130 evtchn_port_t port;
129 /* OUT parameters */ 131 /* OUT parameters */
130 #define EVTCHNSTAT_closed 0 /* Channel is not in use. */ 132 #define EVTCHNSTAT_closed 0 /* Channel is not in use. */
131 #define EVTCHNSTAT_unbound 1 /* Channel is awaiting interdomain connection. */ 133 #define EVTCHNSTAT_unbound 1 /* Channel is awaiting interdomain connection. */
132 #define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ 134 #define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */
133 #define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ 135 #define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */
134 #define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ 136 #define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */
135 #define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ 137 #define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */
136 uint32_t status; 138 uint32_t status;
137 uint32_t vcpu; /* VCPU to which this channel is bound. */ 139 uint32_t vcpu; /* VCPU to which this channel is bound. */
138 union { 140 union {
139 struct { 141 struct {
140 domid_t dom; 142 domid_t dom;
141 } unbound; /* EVTCHNSTAT_unbound */ 143 } unbound; /* EVTCHNSTAT_unbound */
142 struct { 144 struct {
143 domid_t dom; 145 domid_t dom;
144 evtchn_port_t port; 146 evtchn_port_t port;
145 } interdomain; /* EVTCHNSTAT_interdomain */ 147 } interdomain; /* EVTCHNSTAT_interdomain */
146 uint32_t pirq; /* EVTCHNSTAT_pirq */ 148 uint32_t pirq; /* EVTCHNSTAT_pirq */
147 uint32_t virq; /* EVTCHNSTAT_virq */ 149 uint32_t virq; /* EVTCHNSTAT_virq */
148 } u; 150 } u;
149 }; 151 };
150 152
151 /* 153 /*
152 * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an 154 * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an
153 * event is pending. 155 * event is pending.
154 * NOTES: 156 * NOTES:
155 * 1. IPI- and VIRQ-bound channels always notify the vcpu that initialised 157 * 1. IPI- and VIRQ-bound channels always notify the vcpu that initialised
156 * the binding. This binding cannot be changed. 158 * the binding. This binding cannot be changed.
157 * 2. All other channels notify vcpu0 by default. This default is set when 159 * 2. All other channels notify vcpu0 by default. This default is set when
158 * the channel is allocated (a port that is freed and subsequently reused 160 * the channel is allocated (a port that is freed and subsequently reused
159 * has its binding reset to vcpu0). 161 * has its binding reset to vcpu0).
160 */ 162 */
161 #define EVTCHNOP_bind_vcpu 8 163 #define EVTCHNOP_bind_vcpu 8
162 struct evtchn_bind_vcpu { 164 struct evtchn_bind_vcpu {
163 /* IN parameters. */ 165 /* IN parameters. */
164 evtchn_port_t port; 166 evtchn_port_t port;
165 uint32_t vcpu; 167 uint32_t vcpu;
166 }; 168 };
167 169
168 /* 170 /*
169 * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver 171 * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver
170 * a notification to the appropriate VCPU if an event is pending. 172 * a notification to the appropriate VCPU if an event is pending.
171 */ 173 */
172 #define EVTCHNOP_unmask 9 174 #define EVTCHNOP_unmask 9
173 struct evtchn_unmask { 175 struct evtchn_unmask {
174 /* IN parameters. */ 176 /* IN parameters. */
175 evtchn_port_t port; 177 evtchn_port_t port;
176 }; 178 };
177 179
178 struct evtchn_op { 180 struct evtchn_op {
179 uint32_t cmd; /* EVTCHNOP_* */ 181 uint32_t cmd; /* EVTCHNOP_* */
180 union { 182 union {
181 struct evtchn_alloc_unbound alloc_unbound; 183 struct evtchn_alloc_unbound alloc_unbound;
182 struct evtchn_bind_interdomain bind_interdomain; 184 struct evtchn_bind_interdomain bind_interdomain;
183 struct evtchn_bind_virq bind_virq; 185 struct evtchn_bind_virq bind_virq;
184 struct evtchn_bind_pirq bind_pirq; 186 struct evtchn_bind_pirq bind_pirq;
185 struct evtchn_bind_ipi bind_ipi; 187 struct evtchn_bind_ipi bind_ipi;
186 struct evtchn_close close; 188 struct evtchn_close close;
187 struct evtchn_send send; 189 struct evtchn_send send;
188 struct evtchn_status status; 190 struct evtchn_status status;
189 struct evtchn_bind_vcpu bind_vcpu; 191 struct evtchn_bind_vcpu bind_vcpu;
190 struct evtchn_unmask unmask; 192 struct evtchn_unmask unmask;
191 } u; 193 } u;
192 }; 194 };
193 DEFINE_GUEST_HANDLE_STRUCT(evtchn_op); 195 DEFINE_GUEST_HANDLE_STRUCT(evtchn_op);
194 196
195 #endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ 197 #endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
196 198