Commit be15f9d63b97da0065187696962331de6cd9de9e

Authored by Linus Torvalds

Merge branch 'x86-xen-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-xen-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (42 commits)
  xen: cache cr0 value to avoid trap'n'emulate for read_cr0
  xen/x86-64: clean up warnings about IST-using traps
  xen/x86-64: fix breakpoints and hardware watchpoints
  xen: reserve Xen start_info rather than e820 reserving
  xen: add FIX_TEXT_POKE to fixmap
  lguest: update lazy mmu changes to match lguest's use of kvm hypercalls
  xen: honour VCPU availability on boot
  xen: add "capabilities" file
  xen: drop kexec bits from /sys/hypervisor since kexec isn't implemented yet
  xen/sys/hypervisor: change writable_pt to features
  xen: add /sys/hypervisor support
  xen/xenbus: export xenbus_dev_changed
  xen: use device model for suspending xenbus devices
  xen: remove suspend_cancel hook
  xen/dev-evtchn: clean up locking in evtchn
  xen: export ioctl headers to userspace
  xen: add /dev/xen/evtchn driver
  xen: add irq_from_evtchn
  xen: clean up gate trap/interrupt constants
  xen: set _PAGE_NX in __supported_pte_mask before pagetable construction
  ...

Showing 37 changed files Side-by-side Diff

arch/x86/include/asm/paravirt.h
... ... @@ -56,6 +56,7 @@
56 56 struct tss_struct;
57 57 struct mm_struct;
58 58 struct desc_struct;
  59 +struct task_struct;
59 60  
60 61 /*
61 62 * Wrapper type for pointers to code which uses the non-standard
... ... @@ -203,7 +204,8 @@
203 204  
204 205 void (*swapgs)(void);
205 206  
206   - struct pv_lazy_ops lazy_mode;
  207 + void (*start_context_switch)(struct task_struct *prev);
  208 + void (*end_context_switch)(struct task_struct *next);
207 209 };
208 210  
209 211 struct pv_irq_ops {
210 212  
211 213  
212 214  
213 215  
214 216  
215 217  
... ... @@ -1399,24 +1401,22 @@
1399 1401 };
1400 1402  
1401 1403 enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
1402   -void paravirt_enter_lazy_cpu(void);
1403   -void paravirt_leave_lazy_cpu(void);
  1404 +void paravirt_start_context_switch(struct task_struct *prev);
  1405 +void paravirt_end_context_switch(struct task_struct *next);
  1406 +
1404 1407 void paravirt_enter_lazy_mmu(void);
1405 1408 void paravirt_leave_lazy_mmu(void);
1406   -void paravirt_leave_lazy(enum paravirt_lazy_mode mode);
1407 1409  
1408   -#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE
1409   -static inline void arch_enter_lazy_cpu_mode(void)
  1410 +#define __HAVE_ARCH_START_CONTEXT_SWITCH
  1411 +static inline void arch_start_context_switch(struct task_struct *prev)
1410 1412 {
1411   - PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
  1413 + PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev);
1412 1414 }
1413 1415  
1414   -static inline void arch_leave_lazy_cpu_mode(void)
  1416 +static inline void arch_end_context_switch(struct task_struct *next)
1415 1417 {
1416   - PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
  1418 + PVOP_VCALL1(pv_cpu_ops.end_context_switch, next);
1417 1419 }
1418   -
1419   -void arch_flush_lazy_cpu_mode(void);
1420 1420  
1421 1421 #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
1422 1422 static inline void arch_enter_lazy_mmu_mode(void)
arch/x86/include/asm/pgtable.h
... ... @@ -81,6 +81,8 @@
81 81 #define pte_val(x) native_pte_val(x)
82 82 #define __pte(x) native_make_pte(x)
83 83  
  84 +#define arch_end_context_switch(prev) do {} while(0)
  85 +
84 86 #endif /* CONFIG_PARAVIRT */
85 87  
86 88 /*
arch/x86/include/asm/required-features.h
... ... @@ -48,9 +48,15 @@
48 48 #endif
49 49  
50 50 #ifdef CONFIG_X86_64
  51 +#ifdef CONFIG_PARAVIRT
  52 +/* Paravirtualized systems may not have PSE or PGE available */
51 53 #define NEED_PSE 0
52   -#define NEED_MSR (1<<(X86_FEATURE_MSR & 31))
53 54 #define NEED_PGE 0
  55 +#else
  56 +#define NEED_PSE (1<<(X86_FEATURE_PSE & 31))
  57 +#define NEED_PGE (1<<(X86_FEATURE_PGE & 31))
  58 +#endif
  59 +#define NEED_MSR (1<<(X86_FEATURE_MSR & 31))
54 60 #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31))
55 61 #define NEED_XMM (1<<(X86_FEATURE_XMM & 31))
56 62 #define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31))
arch/x86/include/asm/thread_info.h
... ... @@ -94,7 +94,8 @@
94 94 #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
95 95 #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
96 96 #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
97   -#define TIF_SYSCALL_FTRACE 27 /* for ftrace syscall instrumentation */
  97 +#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
  98 +#define TIF_SYSCALL_FTRACE 28 /* for ftrace syscall instrumentation */
98 99  
99 100 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
100 101 #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
... ... @@ -116,6 +117,7 @@
116 117 #define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
117 118 #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
118 119 #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
  120 +#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
119 121 #define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE)
120 122  
121 123 /* work to do in syscall_trace_enter() */
arch/x86/include/asm/traps.h
... ... @@ -14,6 +14,9 @@
14 14 asmlinkage void debug(void);
15 15 asmlinkage void nmi(void);
16 16 asmlinkage void int3(void);
  17 +asmlinkage void xen_debug(void);
  18 +asmlinkage void xen_int3(void);
  19 +asmlinkage void xen_stack_segment(void);
17 20 asmlinkage void overflow(void);
18 21 asmlinkage void bounds(void);
19 22 asmlinkage void invalid_op(void);
arch/x86/kernel/entry_64.S
... ... @@ -1379,6 +1379,11 @@
1379 1379 paranoidzeroentry_ist debug do_debug DEBUG_STACK
1380 1380 paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
1381 1381 paranoiderrorentry stack_segment do_stack_segment
  1382 +#ifdef CONFIG_XEN
  1383 +zeroentry xen_debug do_debug
  1384 +zeroentry xen_int3 do_int3
  1385 +errorentry xen_stack_segment do_stack_segment
  1386 +#endif
1382 1387 errorentry general_protection do_general_protection
1383 1388 errorentry page_fault do_page_fault
1384 1389 #ifdef CONFIG_X86_MCE
arch/x86/kernel/kvm.c
... ... @@ -195,7 +195,7 @@
195 195 struct kvm_para_state *state = kvm_para_state();
196 196  
197 197 mmu_queue_flush(state);
198   - paravirt_leave_lazy(paravirt_get_lazy_mode());
  198 + paravirt_leave_lazy_mmu();
199 199 state->mode = paravirt_get_lazy_mode();
200 200 }
201 201  
arch/x86/kernel/paravirt.c
... ... @@ -248,18 +248,16 @@
248 248  
249 249 static inline void enter_lazy(enum paravirt_lazy_mode mode)
250 250 {
251   - BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
252   - BUG_ON(preemptible());
  251 + BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
253 252  
254   - __get_cpu_var(paravirt_lazy_mode) = mode;
  253 + percpu_write(paravirt_lazy_mode, mode);
255 254 }
256 255  
257   -void paravirt_leave_lazy(enum paravirt_lazy_mode mode)
  256 +static void leave_lazy(enum paravirt_lazy_mode mode)
258 257 {
259   - BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode);
260   - BUG_ON(preemptible());
  258 + BUG_ON(percpu_read(paravirt_lazy_mode) != mode);
261 259  
262   - __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
  260 + percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
263 261 }
264 262  
265 263 void paravirt_enter_lazy_mmu(void)
266 264  
267 265  
268 266  
269 267  
270 268  
... ... @@ -269,22 +267,36 @@
269 267  
270 268 void paravirt_leave_lazy_mmu(void)
271 269 {
272   - paravirt_leave_lazy(PARAVIRT_LAZY_MMU);
  270 + leave_lazy(PARAVIRT_LAZY_MMU);
273 271 }
274 272  
275   -void paravirt_enter_lazy_cpu(void)
  273 +void paravirt_start_context_switch(struct task_struct *prev)
276 274 {
  275 + BUG_ON(preemptible());
  276 +
  277 + if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
  278 + arch_leave_lazy_mmu_mode();
  279 + set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
  280 + }
277 281 enter_lazy(PARAVIRT_LAZY_CPU);
278 282 }
279 283  
280   -void paravirt_leave_lazy_cpu(void)
  284 +void paravirt_end_context_switch(struct task_struct *next)
281 285 {
282   - paravirt_leave_lazy(PARAVIRT_LAZY_CPU);
  286 + BUG_ON(preemptible());
  287 +
  288 + leave_lazy(PARAVIRT_LAZY_CPU);
  289 +
  290 + if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
  291 + arch_enter_lazy_mmu_mode();
283 292 }
284 293  
285 294 enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
286 295 {
287   - return __get_cpu_var(paravirt_lazy_mode);
  296 + if (in_interrupt())
  297 + return PARAVIRT_LAZY_NONE;
  298 +
  299 + return percpu_read(paravirt_lazy_mode);
288 300 }
289 301  
290 302 void arch_flush_lazy_mmu_mode(void)
... ... @@ -292,7 +304,6 @@
292 304 preempt_disable();
293 305  
294 306 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
295   - WARN_ON(preempt_count() == 1);
296 307 arch_leave_lazy_mmu_mode();
297 308 arch_enter_lazy_mmu_mode();
298 309 }
... ... @@ -300,19 +311,6 @@
300 311 preempt_enable();
301 312 }
302 313  
303   -void arch_flush_lazy_cpu_mode(void)
304   -{
305   - preempt_disable();
306   -
307   - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
308   - WARN_ON(preempt_count() == 1);
309   - arch_leave_lazy_cpu_mode();
310   - arch_enter_lazy_cpu_mode();
311   - }
312   -
313   - preempt_enable();
314   -}
315   -
316 314 struct pv_info pv_info = {
317 315 .name = "bare hardware",
318 316 .paravirt_enabled = 0,
... ... @@ -404,10 +402,8 @@
404 402 .set_iopl_mask = native_set_iopl_mask,
405 403 .io_delay = native_io_delay,
406 404  
407   - .lazy_mode = {
408   - .enter = paravirt_nop,
409   - .leave = paravirt_nop,
410   - },
  405 + .start_context_switch = paravirt_nop,
  406 + .end_context_switch = paravirt_nop,
411 407 };
412 408  
413 409 struct pv_apic_ops pv_apic_ops = {
arch/x86/kernel/process_32.c
... ... @@ -404,7 +404,7 @@
404 404 * done before math_state_restore, so the TS bit is up
405 405 * to date.
406 406 */
407   - arch_leave_lazy_cpu_mode();
  407 + arch_end_context_switch(next_p);
408 408  
409 409 /* If the task has used fpu the last 5 timeslices, just do a full
410 410 * restore of the math state immediately to avoid the trap; the
arch/x86/kernel/process_64.c
... ... @@ -425,7 +425,7 @@
425 425 * done before math_state_restore, so the TS bit is up
426 426 * to date.
427 427 */
428   - arch_leave_lazy_cpu_mode();
  428 + arch_end_context_switch(next_p);
429 429  
430 430 /*
431 431 * Switch FS and GS.
arch/x86/kernel/vmi_32.c
... ... @@ -462,22 +462,28 @@
462 462 }
463 463 #endif
464 464  
465   -static void vmi_enter_lazy_cpu(void)
  465 +static void vmi_start_context_switch(struct task_struct *prev)
466 466 {
467   - paravirt_enter_lazy_cpu();
  467 + paravirt_start_context_switch(prev);
468 468 vmi_ops.set_lazy_mode(2);
469 469 }
470 470  
  471 +static void vmi_end_context_switch(struct task_struct *next)
  472 +{
  473 + vmi_ops.set_lazy_mode(0);
  474 + paravirt_end_context_switch(next);
  475 +}
  476 +
471 477 static void vmi_enter_lazy_mmu(void)
472 478 {
473 479 paravirt_enter_lazy_mmu();
474 480 vmi_ops.set_lazy_mode(1);
475 481 }
476 482  
477   -static void vmi_leave_lazy(void)
  483 +static void vmi_leave_lazy_mmu(void)
478 484 {
479   - paravirt_leave_lazy(paravirt_get_lazy_mode());
480 485 vmi_ops.set_lazy_mode(0);
  486 + paravirt_leave_lazy_mmu();
481 487 }
482 488  
483 489 static inline int __init check_vmi_rom(struct vrom_header *rom)
484 490  
485 491  
... ... @@ -711,14 +717,14 @@
711 717 para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
712 718 para_fill(pv_cpu_ops.io_delay, IODelay);
713 719  
714   - para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu,
  720 + para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch,
715 721 set_lazy_mode, SetLazyMode);
716   - para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy,
  722 + para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch,
717 723 set_lazy_mode, SetLazyMode);
718 724  
719 725 para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu,
720 726 set_lazy_mode, SetLazyMode);
721   - para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy,
  727 + para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu,
722 728 set_lazy_mode, SetLazyMode);
723 729  
724 730 /* user and kernel flush are just handled with different flags to FlushTLB */
arch/x86/lguest/boot.c
... ... @@ -167,12 +167,18 @@
167 167  
168 168 /* When lazy mode is turned off reset the per-cpu lazy mode variable and then
169 169 * issue the do-nothing hypercall to flush any stored calls. */
170   -static void lguest_leave_lazy_mode(void)
  170 +static void lguest_leave_lazy_mmu_mode(void)
171 171 {
172   - paravirt_leave_lazy(paravirt_get_lazy_mode());
173 172 kvm_hypercall0(LHCALL_FLUSH_ASYNC);
  173 + paravirt_leave_lazy_mmu();
174 174 }
175 175  
  176 +static void lguest_end_context_switch(struct task_struct *next)
  177 +{
  178 + kvm_hypercall0(LHCALL_FLUSH_ASYNC);
  179 + paravirt_end_context_switch(next);
  180 +}
  181 +
176 182 /*G:033
177 183 * After that diversion we return to our first native-instruction
178 184 * replacements: four functions for interrupt control.
... ... @@ -1054,8 +1060,8 @@
1054 1060 pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
1055 1061 pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
1056 1062 pv_cpu_ops.wbinvd = lguest_wbinvd;
1057   - pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu;
1058   - pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
  1063 + pv_cpu_ops.start_context_switch = paravirt_start_context_switch;
  1064 + pv_cpu_ops.end_context_switch = lguest_end_context_switch;
1059 1065  
1060 1066 /* pagetable management */
1061 1067 pv_mmu_ops.write_cr3 = lguest_write_cr3;
... ... @@ -1068,7 +1074,7 @@
1068 1074 pv_mmu_ops.read_cr2 = lguest_read_cr2;
1069 1075 pv_mmu_ops.read_cr3 = lguest_read_cr3;
1070 1076 pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
1071   - pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
  1077 + pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
1072 1078 pv_mmu_ops.pte_update = lguest_pte_update;
1073 1079 pv_mmu_ops.pte_update_defer = lguest_pte_update;
1074 1080  
... ... @@ -201,12 +201,10 @@
201 201 if (!pmd_present(*pmd_k))
202 202 return NULL;
203 203  
204   - if (!pmd_present(*pmd)) {
  204 + if (!pmd_present(*pmd))
205 205 set_pmd(pmd, *pmd_k);
206   - arch_flush_lazy_mmu_mode();
207   - } else {
  206 + else
208 207 BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
209   - }
210 208  
211 209 return pmd_k;
212 210 }
arch/x86/mm/highmem_32.c
... ... @@ -44,7 +44,6 @@
44 44 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
45 45 BUG_ON(!pte_none(*(kmap_pte-idx)));
46 46 set_pte(kmap_pte-idx, mk_pte(page, prot));
47   - arch_flush_lazy_mmu_mode();
48 47  
49 48 return (void *)vaddr;
50 49 }
... ... @@ -74,7 +73,6 @@
74 73 #endif
75 74 }
76 75  
77   - arch_flush_lazy_mmu_mode();
78 76 pagefault_enable();
79 77 }
80 78  
arch/x86/mm/iomap_32.c
... ... @@ -82,7 +82,6 @@
82 82 if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
83 83 kpte_clear_flush(kmap_pte-idx, vaddr);
84 84  
85   - arch_flush_lazy_mmu_mode();
86 85 pagefault_enable();
87 86 }
88 87 EXPORT_SYMBOL_GPL(iounmap_atomic);
arch/x86/mm/pageattr.c
... ... @@ -839,13 +839,6 @@
839 839  
840 840 vm_unmap_aliases();
841 841  
842   - /*
843   - * If we're called with lazy mmu updates enabled, the
844   - * in-memory pte state may be stale. Flush pending updates to
845   - * bring them up to date.
846   - */
847   - arch_flush_lazy_mmu_mode();
848   -
849 842 cpa.vaddr = addr;
850 843 cpa.pages = pages;
851 844 cpa.numpages = numpages;
... ... @@ -889,13 +882,6 @@
889 882 cpa_flush_range(*addr, numpages, cache);
890 883 } else
891 884 cpa_flush_all(cache);
892   -
893   - /*
894   - * If we've been called with lazy mmu updates enabled, then
895   - * make sure that everything gets flushed out before we
896   - * return.
897   - */
898   - arch_flush_lazy_mmu_mode();
899 885  
900 886 out:
901 887 return ret;
arch/x86/xen/enlighten.c
... ... @@ -20,6 +20,7 @@
20 20 #include <linux/delay.h>
21 21 #include <linux/start_kernel.h>
22 22 #include <linux/sched.h>
  23 +#include <linux/kprobes.h>
23 24 #include <linux/bootmem.h>
24 25 #include <linux/module.h>
25 26 #include <linux/mm.h>
... ... @@ -44,6 +45,7 @@
44 45 #include <asm/processor.h>
45 46 #include <asm/proto.h>
46 47 #include <asm/msr-index.h>
  48 +#include <asm/traps.h>
47 49 #include <asm/setup.h>
48 50 #include <asm/desc.h>
49 51 #include <asm/pgtable.h>
50 52  
51 53  
... ... @@ -240,10 +242,10 @@
240 242 return HYPERVISOR_get_debugreg(reg);
241 243 }
242 244  
243   -void xen_leave_lazy(void)
  245 +static void xen_end_context_switch(struct task_struct *next)
244 246 {
245   - paravirt_leave_lazy(paravirt_get_lazy_mode());
246 247 xen_mc_flush();
  248 + paravirt_end_context_switch(next);
247 249 }
248 250  
249 251 static unsigned long xen_store_tr(void)
250 252  
... ... @@ -428,11 +430,44 @@
428 430 static int cvt_gate_to_trap(int vector, const gate_desc *val,
429 431 struct trap_info *info)
430 432 {
  433 + unsigned long addr;
  434 +
431 435 if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
432 436 return 0;
433 437  
434 438 info->vector = vector;
435   - info->address = gate_offset(*val);
  439 +
  440 + addr = gate_offset(*val);
  441 +#ifdef CONFIG_X86_64
  442 + /*
  443 + * Look for known traps using IST, and substitute them
  444 + * appropriately. The debugger ones are the only ones we care
  445 + * about. Xen will handle faults like double_fault and
  446 + * machine_check, so we should never see them. Warn if
  447 + * there's an unexpected IST-using fault handler.
  448 + */
  449 + if (addr == (unsigned long)debug)
  450 + addr = (unsigned long)xen_debug;
  451 + else if (addr == (unsigned long)int3)
  452 + addr = (unsigned long)xen_int3;
  453 + else if (addr == (unsigned long)stack_segment)
  454 + addr = (unsigned long)xen_stack_segment;
  455 + else if (addr == (unsigned long)double_fault ||
  456 + addr == (unsigned long)nmi) {
  457 + /* Don't need to handle these */
  458 + return 0;
  459 +#ifdef CONFIG_X86_MCE
  460 + } else if (addr == (unsigned long)machine_check) {
  461 + return 0;
  462 +#endif
  463 + } else {
  464 + /* Some other trap using IST? */
  465 + if (WARN_ON(val->ist != 0))
  466 + return 0;
  467 + }
  468 +#endif /* CONFIG_X86_64 */
  469 + info->address = addr;
  470 +
436 471 info->cs = gate_segment(*val);
437 472 info->flags = val->dpl;
438 473 /* interrupt gates clear IF */
439 474  
... ... @@ -623,10 +658,26 @@
623 658 xen_mc_issue(PARAVIRT_LAZY_CPU);
624 659 }
625 660  
  661 +static DEFINE_PER_CPU(unsigned long, xen_cr0_value);
  662 +
  663 +static unsigned long xen_read_cr0(void)
  664 +{
  665 + unsigned long cr0 = percpu_read(xen_cr0_value);
  666 +
  667 + if (unlikely(cr0 == 0)) {
  668 + cr0 = native_read_cr0();
  669 + percpu_write(xen_cr0_value, cr0);
  670 + }
  671 +
  672 + return cr0;
  673 +}
  674 +
626 675 static void xen_write_cr0(unsigned long cr0)
627 676 {
628 677 struct multicall_space mcs;
629 678  
  679 + percpu_write(xen_cr0_value, cr0);
  680 +
630 681 /* Only pay attention to cr0.TS; everything else is
631 682 ignored. */
632 683 mcs = xen_mc_entry(0);
... ... @@ -812,7 +863,7 @@
812 863  
813 864 .clts = xen_clts,
814 865  
815   - .read_cr0 = native_read_cr0,
  866 + .read_cr0 = xen_read_cr0,
816 867 .write_cr0 = xen_write_cr0,
817 868  
818 869 .read_cr4 = native_read_cr4,
... ... @@ -860,10 +911,8 @@
860 911 /* Xen takes care of %gs when switching to usermode for us */
861 912 .swapgs = paravirt_nop,
862 913  
863   - .lazy_mode = {
864   - .enter = paravirt_enter_lazy_cpu,
865   - .leave = xen_leave_lazy,
866   - },
  914 + .start_context_switch = paravirt_start_context_switch,
  915 + .end_context_switch = xen_end_context_switch,
867 916 };
868 917  
869 918 static const struct pv_apic_ops xen_apic_ops __initdata = {
... ... @@ -452,10 +452,6 @@
452 452 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
453 453 pte_t *ptep, pte_t pteval)
454 454 {
455   - /* updates to init_mm may be done without lock */
456   - if (mm == &init_mm)
457   - preempt_disable();
458   -
459 455 ADD_STATS(set_pte_at, 1);
460 456 // ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
461 457 ADD_STATS(set_pte_at_current, mm == current->mm);
... ... @@ -476,9 +472,7 @@
476 472 }
477 473 xen_set_pte(ptep, pteval);
478 474  
479   -out:
480   - if (mm == &init_mm)
481   - preempt_enable();
  475 +out: return;
482 476 }
483 477  
484 478 pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
485 479  
... ... @@ -1152,10 +1146,8 @@
1152 1146  
1153 1147 /* If this cpu still has a stale cr3 reference, then make sure
1154 1148 it has been flushed. */
1155   - if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) {
  1149 + if (percpu_read(xen_current_cr3) == __pa(mm->pgd))
1156 1150 load_cr3(swapper_pg_dir);
1157   - arch_flush_lazy_cpu_mode();
1158   - }
1159 1151 }
1160 1152  
1161 1153 static void xen_drop_mm_ref(struct mm_struct *mm)
... ... @@ -1168,7 +1160,6 @@
1168 1160 load_cr3(swapper_pg_dir);
1169 1161 else
1170 1162 leave_mm(smp_processor_id());
1171   - arch_flush_lazy_cpu_mode();
1172 1163 }
1173 1164  
1174 1165 /* Get the "official" set of cpus referring to our pagetable. */
... ... @@ -1876,6 +1867,14 @@
1876 1867 xen_mark_init_mm_pinned();
1877 1868 }
1878 1869  
  1870 +static void xen_leave_lazy_mmu(void)
  1871 +{
  1872 + preempt_disable();
  1873 + xen_mc_flush();
  1874 + paravirt_leave_lazy_mmu();
  1875 + preempt_enable();
  1876 +}
  1877 +
1879 1878 const struct pv_mmu_ops xen_mmu_ops __initdata = {
1880 1879 .pagetable_setup_start = xen_pagetable_setup_start,
1881 1880 .pagetable_setup_done = xen_pagetable_setup_done,
... ... @@ -1949,7 +1948,7 @@
1949 1948  
1950 1949 .lazy_mode = {
1951 1950 .enter = paravirt_enter_lazy_mmu,
1952   - .leave = xen_leave_lazy,
  1951 + .leave = xen_leave_lazy_mmu,
1953 1952 },
1954 1953  
1955 1954 .set_fixmap = xen_set_fixmap,
arch/x86/xen/setup.c
... ... @@ -61,9 +61,9 @@
61 61 * - xen_start_info
62 62 * See comment above "struct start_info" in <xen/interface/xen.h>
63 63 */
64   - e820_add_region(__pa(xen_start_info->mfn_list),
65   - xen_start_info->pt_base - xen_start_info->mfn_list,
66   - E820_RESERVED);
  64 + reserve_early(__pa(xen_start_info->mfn_list),
  65 + __pa(xen_start_info->pt_base),
  66 + "XEN START INFO");
67 67  
68 68 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
69 69  
arch/x86/xen/xen-ops.h
... ... @@ -30,7 +30,6 @@
30 30 void xen_ident_map_ISA(void);
31 31 void xen_reserve_top(void);
32 32  
33   -void xen_leave_lazy(void);
34 33 void xen_post_allocator_init(void);
35 34  
36 35 char * __init xen_memory_setup(void);
... ... @@ -18,6 +18,16 @@
18 18 secure, but slightly less efficient.
19 19 If in doubt, say yes.
20 20  
  21 +config XEN_DEV_EVTCHN
  22 + tristate "Xen /dev/xen/evtchn device"
  23 + depends on XEN
  24 + default y
  25 + help
  26 + The evtchn driver allows a userspace process to trigger event
  27 + channels and to receive notification of an event channel
  28 + firing.
  29 + If in doubt, say yes.
  30 +
21 31 config XENFS
22 32 tristate "Xen filesystem"
23 33 depends on XEN
... ... @@ -40,4 +50,15 @@
40 50 the compatibility mount point /proc/xen if it is running on
41 51 a xen platform.
42 52 If in doubt, say yes.
  53 +
  54 +config XEN_SYS_HYPERVISOR
  55 + bool "Create xen entries under /sys/hypervisor"
  56 + depends on XEN && SYSFS
  57 + select SYS_HYPERVISOR
  58 + default y
  59 + help
  60 + Create entries under /sys/hypervisor describing the Xen
  61 + hypervisor environment. When running native or in another
  62 + virtual environment, /sys/hypervisor will still be present,
  63 + but will have no xen contents.
drivers/xen/Makefile
... ... @@ -4,5 +4,7 @@
4 4 obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
5 5 obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
6 6 obj-$(CONFIG_XEN_BALLOON) += balloon.o
  7 +obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o
7 8 obj-$(CONFIG_XENFS) += xenfs/
  9 +obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
drivers/xen/events.c
... ... @@ -151,6 +151,12 @@
151 151 return info_for_irq(irq)->evtchn;
152 152 }
153 153  
  154 +unsigned irq_from_evtchn(unsigned int evtchn)
  155 +{
  156 + return evtchn_to_irq[evtchn];
  157 +}
  158 +EXPORT_SYMBOL_GPL(irq_from_evtchn);
  159 +
154 160 static enum ipi_vector ipi_from_irq(unsigned irq)
155 161 {
156 162 struct irq_info *info = info_for_irq(irq);
drivers/xen/evtchn.c
  1 +/******************************************************************************
  2 + * evtchn.c
  3 + *
  4 + * Driver for receiving and demuxing event-channel signals.
  5 + *
  6 + * Copyright (c) 2004-2005, K A Fraser
  7 + * Multi-process extensions Copyright (c) 2004, Steven Smith
  8 + *
  9 + * This program is free software; you can redistribute it and/or
  10 + * modify it under the terms of the GNU General Public License version 2
  11 + * as published by the Free Software Foundation; or, when distributed
  12 + * separately from the Linux kernel or incorporated into other
  13 + * software packages, subject to the following license:
  14 + *
  15 + * Permission is hereby granted, free of charge, to any person obtaining a copy
  16 + * of this source file (the "Software"), to deal in the Software without
  17 + * restriction, including without limitation the rights to use, copy, modify,
  18 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  19 + * and to permit persons to whom the Software is furnished to do so, subject to
  20 + * the following conditions:
  21 + *
  22 + * The above copyright notice and this permission notice shall be included in
  23 + * all copies or substantial portions of the Software.
  24 + *
  25 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  26 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  27 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  28 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  29 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  30 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  31 + * IN THE SOFTWARE.
  32 + */
  33 +
  34 +#include <linux/module.h>
  35 +#include <linux/kernel.h>
  36 +#include <linux/sched.h>
  37 +#include <linux/slab.h>
  38 +#include <linux/string.h>
  39 +#include <linux/errno.h>
  40 +#include <linux/fs.h>
  41 +#include <linux/errno.h>
  42 +#include <linux/miscdevice.h>
  43 +#include <linux/major.h>
  44 +#include <linux/proc_fs.h>
  45 +#include <linux/stat.h>
  46 +#include <linux/poll.h>
  47 +#include <linux/irq.h>
  48 +#include <linux/init.h>
  49 +#include <linux/gfp.h>
  50 +#include <linux/mutex.h>
  51 +#include <linux/cpu.h>
  52 +#include <xen/events.h>
  53 +#include <xen/evtchn.h>
  54 +#include <asm/xen/hypervisor.h>
  55 +
  56 +struct per_user_data {
  57 + struct mutex bind_mutex; /* serialize bind/unbind operations */
  58 +
  59 + /* Notification ring, accessed via /dev/xen/evtchn. */
  60 +#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t))
  61 +#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
  62 + evtchn_port_t *ring;
  63 + unsigned int ring_cons, ring_prod, ring_overflow;
  64 + struct mutex ring_cons_mutex; /* protect against concurrent readers */
  65 +
  66 + /* Processes wait on this queue when ring is empty. */
  67 + wait_queue_head_t evtchn_wait;
  68 + struct fasync_struct *evtchn_async_queue;
  69 + const char *name;
  70 +};
  71 +
  72 +/* Who's bound to each port? */
  73 +static struct per_user_data *port_user[NR_EVENT_CHANNELS];
  74 +static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
  75 +
  76 +irqreturn_t evtchn_interrupt(int irq, void *data)
  77 +{
  78 + unsigned int port = (unsigned long)data;
  79 + struct per_user_data *u;
  80 +
  81 + spin_lock(&port_user_lock);
  82 +
  83 + u = port_user[port];
  84 +
  85 + disable_irq_nosync(irq);
  86 +
  87 + if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
  88 + u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
  89 + wmb(); /* Ensure ring contents visible */
  90 + if (u->ring_cons == u->ring_prod++) {
  91 + wake_up_interruptible(&u->evtchn_wait);
  92 + kill_fasync(&u->evtchn_async_queue,
  93 + SIGIO, POLL_IN);
  94 + }
  95 + } else {
  96 + u->ring_overflow = 1;
  97 + }
  98 +
  99 + spin_unlock(&port_user_lock);
  100 +
  101 + return IRQ_HANDLED;
  102 +}
  103 +
  104 +static ssize_t evtchn_read(struct file *file, char __user *buf,
  105 + size_t count, loff_t *ppos)
  106 +{
  107 + int rc;
  108 + unsigned int c, p, bytes1 = 0, bytes2 = 0;
  109 + struct per_user_data *u = file->private_data;
  110 +
  111 + /* Whole number of ports. */
  112 + count &= ~(sizeof(evtchn_port_t)-1);
  113 +
  114 + if (count == 0)
  115 + return 0;
  116 +
  117 + if (count > PAGE_SIZE)
  118 + count = PAGE_SIZE;
  119 +
  120 + for (;;) {
  121 + mutex_lock(&u->ring_cons_mutex);
  122 +
  123 + rc = -EFBIG;
  124 + if (u->ring_overflow)
  125 + goto unlock_out;
  126 +
  127 + c = u->ring_cons;
  128 + p = u->ring_prod;
  129 + if (c != p)
  130 + break;
  131 +
  132 + mutex_unlock(&u->ring_cons_mutex);
  133 +
  134 + if (file->f_flags & O_NONBLOCK)
  135 + return -EAGAIN;
  136 +
  137 + rc = wait_event_interruptible(u->evtchn_wait,
  138 + u->ring_cons != u->ring_prod);
  139 + if (rc)
  140 + return rc;
  141 + }
  142 +
  143 + /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
  144 + if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
  145 + bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
  146 + sizeof(evtchn_port_t);
  147 + bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
  148 + } else {
  149 + bytes1 = (p - c) * sizeof(evtchn_port_t);
  150 + bytes2 = 0;
  151 + }
  152 +
  153 + /* Truncate chunks according to caller's maximum byte count. */
  154 + if (bytes1 > count) {
  155 + bytes1 = count;
  156 + bytes2 = 0;
  157 + } else if ((bytes1 + bytes2) > count) {
  158 + bytes2 = count - bytes1;
  159 + }
  160 +
  161 + rc = -EFAULT;
  162 + rmb(); /* Ensure that we see the port before we copy it. */
  163 + if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
  164 + ((bytes2 != 0) &&
  165 + copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
  166 + goto unlock_out;
  167 +
  168 + u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
  169 + rc = bytes1 + bytes2;
  170 +
  171 + unlock_out:
  172 + mutex_unlock(&u->ring_cons_mutex);
  173 + return rc;
  174 +}
  175 +
  176 +static ssize_t evtchn_write(struct file *file, const char __user *buf,
  177 + size_t count, loff_t *ppos)
  178 +{
  179 + int rc, i;
  180 + evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
  181 + struct per_user_data *u = file->private_data;
  182 +
  183 + if (kbuf == NULL)
  184 + return -ENOMEM;
  185 +
  186 + /* Whole number of ports. */
  187 + count &= ~(sizeof(evtchn_port_t)-1);
  188 +
  189 + rc = 0;
  190 + if (count == 0)
  191 + goto out;
  192 +
  193 + if (count > PAGE_SIZE)
  194 + count = PAGE_SIZE;
  195 +
  196 + rc = -EFAULT;
  197 + if (copy_from_user(kbuf, buf, count) != 0)
  198 + goto out;
  199 +
  200 + spin_lock_irq(&port_user_lock);
  201 + for (i = 0; i < (count/sizeof(evtchn_port_t)); i++)
  202 + if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
  203 + enable_irq(irq_from_evtchn(kbuf[i]));
  204 + spin_unlock_irq(&port_user_lock);
  205 +
  206 + rc = count;
  207 +
  208 + out:
  209 + free_page((unsigned long)kbuf);
  210 + return rc;
  211 +}
  212 +
  213 +static int evtchn_bind_to_user(struct per_user_data *u, int port)
  214 +{
  215 + int rc = 0;
  216 +
  217 + /*
  218 + * Ports are never reused, so every caller should pass in a
  219 + * unique port.
  220 + *
  221 + * (Locking not necessary because we haven't registered the
  222 + * interrupt handler yet, and our caller has already
  223 + * serialized bind operations.)
  224 + */
  225 + BUG_ON(port_user[port] != NULL);
  226 + port_user[port] = u;
  227 +
  228 + rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
  229 + u->name, (void *)(unsigned long)port);
  230 + if (rc >= 0)
  231 + rc = 0;
  232 +
  233 + return rc;
  234 +}
  235 +
/*
 * Tear down the irq handler bound to @port and release the port's
 * ownership record.  The compiler barrier keeps the two steps ordered:
 * the handler must be gone before the port appears unowned.
 */
static void evtchn_unbind_from_user(struct per_user_data *u, int port)
{
	int irq = irq_from_evtchn(port);

	unbind_from_irqhandler(irq, (void *)(unsigned long)port);

	/* make sure we unbind the irq handler before clearing the port */
	barrier();

	port_user[port] = NULL;
}
  247 +
/*
 * ioctl interface for the event-channel device: bind ports to this
 * file handle (VIRQ, interdomain, or freshly allocated), unbind them,
 * send notifications, and reset the delivery ring.  Successful binds
 * return the port number; other successes return 0; failures return a
 * negative errno.
 */
static long evtchn_ioctl(struct file *file,
			 unsigned int cmd, unsigned long arg)
{
	int rc;
	struct per_user_data *u = file->private_data;
	void __user *uarg = (void __user *) arg;

	/* Prevent bind from racing with unbind */
	mutex_lock(&u->bind_mutex);

	switch (cmd) {
	case IOCTL_EVTCHN_BIND_VIRQ: {
		/* Bind a fresh port to virtual IRQ @virq (delivered to vcpu 0). */
		struct ioctl_evtchn_bind_virq bind;
		struct evtchn_bind_virq bind_virq;

		rc = -EFAULT;
		if (copy_from_user(&bind, uarg, sizeof(bind)))
			break;

		bind_virq.virq = bind.virq;
		bind_virq.vcpu = 0;
		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
						 &bind_virq);
		if (rc != 0)
			break;

		rc = evtchn_bind_to_user(u, bind_virq.port);
		if (rc == 0)
			rc = bind_virq.port;
		break;
	}

	case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
		/* Bind a fresh local port to a remote <domain, port> pair. */
		struct ioctl_evtchn_bind_interdomain bind;
		struct evtchn_bind_interdomain bind_interdomain;

		rc = -EFAULT;
		if (copy_from_user(&bind, uarg, sizeof(bind)))
			break;

		bind_interdomain.remote_dom = bind.remote_domain;
		bind_interdomain.remote_port = bind.remote_port;
		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
						 &bind_interdomain);
		if (rc != 0)
			break;

		rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
		if (rc == 0)
			rc = bind_interdomain.local_port;
		break;
	}

	case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
		/* Allocate an unbound port for @remote_domain to connect to. */
		struct ioctl_evtchn_bind_unbound_port bind;
		struct evtchn_alloc_unbound alloc_unbound;

		rc = -EFAULT;
		if (copy_from_user(&bind, uarg, sizeof(bind)))
			break;

		alloc_unbound.dom = DOMID_SELF;
		alloc_unbound.remote_dom = bind.remote_domain;
		rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
						 &alloc_unbound);
		if (rc != 0)
			break;

		rc = evtchn_bind_to_user(u, alloc_unbound.port);
		if (rc == 0)
			rc = alloc_unbound.port;
		break;
	}

	case IOCTL_EVTCHN_UNBIND: {
		/* Unbind a port previously bound by this file handle. */
		struct ioctl_evtchn_unbind unbind;

		rc = -EFAULT;
		if (copy_from_user(&unbind, uarg, sizeof(unbind)))
			break;

		rc = -EINVAL;
		if (unbind.port >= NR_EVENT_CHANNELS)
			break;

		spin_lock_irq(&port_user_lock);

		/* Only the owner of the port may unbind it. */
		rc = -ENOTCONN;
		if (port_user[unbind.port] != u) {
			spin_unlock_irq(&port_user_lock);
			break;
		}

		evtchn_unbind_from_user(u, unbind.port);

		spin_unlock_irq(&port_user_lock);

		rc = 0;
		break;
	}

	case IOCTL_EVTCHN_NOTIFY: {
		/* Send an event on a port owned by this file handle. */
		struct ioctl_evtchn_notify notify;

		rc = -EFAULT;
		if (copy_from_user(&notify, uarg, sizeof(notify)))
			break;

		if (notify.port >= NR_EVENT_CHANNELS) {
			rc = -EINVAL;
		} else if (port_user[notify.port] != u) {
			rc = -ENOTCONN;
		} else {
			notify_remote_via_evtchn(notify.port);
			rc = 0;
		}
		break;
	}

	case IOCTL_EVTCHN_RESET: {
		/* Initialise the ring to empty. Clear errors. */
		mutex_lock(&u->ring_cons_mutex);
		spin_lock_irq(&port_user_lock);
		u->ring_cons = u->ring_prod = u->ring_overflow = 0;
		spin_unlock_irq(&port_user_lock);
		mutex_unlock(&u->ring_cons_mutex);
		rc = 0;
		break;
	}

	default:
		rc = -ENOSYS;
		break;
	}
	mutex_unlock(&u->bind_mutex);

	return rc;
}
  386 +
  387 +static unsigned int evtchn_poll(struct file *file, poll_table *wait)
  388 +{
  389 + unsigned int mask = POLLOUT | POLLWRNORM;
  390 + struct per_user_data *u = file->private_data;
  391 +
  392 + poll_wait(file, &u->evtchn_wait, wait);
  393 + if (u->ring_cons != u->ring_prod)
  394 + mask |= POLLIN | POLLRDNORM;
  395 + if (u->ring_overflow)
  396 + mask = POLLERR;
  397 + return mask;
  398 +}
  399 +
/* Register/unregister this file for SIGIO-style async notification. */
static int evtchn_fasync(int fd, struct file *filp, int on)
{
	struct per_user_data *u = filp->private_data;
	return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
}
  405 +
  406 +static int evtchn_open(struct inode *inode, struct file *filp)
  407 +{
  408 + struct per_user_data *u;
  409 +
  410 + u = kzalloc(sizeof(*u), GFP_KERNEL);
  411 + if (u == NULL)
  412 + return -ENOMEM;
  413 +
  414 + u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm);
  415 + if (u->name == NULL) {
  416 + kfree(u);
  417 + return -ENOMEM;
  418 + }
  419 +
  420 + init_waitqueue_head(&u->evtchn_wait);
  421 +
  422 + u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
  423 + if (u->ring == NULL) {
  424 + kfree(u->name);
  425 + kfree(u);
  426 + return -ENOMEM;
  427 + }
  428 +
  429 + mutex_init(&u->bind_mutex);
  430 + mutex_init(&u->ring_cons_mutex);
  431 +
  432 + filp->private_data = u;
  433 +
  434 + return 0;
  435 +}
  436 +
  437 +static int evtchn_release(struct inode *inode, struct file *filp)
  438 +{
  439 + int i;
  440 + struct per_user_data *u = filp->private_data;
  441 +
  442 + spin_lock_irq(&port_user_lock);
  443 +
  444 + free_page((unsigned long)u->ring);
  445 +
  446 + for (i = 0; i < NR_EVENT_CHANNELS; i++) {
  447 + if (port_user[i] != u)
  448 + continue;
  449 +
  450 + evtchn_unbind_from_user(port_user[i], i);
  451 + }
  452 +
  453 + spin_unlock_irq(&port_user_lock);
  454 +
  455 + kfree(u->name);
  456 + kfree(u);
  457 +
  458 + return 0;
  459 +}
  460 +
/* File operations for the event-channel device node. */
static const struct file_operations evtchn_fops = {
	.owner   = THIS_MODULE,
	.read    = evtchn_read,
	.write   = evtchn_write,
	.unlocked_ioctl = evtchn_ioctl,
	.poll    = evtchn_poll,
	.fasync  = evtchn_fasync,
	.open    = evtchn_open,
	.release = evtchn_release,
};
  471 +
/* Misc device "evtchn" with a dynamically assigned minor number. */
static struct miscdevice evtchn_miscdev = {
	.minor        = MISC_DYNAMIC_MINOR,
	.name         = "evtchn",
	.fops         = &evtchn_fops,
};
/*
 * Module init: register the event-channel misc device.  Refuses to
 * load when not running as a Xen guest.
 */
static int __init evtchn_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	spin_lock_init(&port_user_lock);
	/* NOTE(review): if port_user is static storage it is already
	 * zero-initialized, making this memset redundant -- confirm
	 * against its declaration (outside this view). */
	memset(port_user, 0, sizeof(port_user));

	/* Register the "evtchn" misc device. */
	err = misc_register(&evtchn_miscdev);
	if (err != 0) {
		printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
		return err;
	}

	printk(KERN_INFO "Event-channel device installed.\n");

	return 0;
}
  498 +
/* Module exit: remove the misc device registered by evtchn_init(). */
static void __exit evtchn_cleanup(void)
{
	misc_deregister(&evtchn_miscdev);
}
  503 +
  504 +module_init(evtchn_init);
  505 +module_exit(evtchn_cleanup);
  506 +
  507 +MODULE_LICENSE("GPL");
drivers/xen/manage.c
... ... @@ -98,9 +98,8 @@
98 98 goto out;
99 99 }
100 100  
101   - printk("suspending xenbus...\n");
102   - /* XXX use normal device tree? */
103   - xenbus_suspend();
  101 + printk(KERN_DEBUG "suspending xenstore...\n");
  102 + xs_suspend();
104 103  
105 104 err = device_power_down(PMSG_SUSPEND);
106 105 if (err) {
107 106  
... ... @@ -116,9 +115,9 @@
116 115  
117 116 if (!cancelled) {
118 117 xen_arch_resume();
119   - xenbus_resume();
  118 + xs_resume();
120 119 } else
121   - xenbus_suspend_cancel();
  120 + xs_suspend_cancel();
122 121  
123 122 device_power_up(PMSG_RESUME);
124 123  
drivers/xen/sys-hypervisor.c
  1 +/*
  2 + * copyright (c) 2006 IBM Corporation
  3 + * Authored by: Mike D. Day <ncmike@us.ibm.com>
  4 + *
  5 + * This program is free software; you can redistribute it and/or modify
  6 + * it under the terms of the GNU General Public License version 2 as
  7 + * published by the Free Software Foundation.
  8 + */
  9 +
  10 +#include <linux/kernel.h>
  11 +#include <linux/module.h>
  12 +#include <linux/kobject.h>
  13 +
  14 +#include <asm/xen/hypervisor.h>
  15 +#include <asm/xen/hypercall.h>
  16 +
  17 +#include <xen/xenbus.h>
  18 +#include <xen/interface/xen.h>
  19 +#include <xen/interface/version.h>
  20 +
/* Declare a read-only /sys/hypervisor attribute called <_name>. */
#define HYPERVISOR_ATTR_RO(_name) \
static struct hyp_sysfs_attr  _name##_attr = __ATTR_RO(_name)

/* Declare a read-write (0644) /sys/hypervisor attribute called <_name>. */
#define HYPERVISOR_ATTR_RW(_name) \
static struct hyp_sysfs_attr _name##_attr = \
	__ATTR(_name, 0644, _name##_show, _name##_store)

/* A /sys/hypervisor attribute: plain sysfs attribute plus typed callbacks. */
struct hyp_sysfs_attr {
	struct attribute attr;
	ssize_t (*show)(struct hyp_sysfs_attr *, char *);
	ssize_t (*store)(struct hyp_sysfs_attr *, const char *, size_t);
	void *hyp_attr_data;	/* NOTE(review): unused within this file */
};
  34 +
/* /sys/hypervisor/type: always reports "xen". */
static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer)
{
	static const char type_str[] = "xen\n";

	return sprintf(buffer, "%s", type_str);
}
  39 +
  40 +HYPERVISOR_ATTR_RO(type);
  41 +
/* Create the /sys/hypervisor/type attribute file. */
static int __init xen_sysfs_type_init(void)
{
	return sysfs_create_file(hypervisor_kobj, &type_attr.attr);
}

/* Remove the /sys/hypervisor/type attribute file. */
static void xen_sysfs_type_destroy(void)
{
	sysfs_remove_file(hypervisor_kobj, &type_attr.attr);
}
  51 +
  52 +/* xen version attributes */
  53 +static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer)
  54 +{
  55 + int version = HYPERVISOR_xen_version(XENVER_version, NULL);
  56 + if (version)
  57 + return sprintf(buffer, "%d\n", version >> 16);
  58 + return -ENODEV;
  59 +}
  60 +
  61 +HYPERVISOR_ATTR_RO(major);
  62 +
  63 +static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer)
  64 +{
  65 + int version = HYPERVISOR_xen_version(XENVER_version, NULL);
  66 + if (version)
  67 + return sprintf(buffer, "%d\n", version & 0xff);
  68 + return -ENODEV;
  69 +}
  70 +
  71 +HYPERVISOR_ATTR_RO(minor);
  72 +
  73 +static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer)
  74 +{
  75 + int ret = -ENOMEM;
  76 + char *extra;
  77 +
  78 + extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL);
  79 + if (extra) {
  80 + ret = HYPERVISOR_xen_version(XENVER_extraversion, extra);
  81 + if (!ret)
  82 + ret = sprintf(buffer, "%s\n", extra);
  83 + kfree(extra);
  84 + }
  85 +
  86 + return ret;
  87 +}
  88 +
  89 +HYPERVISOR_ATTR_RO(extra);
  90 +
/* Attributes grouped under /sys/hypervisor/version/. */
static struct attribute *version_attrs[] = {
	&major_attr.attr,
	&minor_attr.attr,
	&extra_attr.attr,
	NULL
};

static struct attribute_group version_group = {
	.name = "version",
	.attrs = version_attrs,
};
  102 +
/* Create the /sys/hypervisor/version/ attribute group. */
static int __init xen_sysfs_version_init(void)
{
	return sysfs_create_group(hypervisor_kobj, &version_group);
}

/* Remove the /sys/hypervisor/version/ attribute group. */
static void xen_sysfs_version_destroy(void)
{
	sysfs_remove_group(hypervisor_kobj, &version_group);
}
  112 +
  113 +/* UUID */
  114 +
/*
 * /sys/hypervisor/uuid: this domain's UUID, read from xenstore.  The
 * "vm" key gives the per-VM path; "<vmpath>/uuid" holds the value.
 */
static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer)
{
	char *vm, *val;
	int ret;
	extern int xenstored_ready;	/* set once xenstored has started */

	/* Xenstore reads presumably cannot succeed before xenstored is
	 * up, hence the early -EBUSY -- TODO confirm. */
	if (!xenstored_ready)
		return -EBUSY;

	vm = xenbus_read(XBT_NIL, "vm", "", NULL);
	if (IS_ERR(vm))
		return PTR_ERR(vm);
	val = xenbus_read(XBT_NIL, vm, "uuid", NULL);
	kfree(vm);
	if (IS_ERR(val))
		return PTR_ERR(val);
	ret = sprintf(buffer, "%s\n", val);
	kfree(val);
	return ret;
}
  135 +
  136 +HYPERVISOR_ATTR_RO(uuid);
  137 +
/* Create the /sys/hypervisor/uuid attribute file. */
static int __init xen_sysfs_uuid_init(void)
{
	return sysfs_create_file(hypervisor_kobj, &uuid_attr.attr);
}

/* Remove the /sys/hypervisor/uuid attribute file. */
static void xen_sysfs_uuid_destroy(void)
{
	sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr);
}
  147 +
  148 +/* xen compilation attributes */
  149 +
  150 +static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer)
  151 +{
  152 + int ret = -ENOMEM;
  153 + struct xen_compile_info *info;
  154 +
  155 + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
  156 + if (info) {
  157 + ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
  158 + if (!ret)
  159 + ret = sprintf(buffer, "%s\n", info->compiler);
  160 + kfree(info);
  161 + }
  162 +
  163 + return ret;
  164 +}
  165 +
  166 +HYPERVISOR_ATTR_RO(compiler);
  167 +
  168 +static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer)
  169 +{
  170 + int ret = -ENOMEM;
  171 + struct xen_compile_info *info;
  172 +
  173 + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
  174 + if (info) {
  175 + ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
  176 + if (!ret)
  177 + ret = sprintf(buffer, "%s\n", info->compile_by);
  178 + kfree(info);
  179 + }
  180 +
  181 + return ret;
  182 +}
  183 +
  184 +HYPERVISOR_ATTR_RO(compiled_by);
  185 +
  186 +static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer)
  187 +{
  188 + int ret = -ENOMEM;
  189 + struct xen_compile_info *info;
  190 +
  191 + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
  192 + if (info) {
  193 + ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
  194 + if (!ret)
  195 + ret = sprintf(buffer, "%s\n", info->compile_date);
  196 + kfree(info);
  197 + }
  198 +
  199 + return ret;
  200 +}
  201 +
  202 +HYPERVISOR_ATTR_RO(compile_date);
  203 +
/* Attributes grouped under /sys/hypervisor/compilation/. */
static struct attribute *xen_compile_attrs[] = {
	&compiler_attr.attr,
	&compiled_by_attr.attr,
	&compile_date_attr.attr,
	NULL
};

static struct attribute_group xen_compilation_group = {
	.name = "compilation",
	.attrs = xen_compile_attrs,
};
  215 +
  216 +int __init static xen_compilation_init(void)
  217 +{
  218 + return sysfs_create_group(hypervisor_kobj, &xen_compilation_group);
  219 +}
  220 +
/* Remove the /sys/hypervisor/compilation/ attribute group. */
static void xen_compilation_destroy(void)
{
	sysfs_remove_group(hypervisor_kobj, &xen_compilation_group);
}
  225 +
  226 +/* xen properties info */
  227 +
  228 +static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer)
  229 +{
  230 + int ret = -ENOMEM;
  231 + char *caps;
  232 +
  233 + caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL);
  234 + if (caps) {
  235 + ret = HYPERVISOR_xen_version(XENVER_capabilities, caps);
  236 + if (!ret)
  237 + ret = sprintf(buffer, "%s\n", caps);
  238 + kfree(caps);
  239 + }
  240 +
  241 + return ret;
  242 +}
  243 +
  244 +HYPERVISOR_ATTR_RO(capabilities);
  245 +
  246 +static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer)
  247 +{
  248 + int ret = -ENOMEM;
  249 + char *cset;
  250 +
  251 + cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL);
  252 + if (cset) {
  253 + ret = HYPERVISOR_xen_version(XENVER_changeset, cset);
  254 + if (!ret)
  255 + ret = sprintf(buffer, "%s\n", cset);
  256 + kfree(cset);
  257 + }
  258 +
  259 + return ret;
  260 +}
  261 +
  262 +HYPERVISOR_ATTR_RO(changeset);
  263 +
  264 +static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer)
  265 +{
  266 + int ret = -ENOMEM;
  267 + struct xen_platform_parameters *parms;
  268 +
  269 + parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL);
  270 + if (parms) {
  271 + ret = HYPERVISOR_xen_version(XENVER_platform_parameters,
  272 + parms);
  273 + if (!ret)
  274 + ret = sprintf(buffer, "%lx\n", parms->virt_start);
  275 + kfree(parms);
  276 + }
  277 +
  278 + return ret;
  279 +}
  280 +
  281 +HYPERVISOR_ATTR_RO(virtual_start);
  282 +
  283 +static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer)
  284 +{
  285 + int ret;
  286 +
  287 + ret = HYPERVISOR_xen_version(XENVER_pagesize, NULL);
  288 + if (ret > 0)
  289 + ret = sprintf(buffer, "%x\n", ret);
  290 +
  291 + return ret;
  292 +}
  293 +
  294 +HYPERVISOR_ATTR_RO(pagesize);
  295 +
  296 +static ssize_t xen_feature_show(int index, char *buffer)
  297 +{
  298 + ssize_t ret;
  299 + struct xen_feature_info info;
  300 +
  301 + info.submap_idx = index;
  302 + ret = HYPERVISOR_xen_version(XENVER_get_features, &info);
  303 + if (!ret)
  304 + ret = sprintf(buffer, "%08x", info.submap);
  305 +
  306 + return ret;
  307 +}
  308 +
/*
 * Concatenate all feature submaps, most significant first
 * (XENFEAT_NR_SUBMAPS-1 down to 0), each as eight hex digits, followed
 * by a newline.
 */
static ssize_t features_show(struct hyp_sysfs_attr *attr, char *buffer)
{
	ssize_t len;
	int i;

	len = 0;
	for (i = XENFEAT_NR_SUBMAPS-1; i >= 0; i--) {
		int ret = xen_feature_show(i, buffer + len);
		if (ret < 0) {
			/* Report the error only if nothing was emitted yet;
			 * otherwise return the partial output. */
			if (len == 0)
				len = ret;
			break;
		}
		len += ret;
	}
	if (len > 0)
		buffer[len++] = '\n';

	return len;
}
  329 +
  330 +HYPERVISOR_ATTR_RO(features);
  331 +
/* Attributes grouped under /sys/hypervisor/properties/. */
static struct attribute *xen_properties_attrs[] = {
	&capabilities_attr.attr,
	&changeset_attr.attr,
	&virtual_start_attr.attr,
	&pagesize_attr.attr,
	&features_attr.attr,
	NULL
};

static struct attribute_group xen_properties_group = {
	.name = "properties",
	.attrs = xen_properties_attrs,
};
  345 +
/* Create the /sys/hypervisor/properties/ attribute group. */
static int __init xen_properties_init(void)
{
	return sysfs_create_group(hypervisor_kobj, &xen_properties_group);
}

/* Remove the /sys/hypervisor/properties/ attribute group. */
static void xen_properties_destroy(void)
{
	sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
}
  355 +
/*
 * Module init: populate /sys/hypervisor with the type, version/,
 * compilation/, uuid and properties/ entries.  On any failure the
 * goto chain unwinds everything created so far, in reverse order.
 */
static int __init hyper_sysfs_init(void)
{
	int ret;

	if (!xen_domain())
		return -ENODEV;

	ret = xen_sysfs_type_init();
	if (ret)
		goto out;
	ret = xen_sysfs_version_init();
	if (ret)
		goto version_out;
	ret = xen_compilation_init();
	if (ret)
		goto comp_out;
	ret = xen_sysfs_uuid_init();
	if (ret)
		goto uuid_out;
	ret = xen_properties_init();
	if (ret)
		goto prop_out;

	goto out;

prop_out:
	xen_sysfs_uuid_destroy();
uuid_out:
	xen_compilation_destroy();
comp_out:
	xen_sysfs_version_destroy();
version_out:
	xen_sysfs_type_destroy();
out:
	return ret;
}
  392 +
  393 +static void __exit hyper_sysfs_exit(void)
  394 +{
  395 + xen_properties_destroy();
  396 + xen_compilation_destroy();
  397 + xen_sysfs_uuid_destroy();
  398 + xen_sysfs_version_destroy();
  399 + xen_sysfs_type_destroy();
  400 +
  401 +}
  402 +module_init(hyper_sysfs_init);
  403 +module_exit(hyper_sysfs_exit);
  404 +
  405 +static ssize_t hyp_sysfs_show(struct kobject *kobj,
  406 + struct attribute *attr,
  407 + char *buffer)
  408 +{
  409 + struct hyp_sysfs_attr *hyp_attr;
  410 + hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
  411 + if (hyp_attr->show)
  412 + return hyp_attr->show(hyp_attr, buffer);
  413 + return 0;
  414 +}
  415 +
  416 +static ssize_t hyp_sysfs_store(struct kobject *kobj,
  417 + struct attribute *attr,
  418 + const char *buffer,
  419 + size_t len)
  420 +{
  421 + struct hyp_sysfs_attr *hyp_attr;
  422 + hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
  423 + if (hyp_attr->store)
  424 + return hyp_attr->store(hyp_attr, buffer, len);
  425 + return 0;
  426 +}
  427 +
/* Route all /sys/hypervisor attribute I/O through the dispatchers above. */
static struct sysfs_ops hyp_sysfs_ops = {
	.show = hyp_sysfs_show,
	.store = hyp_sysfs_store,
};

static struct kobj_type hyp_sysfs_kobj_type = {
	.sysfs_ops = &hyp_sysfs_ops,
};
  436 +
/*
 * Install our sysfs_ops on the global /sys/hypervisor kobject so the
 * typed show/store callbacks are used for the attributes created here.
 * NOTE(review): this overwrites the ktype of an already-registered
 * kobject -- presumably hypervisor_kobj has no conflicting ktype;
 * confirm against its definition.
 */
static int __init hypervisor_subsys_init(void)
{
	if (!xen_domain())
		return -ENODEV;

	hypervisor_kobj->ktype = &hyp_sysfs_kobj_type;
	return 0;
}
  445 +device_initcall(hypervisor_subsys_init);
drivers/xen/xenbus/xenbus_probe.c
... ... @@ -71,6 +71,9 @@
71 71  
72 72 static void xenbus_dev_shutdown(struct device *_dev);
73 73  
  74 +static int xenbus_dev_suspend(struct device *dev, pm_message_t state);
  75 +static int xenbus_dev_resume(struct device *dev);
  76 +
74 77 /* If something in array of ids matches this device, return it. */
75 78 static const struct xenbus_device_id *
76 79 match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
... ... @@ -188,6 +191,9 @@
188 191 .remove = xenbus_dev_remove,
189 192 .shutdown = xenbus_dev_shutdown,
190 193 .dev_attrs = xenbus_dev_attrs,
  194 +
  195 + .suspend = xenbus_dev_suspend,
  196 + .resume = xenbus_dev_resume,
191 197 },
192 198 };
193 199  
... ... @@ -654,6 +660,7 @@
654 660  
655 661 kfree(root);
656 662 }
  663 +EXPORT_SYMBOL_GPL(xenbus_dev_changed);
657 664  
658 665 static void frontend_changed(struct xenbus_watch *watch,
659 666 const char **vec, unsigned int len)
... ... @@ -669,7 +676,7 @@
669 676 .callback = frontend_changed,
670 677 };
671 678  
672   -static int suspend_dev(struct device *dev, void *data)
  679 +static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
673 680 {
674 681 int err = 0;
675 682 struct xenbus_driver *drv;
676 683  
677 684  
... ... @@ -682,36 +689,15 @@
682 689 drv = to_xenbus_driver(dev->driver);
683 690 xdev = container_of(dev, struct xenbus_device, dev);
684 691 if (drv->suspend)
685   - err = drv->suspend(xdev);
  692 + err = drv->suspend(xdev, state);
686 693 if (err)
687 694 printk(KERN_WARNING
688 695 "xenbus: suspend %s failed: %i\n", dev_name(dev), err);
689 696 return 0;
690 697 }
691 698  
692   -static int suspend_cancel_dev(struct device *dev, void *data)
  699 +static int xenbus_dev_resume(struct device *dev)
693 700 {
694   - int err = 0;
695   - struct xenbus_driver *drv;
696   - struct xenbus_device *xdev;
697   -
698   - DPRINTK("");
699   -
700   - if (dev->driver == NULL)
701   - return 0;
702   - drv = to_xenbus_driver(dev->driver);
703   - xdev = container_of(dev, struct xenbus_device, dev);
704   - if (drv->suspend_cancel)
705   - err = drv->suspend_cancel(xdev);
706   - if (err)
707   - printk(KERN_WARNING
708   - "xenbus: suspend_cancel %s failed: %i\n",
709   - dev_name(dev), err);
710   - return 0;
711   -}
712   -
713   -static int resume_dev(struct device *dev, void *data)
714   -{
715 701 int err;
716 702 struct xenbus_driver *drv;
717 703 struct xenbus_device *xdev;
... ... @@ -754,33 +740,6 @@
754 740  
755 741 return 0;
756 742 }
757   -
758   -void xenbus_suspend(void)
759   -{
760   - DPRINTK("");
761   -
762   - bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
763   - xenbus_backend_suspend(suspend_dev);
764   - xs_suspend();
765   -}
766   -EXPORT_SYMBOL_GPL(xenbus_suspend);
767   -
768   -void xenbus_resume(void)
769   -{
770   - xb_init_comms();
771   - xs_resume();
772   - bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
773   - xenbus_backend_resume(resume_dev);
774   -}
775   -EXPORT_SYMBOL_GPL(xenbus_resume);
776   -
777   -void xenbus_suspend_cancel(void)
778   -{
779   - xs_suspend_cancel();
780   - bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev);
781   - xenbus_backend_resume(suspend_cancel_dev);
782   -}
783   -EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
784 743  
785 744 /* A flag to determine if xenstored is 'ready' (i.e. has started) */
786 745 int xenstored_ready = 0;
drivers/xen/xenbus/xenbus_xs.c
... ... @@ -673,6 +673,8 @@
673 673 struct xenbus_watch *watch;
674 674 char token[sizeof(watch) * 2 + 1];
675 675  
  676 + xb_init_comms();
  677 +
676 678 mutex_unlock(&xs_state.response_mutex);
677 679 mutex_unlock(&xs_state.request_mutex);
678 680 up_write(&xs_state.transaction_mutex);
drivers/xen/xenfs/super.c
... ... @@ -20,10 +20,27 @@
20 20 MODULE_DESCRIPTION("Xen filesystem");
21 21 MODULE_LICENSE("GPL");
22 22  
  23 +static ssize_t capabilities_read(struct file *file, char __user *buf,
  24 + size_t size, loff_t *off)
  25 +{
  26 + char *tmp = "";
  27 +
  28 + if (xen_initial_domain())
  29 + tmp = "control_d\n";
  30 +
  31 + return simple_read_from_buffer(buf, size, off, tmp, strlen(tmp));
  32 +}
  33 +
/* Read-only file operations for the xenfs "capabilities" file. */
static const struct file_operations capabilities_file_ops = {
	.read = capabilities_read,
};
  37 +
23 38 static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
24 39 {
25 40 static struct tree_descr xenfs_files[] = {
26   - [2] = {"xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR},
  41 + [1] = {},
  42 + { "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR },
  43 + { "capabilities", &capabilities_file_ops, S_IRUGO },
27 44 {""},
28 45 };
29 46  
... ... @@ -8,4 +8,5 @@
8 8 header-y += rdma/
9 9 header-y += video/
10 10 header-y += drm/
  11 +header-y += xen/
include/asm-generic/pgtable.h
... ... @@ -280,17 +280,18 @@
280 280 #endif
281 281  
282 282 /*
283   - * A facility to provide batching of the reload of page tables with the
284   - * actual context switch code for paravirtualized guests. By convention,
285   - * only one of the lazy modes (CPU, MMU) should be active at any given
286   - * time, entry should never be nested, and entry and exits should always
287   - * be paired. This is for sanity of maintaining and reasoning about the
288   - * kernel code.
  283 + * A facility to provide batching of the reload of page tables and
  284 + * other process state with the actual context switch code for
  285 + * paravirtualized guests. By convention, only one of the batched
  286 + * update (lazy) modes (CPU, MMU) should be active at any given time,
  287 + * entry should never be nested, and entry and exits should always be
  288 + * paired. This is for sanity of maintaining and reasoning about the
  289 + * kernel code. In this case, the exit (end of the context switch) is
  290 + * in architecture-specific code, and so doesn't need a generic
  291 + * definition.
289 292 */
290   -#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE
291   -#define arch_enter_lazy_cpu_mode() do {} while (0)
292   -#define arch_leave_lazy_cpu_mode() do {} while (0)
293   -#define arch_flush_lazy_cpu_mode() do {} while (0)
  293 +#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
  294 +#define arch_start_context_switch(prev) do {} while (0)
294 295 #endif
295 296  
296 297 #ifndef __HAVE_PFNMAP_TRACKING
  1 +header-y += evtchn.h
include/xen/events.h
... ... @@ -53,5 +53,8 @@
53 53 irq will be disabled so it won't deliver an interrupt. */
54 54 void xen_poll_irq(int irq);
55 55  
  56 +/* Determine the IRQ which is bound to an event channel */
  57 +unsigned irq_from_evtchn(unsigned int evtchn);
  58 +
56 59 #endif /* _XEN_EVENTS_H */
include/xen/evtchn.h
  1 +/******************************************************************************
  2 + * evtchn.h
  3 + *
  4 + * Interface to /dev/xen/evtchn.
  5 + *
  6 + * Copyright (c) 2003-2005, K A Fraser
  7 + *
  8 + * This program is free software; you can redistribute it and/or
  9 + * modify it under the terms of the GNU General Public License version 2
  10 + * as published by the Free Software Foundation; or, when distributed
  11 + * separately from the Linux kernel or incorporated into other
  12 + * software packages, subject to the following license:
  13 + *
  14 + * Permission is hereby granted, free of charge, to any person obtaining a copy
  15 + * of this source file (the "Software"), to deal in the Software without
  16 + * restriction, including without limitation the rights to use, copy, modify,
  17 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  18 + * and to permit persons to whom the Software is furnished to do so, subject to
  19 + * the following conditions:
  20 + *
  21 + * The above copyright notice and this permission notice shall be included in
  22 + * all copies or substantial portions of the Software.
  23 + *
  24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  25 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  26 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  27 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  28 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  29 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  30 + * IN THE SOFTWARE.
  31 + */
  32 +
  33 +#ifndef __LINUX_PUBLIC_EVTCHN_H__
  34 +#define __LINUX_PUBLIC_EVTCHN_H__
  35 +
  36 +/*
  37 + * Bind a fresh port to VIRQ @virq.
  38 + * Return allocated port.
  39 + */
  40 +#define IOCTL_EVTCHN_BIND_VIRQ \
  41 + _IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq))
  42 +struct ioctl_evtchn_bind_virq {
  43 + unsigned int virq;
  44 +};
  45 +
  46 +/*
  47 + * Bind a fresh port to remote <@remote_domain, @remote_port>.
  48 + * Return allocated port.
  49 + */
  50 +#define IOCTL_EVTCHN_BIND_INTERDOMAIN \
  51 + _IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain))
  52 +struct ioctl_evtchn_bind_interdomain {
  53 + unsigned int remote_domain, remote_port;
  54 +};
  55 +
  56 +/*
  57 + * Allocate a fresh port for binding to @remote_domain.
  58 + * Return allocated port.
  59 + */
  60 +#define IOCTL_EVTCHN_BIND_UNBOUND_PORT \
  61 + _IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port))
  62 +struct ioctl_evtchn_bind_unbound_port {
  63 + unsigned int remote_domain;
  64 +};
  65 +
  66 +/*
  67 + * Unbind previously allocated @port.
  68 + */
  69 +#define IOCTL_EVTCHN_UNBIND \
  70 + _IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind))
  71 +struct ioctl_evtchn_unbind {
  72 + unsigned int port;
  73 +};
  74 +
  75 +/*
   76 + * Send an event (notification) to the previously allocated @port.
  77 + */
  78 +#define IOCTL_EVTCHN_NOTIFY \
  79 + _IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify))
  80 +struct ioctl_evtchn_notify {
  81 + unsigned int port;
  82 +};
  83 +
  84 +/* Clear and reinitialise the event buffer. Clear error condition. */
  85 +#define IOCTL_EVTCHN_RESET \
  86 + _IOC(_IOC_NONE, 'E', 5, 0)
  87 +
  88 +#endif /* __LINUX_PUBLIC_EVTCHN_H__ */
include/xen/interface/version.h
... ... @@ -57,5 +57,8 @@
57 57 /* Declares the features reported by XENVER_get_features. */
58 58 #include "features.h"
59 59  
  60 +/* arg == NULL; returns host memory page size. */
  61 +#define XENVER_pagesize 7
  62 +
60 63 #endif /* __XEN_PUBLIC_VERSION_H__ */
include/xen/xenbus.h
... ... @@ -91,8 +91,7 @@
91 91 void (*otherend_changed)(struct xenbus_device *dev,
92 92 enum xenbus_state backend_state);
93 93 int (*remove)(struct xenbus_device *dev);
94   - int (*suspend)(struct xenbus_device *dev);
95   - int (*suspend_cancel)(struct xenbus_device *dev);
  94 + int (*suspend)(struct xenbus_device *dev, pm_message_t state);
96 95 int (*resume)(struct xenbus_device *dev);
97 96 int (*uevent)(struct xenbus_device *, char **, int, char *, int);
98 97 struct device_driver driver;
... ... @@ -2783,7 +2783,7 @@
2783 2783 * combine the page table reload and the switch backend into
2784 2784 * one hypercall.
2785 2785 */
2786   - arch_enter_lazy_cpu_mode();
  2786 + arch_start_context_switch(prev);
2787 2787  
2788 2788 if (unlikely(!mm)) {
2789 2789 next->active_mm = oldmm;