From 0998baa8639e298a9272264260faadebe8fb8968 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Thu, 15 Nov 2018 13:03:41 +0800 Subject: [PATCH] MLK-20373-1 Intrdouce xen header files Introduce xen header files from Linux Kernel commit e2b623fbe6a3("Merge tag 's390-4.20-1' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux") Signed-off-by: Peng Fan Reviewed-by: Peng Fan Reviewed-by: Flynn xu (cherry picked from commit ddb393c45158f6114bd53c83dcd8397a6c2acbdc) (cherry picked from commit 8591f37af6345695a98216c136e9209a880901f1) (cherry picked from commit b93308506902dd30b84640f59fa75ada21d8775a) --- arch/arm/include/asm/xen/events.h | 0 arch/arm/include/asm/xen/hypercall.h | 66 +++ arch/arm/include/asm/xen/interface.h | 86 ++++ include/xen/event_channel.h | 385 +++++++++++++++++ include/xen/events.h | 15 + include/xen/hvm.h | 61 +++ include/xen/interface/event_channel.h | 279 ++++++++++++ include/xen/interface/grant_table.h | 568 ++++++++++++++++++++++++ include/xen/interface/hvm/hvm_op.h | 65 +++ include/xen/interface/hvm/params.h | 127 ++++++ include/xen/interface/io/console.h | 24 ++ include/xen/interface/io/ring.h | 432 +++++++++++++++++++ include/xen/interface/platform.h | 531 +++++++++++++++++++++++ include/xen/interface/sched.h | 183 ++++++++ include/xen/interface/xen.h | 782 ++++++++++++++++++++++++++++++++++ 15 files changed, 3604 insertions(+) create mode 100644 arch/arm/include/asm/xen/events.h create mode 100644 arch/arm/include/asm/xen/hypercall.h create mode 100644 arch/arm/include/asm/xen/interface.h create mode 100644 include/xen/event_channel.h create mode 100644 include/xen/events.h create mode 100644 include/xen/hvm.h create mode 100644 include/xen/interface/event_channel.h create mode 100644 include/xen/interface/grant_table.h create mode 100644 include/xen/interface/hvm/hvm_op.h create mode 100644 include/xen/interface/hvm/params.h create mode 100644 include/xen/interface/io/console.h create mode 100644 include/xen/interface/io/ring.h create mode 100644 include/xen/interface/platform.h create mode 100644 include/xen/interface/sched.h create mode 100644 include/xen/interface/xen.h diff --git a/arch/arm/include/asm/xen/events.h b/arch/arm/include/asm/xen/events.h new file mode 100644 index 0000000..e69de29 diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h new file mode 100644 index 0000000..eb50b34 --- /dev/null +++ b/arch/arm/include/asm/xen/hypercall.h @@ -0,0 +1,66 @@ +/****************************************************************************** + * hypercall.h + * + * Linux-specific hypervisor handling. + * + * Stefano Stabellini , Citrix, 2012 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _ASM_ARM_XEN_HYPERCALL_H +#define _ASM_ARM_XEN_HYPERCALL_H + +#include + +#include +//#include +#include + +struct xen_dm_op_buf; + +long privcmd_call(unsigned call, unsigned long a1, + unsigned long a2, unsigned long a3, + unsigned long a4, unsigned long a5); +int HYPERVISOR_xen_version(int cmd, void *arg); +int HYPERVISOR_console_io(int cmd, int count, char *str); +int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count); +int HYPERVISOR_sched_op(int cmd, void *arg); +int HYPERVISOR_event_channel_op(int cmd, void *arg); +unsigned long HYPERVISOR_hvm_op(int op, void *arg); +int HYPERVISOR_memory_op(unsigned int cmd, void *arg); +int HYPERVISOR_physdev_op(int cmd, void *arg); +int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); +int HYPERVISOR_tmem_op(void *arg); +int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type); +int HYPERVISOR_dm_op(domid_t domid, unsigned int nr_bufs, + struct xen_dm_op_buf *bufs); +int HYPERVISOR_platform_op_raw(void *arg); +static inline int HYPERVISOR_platform_op(struct xen_platform_op *op) +{ + op->interface_version = XENPF_INTERFACE_VERSION; + return HYPERVISOR_platform_op_raw(op); +} +#endif /* _ASM_ARM_XEN_HYPERCALL_H */ diff --git a/arch/arm/include/asm/xen/interface.h b/arch/arm/include/asm/xen/interface.h new file mode 100644 index 0000000..c3eada2 --- /dev/null +++ b/arch/arm/include/asm/xen/interface.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/****************************************************************************** + * Guest OS interface to ARM Xen. + * + * Stefano Stabellini , Citrix, 2012 + */ + +#ifndef _ASM_ARM_XEN_INTERFACE_H +#define _ASM_ARM_XEN_INTERFACE_H + +#include + +#define uint64_aligned_t uint64_t __attribute__((aligned(8))) + +#define __DEFINE_GUEST_HANDLE(name, type) \ + typedef struct { union { type *p; uint64_aligned_t q; }; } \ + __guest_handle_ ## name + +#define DEFINE_GUEST_HANDLE_STRUCT(name) \ + __DEFINE_GUEST_HANDLE(name, struct name) +#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name) +#define GUEST_HANDLE(name) __guest_handle_ ## name + +#define set_xen_guest_handle(hnd, val) \ + do { \ + if (sizeof(hnd) == 8) \ + *(uint64_t *)&(hnd) = 0; \ + (hnd).p = val; \ + } while (0) + +#define __HYPERVISOR_platform_op_raw __HYPERVISOR_platform_op + +#ifndef __ASSEMBLY__ +/* Explicitly size integers that represent pfns in the interface with + * Xen so that we can have one ABI that works for 32 and 64 bit guests. + * Note that this means that the xen_pfn_t type may be capable of + * representing pfn's which the guest cannot represent in its own pfn + * type. However since pfn space is controlled by the guest this is + * fine since it simply wouldn't be able to create any sure pfns in + * the first place. + */ +typedef uint64_t xen_pfn_t; +#define PRI_xen_pfn "llx" +typedef uint64_t xen_ulong_t; +#define PRI_xen_ulong "llx" +typedef int64_t xen_long_t; +#define PRI_xen_long "llx" +/* Guest handles for primitive C types. */ +__DEFINE_GUEST_HANDLE(uchar, unsigned char); +__DEFINE_GUEST_HANDLE(uint, unsigned int); +DEFINE_GUEST_HANDLE(char); +DEFINE_GUEST_HANDLE(int); +DEFINE_GUEST_HANDLE(void); +DEFINE_GUEST_HANDLE(uint64_t); +DEFINE_GUEST_HANDLE(uint32_t); +DEFINE_GUEST_HANDLE(xen_pfn_t); +DEFINE_GUEST_HANDLE(xen_ulong_t); + +/* Maximum number of virtual CPUs in multi-processor guests. */ +#define MAX_VIRT_CPUS 1 + +struct arch_vcpu_info { }; +struct arch_shared_info { }; + +/* TODO: Move pvclock definitions some place arch independent */ +struct pvclock_vcpu_time_info { + u32 version; + u32 pad0; + u64 tsc_timestamp; + u64 system_time; + u32 tsc_to_system_mul; + s8 tsc_shift; + u8 flags; + u8 pad[2]; +} __attribute__((__packed__)); /* 32 bytes */ + +/* It is OK to have a 12 bytes struct with no padding because it is packed */ +struct pvclock_wall_clock { + u32 version; + u32 sec; + u32 nsec; + u32 sec_hi; +} __attribute__((__packed__)); +#endif + +#endif /* _ASM_ARM_XEN_INTERFACE_H */ diff --git a/include/xen/event_channel.h b/include/xen/event_channel.h new file mode 100644 index 0000000..44c549d --- /dev/null +++ b/include/xen/event_channel.h @@ -0,0 +1,385 @@ +/****************************************************************************** + * event_channel.h + * + * Event channels between domains. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2003-2004, K A Fraser. + */ + +#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ +#define __XEN_PUBLIC_EVENT_CHANNEL_H__ + +#include "xen.h" + +/* + * `incontents 150 evtchn Event Channels + * + * Event channels are the basic primitive provided by Xen for event + * notifications. An event is the Xen equivalent of a hardware + * interrupt. They essentially store one bit of information, the event + * of interest is signalled by transitioning this bit from 0 to 1. + * + * Notifications are received by a guest via an upcall from Xen, + * indicating when an event arrives (setting the bit). Further + * notifications are masked until the bit is cleared again (therefore, + * guests must check the value of the bit after re-enabling event + * delivery to ensure no missed notifications). + * + * Event notifications can be masked by setting a flag; this is + * equivalent to disabling interrupts and can be used to ensure + * atomicity of certain operations in the guest kernel. + * + * Event channels are represented by the evtchn_* fields in + * struct shared_info and struct vcpu_info. + */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_event_channel_op(enum event_channel_op cmd, void *args) + * ` + * @cmd == EVTCHNOP_* (event-channel operation). + * @args == struct evtchn_* Operation-specific extra arguments (NULL if none). + */ + +/* ` enum event_channel_op { // EVTCHNOP_* => struct evtchn_* */ +#define EVTCHNOP_bind_interdomain 0 +#define EVTCHNOP_bind_virq 1 +#define EVTCHNOP_bind_pirq 2 +#define EVTCHNOP_close 3 +#define EVTCHNOP_send 4 +#define EVTCHNOP_status 5 +#define EVTCHNOP_alloc_unbound 6 +#define EVTCHNOP_bind_ipi 7 +#define EVTCHNOP_bind_vcpu 8 +#define EVTCHNOP_unmask 9 +#define EVTCHNOP_reset 10 +#define EVTCHNOP_init_control 11 +#define EVTCHNOP_expand_array 12 +#define EVTCHNOP_set_priority 13 +/* ` } */ + +typedef uint32_t evtchn_port_t; +DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); + +/* + * EVTCHNOP_alloc_unbound: Allocate a port in domain and mark as + * accepting interdomain bindings from domain . A fresh port + * is allocated in and returned as . + * NOTES: + * 1. If the caller is unprivileged then must be DOMID_SELF. + * 2. may be DOMID_SELF, allowing loopback connections. + */ +struct evtchn_alloc_unbound { + /* IN parameters */ + domid_t dom, remote_dom; + /* OUT parameters */ + evtchn_port_t port; +}; +typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t; + +/* + * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between + * the calling domain and . must identify + * a port that is unbound and marked as accepting bindings from the calling + * domain. A fresh port is allocated in the calling domain and returned as + * . + * + * In case the peer domain has already tried to set our event channel + * pending, before it was bound, EVTCHNOP_bind_interdomain always sets + * the local event channel pending. + * + * The usual pattern of use, in the guest's upcall (or subsequent + * handler) is as follows: (Re-enable the event channel for subsequent + * signalling and then) check for the existence of whatever condition + * is being waited for by other means, and take whatever action is + * needed (if any). + * + * NOTES: + * 1. may be DOMID_SELF, allowing loopback connections. + */ +struct evtchn_bind_interdomain { + /* IN parameters. */ + domid_t remote_dom; + evtchn_port_t remote_port; + /* OUT parameters. */ + evtchn_port_t local_port; +}; +typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t; + +/* + * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ on specified + * vcpu. + * NOTES: + * 1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list + * in xen.h for the classification of each VIRQ. + * 2. Global VIRQs must be allocated on VCPU0 but can subsequently be + * re-bound via EVTCHNOP_bind_vcpu. + * 3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu. + * The allocated event channel is bound to the specified vcpu and the + * binding cannot be changed. + */ +struct evtchn_bind_virq { + /* IN parameters. */ + uint32_t virq; /* enum virq */ + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_bind_virq evtchn_bind_virq_t; + +/* + * EVTCHNOP_bind_pirq: Bind a local event channel to a real IRQ (PIRQ ). + * NOTES: + * 1. A physical IRQ may be bound to at most one event channel per domain. + * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. + */ +struct evtchn_bind_pirq { + /* IN parameters. */ + uint32_t pirq; +#define BIND_PIRQ__WILL_SHARE 1 + uint32_t flags; /* BIND_PIRQ__* */ + /* OUT parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_bind_pirq evtchn_bind_pirq_t; + +/* + * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. + * NOTES: + * 1. The allocated event channel is bound to the specified vcpu. The binding + * may not be changed. + */ +struct evtchn_bind_ipi { + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_bind_ipi evtchn_bind_ipi_t; + +/* + * EVTCHNOP_close: Close a local event channel . If the channel is + * interdomain then the remote end is placed in the unbound state + * (EVTCHNSTAT_unbound), awaiting a new connection. + */ +struct evtchn_close { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_close evtchn_close_t; + +/* + * EVTCHNOP_send: Send an event to the remote end of the channel whose local + * endpoint is . + */ +struct evtchn_send { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_send evtchn_send_t; + +/* + * EVTCHNOP_status: Get the current status of the communication channel which + * has an endpoint at . + * NOTES: + * 1. may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may obtain the status of an event + * channel for which is not DOMID_SELF. + */ +struct evtchn_status { + /* IN parameters */ + domid_t dom; + evtchn_port_t port; + /* OUT parameters */ +#define EVTCHNSTAT_closed 0 /* Channel is not in use. */ +#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/ +#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ +#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ +#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ +#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ + uint32_t status; + uint32_t vcpu; /* VCPU to which this channel is bound. */ + union { + struct { + domid_t dom; + } unbound; /* EVTCHNSTAT_unbound */ + struct { + domid_t dom; + evtchn_port_t port; + } interdomain; /* EVTCHNSTAT_interdomain */ + uint32_t pirq; /* EVTCHNSTAT_pirq */ + uint32_t virq; /* EVTCHNSTAT_virq */ + } u; +}; +typedef struct evtchn_status evtchn_status_t; + +/* + * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an + * event is pending. + * NOTES: + * 1. IPI-bound channels always notify the vcpu specified at bind time. + * This binding cannot be changed. + * 2. Per-VCPU VIRQ channels always notify the vcpu specified at bind time. + * This binding cannot be changed. + * 3. All other channels notify vcpu0 by default. This default is set when + * the channel is allocated (a port that is freed and subsequently reused + * has its binding reset to vcpu0). + */ +struct evtchn_bind_vcpu { + /* IN parameters. */ + evtchn_port_t port; + uint32_t vcpu; +}; +typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t; + +/* + * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver + * a notification to the appropriate VCPU if an event is pending. + */ +struct evtchn_unmask { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_unmask evtchn_unmask_t; + +/* + * EVTCHNOP_reset: Close all event channels associated with specified domain. + * NOTES: + * 1. may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF. + * 3. Destroys all control blocks and event array, resets event channel + * operations to 2-level ABI if called with == DOMID_SELF and FIFO + * ABI was used. Guests should not bind events during EVTCHNOP_reset call + * as these events are likely to be lost. + */ +struct evtchn_reset { + /* IN parameters. */ + domid_t dom; +}; +typedef struct evtchn_reset evtchn_reset_t; + +/* + * EVTCHNOP_init_control: initialize the control block for the FIFO ABI. + * + * Note: any events that are currently pending will not be resent and + * will be lost. Guests should call this before binding any event to + * avoid losing any events. + */ +struct evtchn_init_control { + /* IN parameters. */ + uint64_t control_gfn; + uint32_t offset; + uint32_t vcpu; + /* OUT parameters. */ + uint8_t link_bits; + uint8_t _pad[7]; +}; +typedef struct evtchn_init_control evtchn_init_control_t; + +/* + * EVTCHNOP_expand_array: add an additional page to the event array. + */ +struct evtchn_expand_array { + /* IN parameters. */ + uint64_t array_gfn; +}; +typedef struct evtchn_expand_array evtchn_expand_array_t; + +/* + * EVTCHNOP_set_priority: set the priority for an event channel. + */ +struct evtchn_set_priority { + /* IN parameters. */ + uint32_t port; + uint32_t priority; +}; +typedef struct evtchn_set_priority evtchn_set_priority_t; + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_event_channel_op_compat(struct evtchn_op *op) + * ` + * Superceded by new event_channel_op() hypercall since 0x00030202. + */ +struct evtchn_op { + uint32_t cmd; /* enum event_channel_op */ + union { + struct evtchn_alloc_unbound alloc_unbound; + struct evtchn_bind_interdomain bind_interdomain; + struct evtchn_bind_virq bind_virq; + struct evtchn_bind_pirq bind_pirq; + struct evtchn_bind_ipi bind_ipi; + struct evtchn_close close; + struct evtchn_send send; + struct evtchn_status status; + struct evtchn_bind_vcpu bind_vcpu; + struct evtchn_unmask unmask; + } u; +}; +typedef struct evtchn_op evtchn_op_t; +DEFINE_XEN_GUEST_HANDLE(evtchn_op_t); + +/* + * 2-level ABI + */ + +#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64) + +/* + * FIFO ABI + */ + +/* Events may have priorities from 0 (highest) to 15 (lowest). */ +#define EVTCHN_FIFO_PRIORITY_MAX 0 +#define EVTCHN_FIFO_PRIORITY_DEFAULT 7 +#define EVTCHN_FIFO_PRIORITY_MIN 15 + +#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1) + +typedef uint32_t event_word_t; + +#define EVTCHN_FIFO_PENDING 31 +#define EVTCHN_FIFO_MASKED 30 +#define EVTCHN_FIFO_LINKED 29 +#define EVTCHN_FIFO_BUSY 28 + +#define EVTCHN_FIFO_LINK_BITS 17 +#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1) + +#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS) + +struct evtchn_fifo_control_block { + uint32_t ready; + uint32_t _rsvd; + uint32_t head[EVTCHN_FIFO_MAX_QUEUES]; +}; +typedef struct evtchn_fifo_control_block evtchn_fifo_control_block_t; + +#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/events.h b/include/xen/events.h new file mode 100644 index 0000000..c39c080 --- /dev/null +++ b/include/xen/events.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _XEN_EVENTS_H +#define _XEN_EVENTS_H + +#include +#include +#include + +static inline void notify_remote_via_evtchn(int port) +{ + struct evtchn_send send = { .port = port }; + (void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send); +} + +#endif /* _XEN_EVENTS_H */ diff --git a/include/xen/hvm.h b/include/xen/hvm.h new file mode 100644 index 0000000..0b15f8c --- /dev/null +++ b/include/xen/hvm.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Simple wrappers around HVM functions */ +#ifndef XEN_HVM_H__ +#define XEN_HVM_H__ + +#include +#include + +static const char *param_name(int op) +{ +#define PARAM(x) [HVM_PARAM_##x] = #x + static const char *const names[] = { + PARAM(CALLBACK_IRQ), + PARAM(STORE_PFN), + PARAM(STORE_EVTCHN), + PARAM(PAE_ENABLED), + PARAM(IOREQ_PFN), + PARAM(BUFIOREQ_PFN), + PARAM(TIMER_MODE), + PARAM(HPET_ENABLED), + PARAM(IDENT_PT), + PARAM(DM_DOMAIN), + PARAM(ACPI_S_STATE), + PARAM(VM86_TSS), + PARAM(VPT_ALIGN), + PARAM(CONSOLE_PFN), + PARAM(CONSOLE_EVTCHN), + }; +#undef PARAM + + if (op >= ARRAY_SIZE(names)) + return "unknown"; + + if (!names[op]) + return "reserved"; + + return names[op]; +} +static inline int hvm_get_parameter(int idx, uint64_t *value) +{ + struct xen_hvm_param xhv; + int r; + + xhv.domid = DOMID_SELF; + xhv.index = idx; + r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); + if (r < 0) { + pr_err("Cannot get hvm parameter %s (%d): %d!\n", + param_name(idx), idx, r); + return r; + } + *value = xhv.value; + return r; +} + +#define HVM_CALLBACK_VIA_TYPE_VECTOR 0x2 +#define HVM_CALLBACK_VIA_TYPE_SHIFT 56 +#define HVM_CALLBACK_VECTOR(x) (((uint64_t)HVM_CALLBACK_VIA_TYPE_VECTOR)<<\ + HVM_CALLBACK_VIA_TYPE_SHIFT | (x)) + +#endif /* XEN_HVM_H__ */ diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h new file mode 100644 index 0000000..45650c9 --- /dev/null +++ b/include/xen/interface/event_channel.h @@ -0,0 +1,279 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/****************************************************************************** + * event_channel.h + * + * Event channels between domains. + * + * Copyright (c) 2003-2004, K A Fraser. + */ + +#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ +#define __XEN_PUBLIC_EVENT_CHANNEL_H__ + +#include + +typedef uint32_t evtchn_port_t; +DEFINE_GUEST_HANDLE(evtchn_port_t); + +/* + * EVTCHNOP_alloc_unbound: Allocate a port in domain and mark as + * accepting interdomain bindings from domain . A fresh port + * is allocated in and returned as . + * NOTES: + * 1. If the caller is unprivileged then must be DOMID_SELF. + * 2. may be DOMID_SELF, allowing loopback connections. + */ +#define EVTCHNOP_alloc_unbound 6 +struct evtchn_alloc_unbound { + /* IN parameters */ + domid_t dom, remote_dom; + /* OUT parameters */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between + * the calling domain and . must identify + * a port that is unbound and marked as accepting bindings from the calling + * domain. A fresh port is allocated in the calling domain and returned as + * . + * NOTES: + * 2. may be DOMID_SELF, allowing loopback connections. + */ +#define EVTCHNOP_bind_interdomain 0 +struct evtchn_bind_interdomain { + /* IN parameters. */ + domid_t remote_dom; + evtchn_port_t remote_port; + /* OUT parameters. */ + evtchn_port_t local_port; +}; + +/* + * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ on specified + * vcpu. + * NOTES: + * 1. A virtual IRQ may be bound to at most one event channel per vcpu. + * 2. The allocated event channel is bound to the specified vcpu. The binding + * may not be changed. + */ +#define EVTCHNOP_bind_virq 1 +struct evtchn_bind_virq { + /* IN parameters. */ + uint32_t virq; + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ . + * NOTES: + * 1. A physical IRQ may be bound to at most one event channel per domain. + * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. + */ +#define EVTCHNOP_bind_pirq 2 +struct evtchn_bind_pirq { + /* IN parameters. */ + uint32_t pirq; +#define BIND_PIRQ__WILL_SHARE 1 + uint32_t flags; /* BIND_PIRQ__* */ + /* OUT parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. + * NOTES: + * 1. The allocated event channel is bound to the specified vcpu. The binding + * may not be changed. + */ +#define EVTCHNOP_bind_ipi 7 +struct evtchn_bind_ipi { + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_close: Close a local event channel . If the channel is + * interdomain then the remote end is placed in the unbound state + * (EVTCHNSTAT_unbound), awaiting a new connection. + */ +#define EVTCHNOP_close 3 +struct evtchn_close { + /* IN parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_send: Send an event to the remote end of the channel whose local + * endpoint is . + */ +#define EVTCHNOP_send 4 +struct evtchn_send { + /* IN parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_status: Get the current status of the communication channel which + * has an endpoint at . + * NOTES: + * 1. may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may obtain the status of an event + * channel for which is not DOMID_SELF. + */ +#define EVTCHNOP_status 5 +struct evtchn_status { + /* IN parameters */ + domid_t dom; + evtchn_port_t port; + /* OUT parameters */ +#define EVTCHNSTAT_closed 0 /* Channel is not in use. */ +#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/ +#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ +#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ +#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ +#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ + uint32_t status; + uint32_t vcpu; /* VCPU to which this channel is bound. */ + union { + struct { + domid_t dom; + } unbound; /* EVTCHNSTAT_unbound */ + struct { + domid_t dom; + evtchn_port_t port; + } interdomain; /* EVTCHNSTAT_interdomain */ + uint32_t pirq; /* EVTCHNSTAT_pirq */ + uint32_t virq; /* EVTCHNSTAT_virq */ + } u; +}; + +/* + * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an + * event is pending. + * NOTES: + * 1. IPI- and VIRQ-bound channels always notify the vcpu that initialised + * the binding. This binding cannot be changed. + * 2. All other channels notify vcpu0 by default. This default is set when + * the channel is allocated (a port that is freed and subsequently reused + * has its binding reset to vcpu0). + */ +#define EVTCHNOP_bind_vcpu 8 +struct evtchn_bind_vcpu { + /* IN parameters. */ + evtchn_port_t port; + uint32_t vcpu; +}; + +/* + * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver + * a notification to the appropriate VCPU if an event is pending. + */ +#define EVTCHNOP_unmask 9 +struct evtchn_unmask { + /* IN parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_reset: Close all event channels associated with specified domain. + * NOTES: + * 1. may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF. + */ +#define EVTCHNOP_reset 10 +struct evtchn_reset { + /* IN parameters. */ + domid_t dom; +}; +typedef struct evtchn_reset evtchn_reset_t; + +/* + * EVTCHNOP_init_control: initialize the control block for the FIFO ABI. + */ +#define EVTCHNOP_init_control 11 +struct evtchn_init_control { + /* IN parameters. */ + uint64_t control_gfn; + uint32_t offset; + uint32_t vcpu; + /* OUT parameters. */ + uint8_t link_bits; + uint8_t _pad[7]; +}; + +/* + * EVTCHNOP_expand_array: add an additional page to the event array. + */ +#define EVTCHNOP_expand_array 12 +struct evtchn_expand_array { + /* IN parameters. */ + uint64_t array_gfn; +}; + +/* + * EVTCHNOP_set_priority: set the priority for an event channel. + */ +#define EVTCHNOP_set_priority 13 +struct evtchn_set_priority { + /* IN parameters. */ + uint32_t port; + uint32_t priority; +}; + +struct evtchn_op { + uint32_t cmd; /* EVTCHNOP_* */ + union { + struct evtchn_alloc_unbound alloc_unbound; + struct evtchn_bind_interdomain bind_interdomain; + struct evtchn_bind_virq bind_virq; + struct evtchn_bind_pirq bind_pirq; + struct evtchn_bind_ipi bind_ipi; + struct evtchn_close close; + struct evtchn_send send; + struct evtchn_status status; + struct evtchn_bind_vcpu bind_vcpu; + struct evtchn_unmask unmask; + } u; +}; +DEFINE_GUEST_HANDLE_STRUCT(evtchn_op); + +/* + * 2-level ABI + */ + +#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64) + +/* + * FIFO ABI + */ + +/* Events may have priorities from 0 (highest) to 15 (lowest). */ +#define EVTCHN_FIFO_PRIORITY_MAX 0 +#define EVTCHN_FIFO_PRIORITY_DEFAULT 7 +#define EVTCHN_FIFO_PRIORITY_MIN 15 + +#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1) + +typedef uint32_t event_word_t; + +#define EVTCHN_FIFO_PENDING 31 +#define EVTCHN_FIFO_MASKED 30 +#define EVTCHN_FIFO_LINKED 29 +#define EVTCHN_FIFO_BUSY 28 + +#define EVTCHN_FIFO_LINK_BITS 17 +#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1) + +#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS) + +struct evtchn_fifo_control_block { + uint32_t ready; + uint32_t _rsvd; + event_word_t head[EVTCHN_FIFO_MAX_QUEUES]; +}; + +#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h new file mode 100644 index 0000000..7fb7112 --- /dev/null +++ b/include/xen/interface/grant_table.h @@ -0,0 +1,568 @@ +/****************************************************************************** + * grant_table.h + * + * Interface for granting foreign access to page frames, and receiving + * page-ownership transfers. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_GRANT_TABLE_H__ +#define __XEN_PUBLIC_GRANT_TABLE_H__ + +#include + +/*********************************** + * GRANT TABLE REPRESENTATION + */ + +/* Some rough guidelines on accessing and updating grant-table entries + * in a concurrency-safe manner. For more information, Linux contains a + * reference implementation for guest OSes (arch/xen/kernel/grant_table.c). + * + * NB. WMB is a no-op on current-generation x86 processors. However, a + * compiler barrier will still be required. + * + * Introducing a valid entry into the grant table: + * 1. Write ent->domid. + * 2. Write ent->frame: + * GTF_permit_access: Frame to which access is permitted. + * GTF_accept_transfer: Pseudo-phys frame slot being filled by new + * frame, or zero if none. + * 3. Write memory barrier (WMB). + * 4. Write ent->flags, inc. valid type. + * + * Invalidating an unused GTF_permit_access entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & (GTF_reading|GTF_writing)). + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * + * Invalidating an in-use GTF_permit_access entry: + * This cannot be done directly. Request assistance from the domain controller + * which can set a timeout on the use of a grant entry and take necessary + * action. (NB. This is not yet implemented!). + * + * Invalidating an unused GTF_accept_transfer entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & GTF_transfer_committed). [*] + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * [*] If GTF_transfer_committed is set then the grant entry is 'committed'. + * The guest must /not/ modify the grant entry until the address of the + * transferred frame is written. It is safe for the guest to spin waiting + * for this to occur (detect by observing GTF_transfer_completed in + * ent->flags). + * + * Invalidating a committed GTF_accept_transfer entry: + * 1. Wait for (ent->flags & GTF_transfer_completed). + * + * Changing a GTF_permit_access from writable to read-only: + * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing. + * + * Changing a GTF_permit_access from read-only to writable: + * Use SMP-safe bit-setting instruction. + */ + +/* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef uint32_t grant_ref_t; + +/* + * A grant table comprises a packed array of grant entries in one or more + * page frames shared between Xen and a guest. + * [XEN]: This field is written by Xen and read by the sharing guest. + * [GST]: This field is written by the guest and read by Xen. + */ + +/* + * Version 1 of the grant table entry structure is maintained purely + * for backwards compatibility. New guests should use version 2. + */ +struct grant_entry_v1 { + /* GTF_xxx: various type and flag information. [XEN,GST] */ + uint16_t flags; + /* The domain being granted foreign privileges. [GST] */ + domid_t domid; + /* + * GTF_permit_access: Frame that @domid is allowed to map and access. [GST] + * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN] + */ + uint32_t frame; +}; + +/* + * Type of grant entry. + * GTF_invalid: This grant entry grants no privileges. + * GTF_permit_access: Allow @domid to map/access @frame. + * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame + * to this guest. Xen writes the page number to @frame. + * GTF_transitive: Allow @domid to transitively access a subrange of + * @trans_grant in @trans_domid. No mappings are allowed. + */ +#define GTF_invalid (0U<<0) +#define GTF_permit_access (1U<<0) +#define GTF_accept_transfer (2U<<0) +#define GTF_transitive (3U<<0) +#define GTF_type_mask (3U<<0) + +/* + * Subflags for GTF_permit_access. + * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST] + * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] + * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] + * GTF_sub_page: Grant access to only a subrange of the page. @domid + * will only be allowed to copy from the grant, and not + * map it. [GST] + */ +#define _GTF_readonly (2) +#define GTF_readonly (1U<<_GTF_readonly) +#define _GTF_reading (3) +#define GTF_reading (1U<<_GTF_reading) +#define _GTF_writing (4) +#define GTF_writing (1U<<_GTF_writing) +#define _GTF_sub_page (8) +#define GTF_sub_page (1U<<_GTF_sub_page) + +/* + * Subflags for GTF_accept_transfer: + * GTF_transfer_committed: Xen sets this flag to indicate that it is committed + * to transferring ownership of a page frame. When a guest sees this flag + * it must /not/ modify the grant entry until GTF_transfer_completed is + * set by Xen. + * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag + * after reading GTF_transfer_committed. Xen will always write the frame + * address, followed by ORing this flag, in a timely manner. + */ +#define _GTF_transfer_committed (2) +#define GTF_transfer_committed (1U<<_GTF_transfer_committed) +#define _GTF_transfer_completed (3) +#define GTF_transfer_completed (1U<<_GTF_transfer_completed) + +/* + * Version 2 grant table entries. These fulfil the same role as + * version 1 entries, but can represent more complicated operations. + * Any given domain will have either a version 1 or a version 2 table, + * and every entry in the table will be the same version. + * + * The interface by which domains use grant references does not depend + * on the grant table version in use by the other domain. + */ + +/* + * Version 1 and version 2 grant entries share a common prefix. The + * fields of the prefix are documented as part of struct + * grant_entry_v1. + */ +struct grant_entry_header { + uint16_t flags; + domid_t domid; +}; + +/* + * Version 2 of the grant entry structure, here is a union because three + * different types are suppotted: full_page, sub_page and transitive. + */ +union grant_entry_v2 { + struct grant_entry_header hdr; + + /* + * This member is used for V1-style full page grants, where either: + * + * -- hdr.type is GTF_accept_transfer, or + * -- hdr.type is GTF_permit_access and GTF_sub_page is not set. + * + * In that case, the frame field has the same semantics as the + * field of the same name in the V1 entry structure. + */ + struct { + struct grant_entry_header hdr; + uint32_t pad0; + uint64_t frame; + } full_page; + + /* + * If the grant type is GTF_grant_access and GTF_sub_page is set, + * @domid is allowed to access bytes [@page_off,@page_off+@length) + * in frame @frame. + */ + struct { + struct grant_entry_header hdr; + uint16_t page_off; + uint16_t length; + uint64_t frame; + } sub_page; + + /* + * If the grant is GTF_transitive, @domid is allowed to use the + * grant @gref in domain @trans_domid, as if it was the local + * domain. Obviously, the transitive access must be compatible + * with the original grant. + */ + struct { + struct grant_entry_header hdr; + domid_t trans_domid; + uint16_t pad0; + grant_ref_t gref; + } transitive; + + uint32_t __spacer[4]; /* Pad to a power of two */ +}; + +typedef uint16_t grant_status_t; + +/*********************************** + * GRANT TABLE QUERIES AND USES + */ + +/* + * Handle to track a mapping created via a grant reference. + */ +typedef uint32_t grant_handle_t; + +/* + * GNTTABOP_map_grant_ref: Map the grant entry (,) for access + * by devices and/or host CPUs. If successful, is a tracking number + * that must be presented later to destroy the mapping(s). On error, + * is a negative status code. + * NOTES: + * 1. If GNTMAP_device_map is specified then is the address + * via which I/O devices may access the granted frame. + * 2. If GNTMAP_host_map is specified then a mapping will be added at + * either a host virtual address in the current address space, or at + * a PTE at the specified machine address. The type of mapping to + * perform is selected through the GNTMAP_contains_pte flag, and the + * address is specified in . + * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a + * host mapping is destroyed by other means then it is *NOT* guaranteed + * to be accounted to the correct grant reference! + */ +#define GNTTABOP_map_grant_ref 0 +struct gnttab_map_grant_ref { + /* IN parameters. */ + uint64_t host_addr; + uint32_t flags; /* GNTMAP_* */ + grant_ref_t ref; + domid_t dom; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ + grant_handle_t handle; + uint64_t dev_bus_addr; +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref); + +/* + * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings + * tracked by . If or is zero, that + * field is ignored. If non-zero, they must refer to a device/host mapping + * that is tracked by + * NOTES: + * 1. The call may fail in an undefined manner if either mapping is not + * tracked by . + * 3. After executing a batch of unmaps, it is guaranteed that no stale + * mappings will remain in the device or host TLBs. + */ +#define GNTTABOP_unmap_grant_ref 1 +struct gnttab_unmap_grant_ref { + /* IN parameters. */ + uint64_t host_addr; + uint64_t dev_bus_addr; + grant_handle_t handle; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref); + +/* + * GNTTABOP_setup_table: Set up a grant table for comprising at least + * pages. The frame addresses are written to the . + * Only addresses are written, even if the table is larger. + * NOTES: + * 1. may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify != DOMID_SELF. + * 3. Xen may not support more than a single grant-table page per domain. + */ +#define GNTTABOP_setup_table 2 +struct gnttab_setup_table { + /* IN parameters. */ + domid_t dom; + uint32_t nr_frames; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ + GUEST_HANDLE(xen_pfn_t) frame_list; +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table); + +/* + * GNTTABOP_dump_table: Dump the contents of the grant table to the + * xen console. Debugging use only. + */ +#define GNTTABOP_dump_table 3 +struct gnttab_dump_table { + /* IN parameters. */ + domid_t dom; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table); + +/* + * GNTTABOP_transfer_grant_ref: Transfer to a foreign domain. The + * foreign domain has previously registered its interest in the transfer via + * . + * + * Note that, even if the transfer fails, the specified page no longer belongs + * to the calling domain *unless* the error is GNTST_bad_page. + */ +#define GNTTABOP_transfer 4 +struct gnttab_transfer { + /* IN parameters. */ + xen_pfn_t mfn; + domid_t domid; + grant_ref_t ref; + /* OUT parameters. */ + int16_t status; +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer); + +/* + * GNTTABOP_copy: Hypervisor based copy + * source and destinations can be eithers MFNs or, for foreign domains, + * grant references. the foreign domain has to grant read/write access + * in its grant table. + * + * The flags specify what type source and destinations are (either MFN + * or grant reference). + * + * Note that this can also be used to copy data between two domains + * via a third party if the source and destination domains had previously + * grant appropriate access to their pages to the third party. + * + * source_offset specifies an offset in the source frame, dest_offset + * the offset in the target frame and len specifies the number of + * bytes to be copied. + */ + +#define _GNTCOPY_source_gref (0) +#define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref) +#define _GNTCOPY_dest_gref (1) +#define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref) + +#define GNTTABOP_copy 5 +struct gnttab_copy { + /* IN parameters. */ + struct { + union { + grant_ref_t ref; + xen_pfn_t gmfn; + } u; + domid_t domid; + uint16_t offset; + } source, dest; + uint16_t len; + uint16_t flags; /* GNTCOPY_* */ + /* OUT parameters. */ + int16_t status; +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy); + +/* + * GNTTABOP_query_size: Query the current and maximum sizes of the shared + * grant table. + * NOTES: + * 1. may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify != DOMID_SELF. + */ +#define GNTTABOP_query_size 6 +struct gnttab_query_size { + /* IN parameters. */ + domid_t dom; + /* OUT parameters. */ + uint32_t nr_frames; + uint32_t max_nr_frames; + int16_t status; /* GNTST_* */ +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size); + +/* + * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings + * tracked by but atomically replace the page table entry with one + * pointing to the machine address under . will be + * redirected to the null entry. + * NOTES: + * 1. The call may fail in an undefined manner if either mapping is not + * tracked by . + * 2. After executing a batch of unmaps, it is guaranteed that no stale + * mappings will remain in the device or host TLBs. + */ +#define GNTTABOP_unmap_and_replace 7 +struct gnttab_unmap_and_replace { + /* IN parameters. */ + uint64_t host_addr; + uint64_t new_addr; + grant_handle_t handle; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace); + +/* + * GNTTABOP_set_version: Request a particular version of the grant + * table shared table structure. This operation can only be performed + * once in any given domain. It must be performed before any grants + * are activated; otherwise, the domain will be stuck with version 1. + * The only defined versions are 1 and 2. + */ +#define GNTTABOP_set_version 8 +struct gnttab_set_version { + /* IN parameters */ + uint32_t version; +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version); + +/* + * GNTTABOP_get_status_frames: Get the list of frames used to store grant + * status for . In grant format version 2, the status is separated + * from the other shared grant fields to allow more efficient synchronization + * using barriers instead of atomic cmpexch operations. + * specify the size of vector . + * The frame addresses are returned in the . + * Only addresses are returned, even if the table is larger. + * NOTES: + * 1. may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify != DOMID_SELF. + */ +#define GNTTABOP_get_status_frames 9 +struct gnttab_get_status_frames { + /* IN parameters. */ + uint32_t nr_frames; + domid_t dom; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ + GUEST_HANDLE(uint64_t) frame_list; +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames); + +/* + * GNTTABOP_get_version: Get the grant table version which is in + * effect for domain . + */ +#define GNTTABOP_get_version 10 +struct gnttab_get_version { + /* IN parameters */ + domid_t dom; + uint16_t pad; + /* OUT parameters */ + uint32_t version; +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version); + +/* + * Issue one or more cache maintenance operations on a portion of a + * page granted to the calling domain by a foreign domain. + */ +#define GNTTABOP_cache_flush 12 +struct gnttab_cache_flush { + union { + uint64_t dev_bus_addr; + grant_ref_t ref; + } a; + uint16_t offset; /* offset from start of grant */ + uint16_t length; /* size within the grant */ +#define GNTTAB_CACHE_CLEAN (1<<0) +#define GNTTAB_CACHE_INVAL (1<<1) +#define GNTTAB_CACHE_SOURCE_GREF (1<<31) + uint32_t op; +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush); + +/* + * Bitfield values for update_pin_status.flags. + */ + /* Map the grant entry for access by I/O devices. */ +#define _GNTMAP_device_map (0) +#define GNTMAP_device_map (1<<_GNTMAP_device_map) + /* Map the grant entry for access by host CPUs. */ +#define _GNTMAP_host_map (1) +#define GNTMAP_host_map (1<<_GNTMAP_host_map) + /* Accesses to the granted frame will be restricted to read-only access. */ +#define _GNTMAP_readonly (2) +#define GNTMAP_readonly (1<<_GNTMAP_readonly) + /* + * GNTMAP_host_map subflag: + * 0 => The host mapping is usable only by the guest OS. + * 1 => The host mapping is usable by guest OS + current application. + */ +#define _GNTMAP_application_map (3) +#define GNTMAP_application_map (1<<_GNTMAP_application_map) + + /* + * GNTMAP_contains_pte subflag: + * 0 => This map request contains a host virtual address. + * 1 => This map request contains the machine addess of the PTE to update. + */ +#define _GNTMAP_contains_pte (4) +#define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte) + +/* + * Bits to be placed in guest kernel available PTE bits (architecture + * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set). + */ +#define _GNTMAP_guest_avail0 (16) +#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0) + +/* + * Values for error status returns. All errors are -ve. + */ +#define GNTST_okay (0) /* Normal return. */ +#define GNTST_general_error (-1) /* General undefined error. */ +#define GNTST_bad_domain (-2) /* Unrecognsed domain id. */ +#define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */ +#define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */ +#define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */ +#define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/ +#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */ +#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */ +#define GNTST_bad_page (-9) /* Specified page was invalid for op. */ +#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */ +#define GNTST_address_too_big (-11) /* transfer page address too large. */ +#define GNTST_eagain (-12) /* Operation not done; try again. */ + +#define GNTTABOP_error_msgs { \ + "okay", \ + "undefined error", \ + "unrecognised domain id", \ + "invalid grant reference", \ + "invalid mapping handle", \ + "invalid virtual address", \ + "invalid device address", \ + "no spare translation slot in the I/O MMU", \ + "permission denied", \ + "bad page", \ + "copy arguments cross page boundary", \ + "page address size too large", \ + "operation not done; try again" \ +} + +#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h new file mode 100644 index 0000000..956a046 --- /dev/null +++ b/include/xen/interface/hvm/hvm_op.h @@ -0,0 +1,65 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ +#define __XEN_PUBLIC_HVM_HVM_OP_H__ + +/* Get/set subcommands: the second argument of the hypercall is a + * pointer to a xen_hvm_param struct. */ +#define HVMOP_set_param 0 +#define HVMOP_get_param 1 +struct xen_hvm_param { + domid_t domid; /* IN */ + uint32_t index; /* IN */ + uint64_t value; /* IN/OUT */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param); + +/* Hint from PV drivers for pagetable destruction. */ +#define HVMOP_pagetable_dying 9 +struct xen_hvm_pagetable_dying { + /* Domain with a pagetable about to be destroyed. */ + domid_t domid; + /* guest physical address of the toplevel pagetable dying */ + aligned_u64 gpa; +}; +typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t; +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t); + +enum hvmmem_type_t { + HVMMEM_ram_rw, /* Normal read/write guest RAM */ + HVMMEM_ram_ro, /* Read-only; writes are discarded */ + HVMMEM_mmio_dm, /* Reads and write go to the device model */ +}; + +#define HVMOP_get_mem_type 15 +/* Return hvmmem_type_t for the specified pfn. */ +struct xen_hvm_get_mem_type { + /* Domain to be queried. */ + domid_t domid; + /* OUT variable. */ + uint16_t mem_type; + uint16_t pad[2]; /* align next field on 8-byte boundary */ + /* IN variable. */ + uint64_t pfn; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_get_mem_type); + +#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h new file mode 100644 index 0000000..4d61fc5 --- /dev/null +++ b/include/xen/interface/hvm/params.h @@ -0,0 +1,127 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_PARAMS_H__ +#define __XEN_PUBLIC_HVM_PARAMS_H__ + +#include + +/* + * Parameter space for HVMOP_{set,get}_param. + */ + +#define HVM_PARAM_CALLBACK_IRQ 0 +/* + * How should CPU0 event-channel notifications be delivered? + * + * If val == 0 then CPU0 event-channel notifications are not delivered. + * If val != 0, val[63:56] encodes the type, as follows: + */ + +#define HVM_PARAM_CALLBACK_TYPE_GSI 0 +/* + * val[55:0] is a delivery GSI. GSI 0 cannot be used, as it aliases val == 0, + * and disables all notifications. + */ + +#define HVM_PARAM_CALLBACK_TYPE_PCI_INTX 1 +/* + * val[55:0] is a delivery PCI INTx line: + * Domain = val[47:32], Bus = val[31:16] DevFn = val[15:8], IntX = val[1:0] + */ + +#if defined(__i386__) || defined(__x86_64__) +#define HVM_PARAM_CALLBACK_TYPE_VECTOR 2 +/* + * val[7:0] is a vector number. Check for XENFEAT_hvm_callback_vector to know + * if this delivery method is available. + */ +#elif defined(__arm__) || defined(__aarch64__) +#define HVM_PARAM_CALLBACK_TYPE_PPI 2 +/* + * val[55:16] needs to be zero. + * val[15:8] is interrupt flag of the PPI used by event-channel: + * bit 8: the PPI is edge(1) or level(0) triggered + * bit 9: the PPI is active low(1) or high(0) + * val[7:0] is a PPI number used by event-channel. + * This is only used by ARM/ARM64 and masking/eoi the interrupt associated to + * the notification is handled by the interrupt controller. + */ +#endif + +#define HVM_PARAM_STORE_PFN 1 +#define HVM_PARAM_STORE_EVTCHN 2 + +#define HVM_PARAM_PAE_ENABLED 4 + +#define HVM_PARAM_IOREQ_PFN 5 + +#define HVM_PARAM_BUFIOREQ_PFN 6 + +/* + * Set mode for virtual timers (currently x86 only): + * delay_for_missed_ticks (default): + * Do not advance a vcpu's time beyond the correct delivery time for + * interrupts that have been missed due to preemption. Deliver missed + * interrupts when the vcpu is rescheduled and advance the vcpu's virtual + * time stepwise for each one. + * no_delay_for_missed_ticks: + * As above, missed interrupts are delivered, but guest time always tracks + * wallclock (i.e., real) time while doing so. + * no_missed_ticks_pending: + * No missed interrupts are held pending. Instead, to ensure ticks are + * delivered at some non-zero rate, if we detect missed ticks then the + * internal tick alarm is not disabled if the VCPU is preempted during the + * next tick period. + * one_missed_tick_pending: + * Missed interrupts are collapsed together and delivered as one 'late tick'. + * Guest time always tracks wallclock (i.e., real) time. + */ +#define HVM_PARAM_TIMER_MODE 10 +#define HVMPTM_delay_for_missed_ticks 0 +#define HVMPTM_no_delay_for_missed_ticks 1 +#define HVMPTM_no_missed_ticks_pending 2 +#define HVMPTM_one_missed_tick_pending 3 + +/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */ +#define HVM_PARAM_HPET_ENABLED 11 + +/* Identity-map page directory used by Intel EPT when CR0.PG=0. */ +#define HVM_PARAM_IDENT_PT 12 + +/* Device Model domain, defaults to 0. */ +#define HVM_PARAM_DM_DOMAIN 13 + +/* ACPI S state: currently support S0 and S3 on x86. */ +#define HVM_PARAM_ACPI_S_STATE 14 + +/* TSS used on Intel when CR0.PE=0. */ +#define HVM_PARAM_VM86_TSS 15 + +/* Boolean: Enable aligning all periodic vpts to reduce interrupts */ +#define HVM_PARAM_VPT_ALIGN 16 + +/* Console debug shared memory ring and event channel */ +#define HVM_PARAM_CONSOLE_PFN 17 +#define HVM_PARAM_CONSOLE_EVTCHN 18 + +#define HVM_NR_PARAMS 19 + +#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ diff --git a/include/xen/interface/io/console.h b/include/xen/interface/io/console.h new file mode 100644 index 0000000..85ca8b0 --- /dev/null +++ b/include/xen/interface/io/console.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/****************************************************************************** + * console.h + * + * Console I/O interface for Xen guest OSes. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_CONSOLE_H__ +#define __XEN_PUBLIC_IO_CONSOLE_H__ + +typedef uint32_t XENCONS_RING_IDX; + +#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1)) + +struct xencons_interface { + char in[1024]; + char out[2048]; + XENCONS_RING_IDX in_cons, in_prod; + XENCONS_RING_IDX out_cons, out_prod; +}; + +#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */ diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h new file mode 100644 index 0000000..3f40501 --- /dev/null +++ b/include/xen/interface/io/ring.h @@ -0,0 +1,432 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/****************************************************************************** + * ring.h + * + * Shared producer-consumer ring macros. + * + * Tim Deegan and Andrew Warfield November 2004. + */ + +#ifndef __XEN_PUBLIC_IO_RING_H__ +#define __XEN_PUBLIC_IO_RING_H__ + +#include + +typedef unsigned int RING_IDX; + +/* Round a 32-bit unsigned constant down to the nearest power of two. */ +#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) +#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x)) +#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x)) +#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x)) +#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x)) + +/* + * Calculate size of a shared ring, given the total available space for the + * ring and indexes (_sz), and the name tag of the request/response structure. + * A ring contains as many entries as will fit, rounded down to the nearest + * power of two (so we can mask with (size-1) to loop around). + */ +#define __CONST_RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ + sizeof(((struct _s##_sring *)0)->ring[0]))) + +/* + * The same for passing in an actual pointer instead of a name tag. + */ +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) + +/* + * Macros to make the correct C datatypes for a new kind of ring. + * + * To make a new ring datatype, you need to have two message structures, + * let's say struct request, and struct response already defined. + * + * In a header where you want the ring datatype declared, you then do: + * + * DEFINE_RING_TYPES(mytag, struct request, struct response); + * + * These expand out to give you a set of types, as you can see below. + * The most important of these are: + * + * struct mytag_sring - The shared ring. + * struct mytag_front_ring - The 'front' half of the ring. + * struct mytag_back_ring - The 'back' half of the ring. + * + * To initialize a ring in your code you need to know the location and size + * of the shared memory area (PAGE_SIZE, for instance). To initialise + * the front half: + * + * struct mytag_front_ring front_ring; + * SHARED_RING_INIT((struct mytag_sring *)shared_page); + * FRONT_RING_INIT(&front_ring, (struct mytag_sring *)shared_page, + * PAGE_SIZE); + * + * Initializing the back follows similarly (note that only the front + * initializes the shared ring): + * + * struct mytag_back_ring back_ring; + * BACK_RING_INIT(&back_ring, (struct mytag_sring *)shared_page, + * PAGE_SIZE); + */ + +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ + \ +/* Shared ring entry */ \ +union __name##_sring_entry { \ + __req_t req; \ + __rsp_t rsp; \ +}; \ + \ +/* Shared ring page */ \ +struct __name##_sring { \ + RING_IDX req_prod, req_event; \ + RING_IDX rsp_prod, rsp_event; \ + uint8_t pad[48]; \ + union __name##_sring_entry ring[1]; /* variable-length */ \ +}; \ + \ +/* "Front" end's private variables */ \ +struct __name##_front_ring { \ + RING_IDX req_prod_pvt; \ + RING_IDX rsp_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* "Back" end's private variables */ \ +struct __name##_back_ring { \ + RING_IDX rsp_prod_pvt; \ + RING_IDX req_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; + +/* + * Macros for manipulating rings. + * + * FRONT_RING_whatever works on the "front end" of a ring: here + * requests are pushed on to the ring and responses taken off it. + * + * BACK_RING_whatever works on the "back end" of a ring: here + * requests are taken off the ring and responses put on. + * + * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. + * This is OK in 1-for-1 request-response situations where the + * requestor (front end) never has more than RING_SIZE()-1 + * outstanding requests. + */ + +/* Initialising empty rings */ +#define SHARED_RING_INIT(_s) do { \ + (_s)->req_prod = (_s)->rsp_prod = 0; \ + (_s)->req_event = (_s)->rsp_event = 1; \ + memset((_s)->pad, 0, sizeof((_s)->pad)); \ +} while(0) + +#define FRONT_RING_INIT(_r, _s, __size) do { \ + (_r)->req_prod_pvt = 0; \ + (_r)->rsp_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +#define BACK_RING_INIT(_r, _s, __size) do { \ + (_r)->rsp_prod_pvt = 0; \ + (_r)->req_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +/* Initialize to existing shared indexes -- for recovery */ +#define FRONT_RING_ATTACH(_r, _s, __size) do { \ + (_r)->sring = (_s); \ + (_r)->req_prod_pvt = (_s)->req_prod; \ + (_r)->rsp_cons = (_s)->rsp_prod; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ +} while (0) + +#define BACK_RING_ATTACH(_r, _s, __size) do { \ + (_r)->sring = (_s); \ + (_r)->rsp_prod_pvt = (_s)->rsp_prod; \ + (_r)->req_cons = (_s)->req_prod; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ +} while (0) + +/* How big is this ring? */ +#define RING_SIZE(_r) \ + ((_r)->nr_ents) + +/* Number of free requests (for use on front side only). */ +#define RING_FREE_REQUESTS(_r) \ + (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) + +/* Test if there is an empty slot available on the front ring. + * (This is only meaningful from the front. ) + */ +#define RING_FULL(_r) \ + (RING_FREE_REQUESTS(_r) == 0) + +/* Test if there are outstanding messages to be processed on a ring. */ +#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ + ((_r)->sring->rsp_prod - (_r)->rsp_cons) + +#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ + ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? req : rsp; \ + }) + +/* Direct access to individual ring elements, by index. */ +#define RING_GET_REQUEST(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) + +/* + * Get a local copy of a request. + * + * Use this in preference to RING_GET_REQUEST() so all processing is + * done on a local copy that cannot be modified by the other end. + * + * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this + * to be ineffective where _req is a struct which consists of only bitfields. + */ +#define RING_COPY_REQUEST(_r, _idx, _req) do { \ + /* Use volatile to force the copy into _req. */ \ + *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx); \ +} while (0) + +#define RING_GET_RESPONSE(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) + +/* Loop termination condition: Would the specified index overflow the ring? */ +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ + (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) + +/* Ill-behaved frontend determination: Can there be this many requests? */ +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ + (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r)) + + +#define RING_PUSH_REQUESTS(_r) do { \ + virt_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = (_r)->req_prod_pvt; \ +} while (0) + +#define RING_PUSH_RESPONSES(_r) do { \ + virt_wmb(); /* front sees responses /before/ updated producer index */ \ + (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ +} while (0) + +/* + * Notification hold-off (req_event and rsp_event): + * + * When queueing requests or responses on a shared ring, it may not always be + * necessary to notify the remote end. For example, if requests are in flight + * in a backend, the front may be able to queue further requests without + * notifying the back (if the back checks for new requests when it queues + * responses). + * + * When enqueuing requests or responses: + * + * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument + * is a boolean return value. True indicates that the receiver requires an + * asynchronous notification. + * + * After dequeuing requests or responses (before sleeping the connection): + * + * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES(). + * The second argument is a boolean return value. True indicates that there + * are pending messages on the ring (i.e., the connection should not be put + * to sleep). + * + * These macros will set the req_event/rsp_event field to trigger a + * notification on the very next message that is enqueued. If you want to + * create batches of work (i.e., only receive a notification after several + * messages have been enqueued) then you will need to create a customised + * version of the FINAL_CHECK macro in your own code, which sets the event + * field appropriately. + */ + +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->req_prod; \ + RING_IDX __new = (_r)->req_prod_pvt; \ + virt_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = __new; \ + virt_mb(); /* back sees new requests /before/ we check req_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->rsp_prod; \ + RING_IDX __new = (_r)->rsp_prod_pvt; \ + virt_wmb(); /* front sees responses /before/ updated producer index */ \ + (_r)->sring->rsp_prod = __new; \ + virt_mb(); /* front sees new responses /before/ we check rsp_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ + if (_work_to_do) break; \ + (_r)->sring->req_event = (_r)->req_cons + 1; \ + virt_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ + if (_work_to_do) break; \ + (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ + virt_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ +} while (0) + + +/* + * DEFINE_XEN_FLEX_RING_AND_INTF defines two monodirectional rings and + * functions to check if there is data on the ring, and to read and + * write to them. + * + * DEFINE_XEN_FLEX_RING is similar to DEFINE_XEN_FLEX_RING_AND_INTF, but + * does not define the indexes page. As different protocols can have + * extensions to the basic format, this macro allow them to define their + * own struct. + * + * XEN_FLEX_RING_SIZE + * Convenience macro to calculate the size of one of the two rings + * from the overall order. + * + * $NAME_mask + * Function to apply the size mask to an index, to reduce the index + * within the range [0-size]. + * + * $NAME_read_packet + * Function to read data from the ring. The amount of data to read is + * specified by the "size" argument. + * + * $NAME_write_packet + * Function to write data to the ring. The amount of data to write is + * specified by the "size" argument. + * + * $NAME_get_ring_ptr + * Convenience function that returns a pointer to read/write to the + * ring at the right location. + * + * $NAME_data_intf + * Indexes page, shared between frontend and backend. It also + * contains the array of grant refs. + * + * $NAME_queued + * Function to calculate how many bytes are currently on the ring, + * ready to be read. It can also be used to calculate how much free + * space is currently on the ring (XEN_FLEX_RING_SIZE() - + * $NAME_queued()). + */ + +#ifndef XEN_PAGE_SHIFT +/* The PAGE_SIZE for ring protocols and hypercall interfaces is always + * 4K, regardless of the architecture, and page granularity chosen by + * operating systems. + */ +#define XEN_PAGE_SHIFT 12 +#endif +#define XEN_FLEX_RING_SIZE(order) \ + (1UL << ((order) + XEN_PAGE_SHIFT - 1)) + +#define DEFINE_XEN_FLEX_RING(name) \ +static inline RING_IDX name##_mask(RING_IDX idx, RING_IDX ring_size) \ +{ \ + return idx & (ring_size - 1); \ +} \ + \ +static inline unsigned char *name##_get_ring_ptr(unsigned char *buf, \ + RING_IDX idx, \ + RING_IDX ring_size) \ +{ \ + return buf + name##_mask(idx, ring_size); \ +} \ + \ +static inline void name##_read_packet(void *opaque, \ + const unsigned char *buf, \ + size_t size, \ + RING_IDX masked_prod, \ + RING_IDX *masked_cons, \ + RING_IDX ring_size) \ +{ \ + if (*masked_cons < masked_prod || \ + size <= ring_size - *masked_cons) { \ + memcpy(opaque, buf + *masked_cons, size); \ + } else { \ + memcpy(opaque, buf + *masked_cons, ring_size - *masked_cons); \ + memcpy((unsigned char *)opaque + ring_size - *masked_cons, buf, \ + size - (ring_size - *masked_cons)); \ + } \ + *masked_cons = name##_mask(*masked_cons + size, ring_size); \ +} \ + \ +static inline void name##_write_packet(unsigned char *buf, \ + const void *opaque, \ + size_t size, \ + RING_IDX *masked_prod, \ + RING_IDX masked_cons, \ + RING_IDX ring_size) \ +{ \ + if (*masked_prod < masked_cons || \ + size <= ring_size - *masked_prod) { \ + memcpy(buf + *masked_prod, opaque, size); \ + } else { \ + memcpy(buf + *masked_prod, opaque, ring_size - *masked_prod); \ + memcpy(buf, (unsigned char *)opaque + (ring_size - *masked_prod), \ + size - (ring_size - *masked_prod)); \ + } \ + *masked_prod = name##_mask(*masked_prod + size, ring_size); \ +} \ + \ +static inline RING_IDX name##_queued(RING_IDX prod, \ + RING_IDX cons, \ + RING_IDX ring_size) \ +{ \ + RING_IDX size; \ + \ + if (prod == cons) \ + return 0; \ + \ + prod = name##_mask(prod, ring_size); \ + cons = name##_mask(cons, ring_size); \ + \ + if (prod == cons) \ + return ring_size; \ + \ + if (prod > cons) \ + size = prod - cons; \ + else \ + size = ring_size - (cons - prod); \ + return size; \ +} \ + \ +struct name##_data { \ + unsigned char *in; /* half of the allocation */ \ + unsigned char *out; /* half of the allocation */ \ +} + +#define DEFINE_XEN_FLEX_RING_AND_INTF(name) \ +struct name##_data_intf { \ + RING_IDX in_cons, in_prod; \ + \ + uint8_t pad1[56]; \ + \ + RING_IDX out_cons, out_prod; \ + \ + uint8_t pad2[56]; \ + \ + RING_IDX ring_order; \ + grant_ref_t ref[]; \ +}; \ +DEFINE_XEN_FLEX_RING(name) + +#endif /* __XEN_PUBLIC_IO_RING_H__ */ diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h new file mode 100644 index 0000000..732efb0 --- /dev/null +++ b/include/xen/interface/platform.h @@ -0,0 +1,531 @@ +/****************************************************************************** + * platform.h + * + * Hardware platform operations. Intended for use by domain-0 kernel. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_PLATFORM_H__ +#define __XEN_PUBLIC_PLATFORM_H__ + +#include + +#define XENPF_INTERFACE_VERSION 0x03000001 + +/* + * Set clock such that it would read after 00:00:00 UTC, + * 1 January, 1970 if the current system time was . + */ +#define XENPF_settime32 17 +struct xenpf_settime32 { + /* IN variables. */ + uint32_t secs; + uint32_t nsecs; + uint64_t system_time; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_settime32_t); +#define XENPF_settime64 62 +struct xenpf_settime64 { + /* IN variables. */ + uint64_t secs; + uint32_t nsecs; + uint32_t mbz; + uint64_t system_time; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_settime64_t); + +/* + * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type. + * On x86, @type is an architecture-defined MTRR memory type. + * On success, returns the MTRR that was used (@reg) and a handle that can + * be passed to XENPF_DEL_MEMTYPE to accurately tear down the new setting. + * (x86-specific). + */ +#define XENPF_add_memtype 31 +struct xenpf_add_memtype { + /* IN variables. */ + xen_pfn_t mfn; + uint64_t nr_mfns; + uint32_t type; + /* OUT variables. */ + uint32_t handle; + uint32_t reg; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_add_memtype_t); + +/* + * Tear down an existing memory-range type. If @handle is remembered then it + * should be passed in to accurately tear down the correct setting (in case + * of overlapping memory regions with differing types). If it is not known + * then @handle should be set to zero. In all cases @reg must be set. + * (x86-specific). + */ +#define XENPF_del_memtype 32 +struct xenpf_del_memtype { + /* IN variables. */ + uint32_t handle; + uint32_t reg; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_del_memtype_t); + +/* Read current type of an MTRR (x86-specific). */ +#define XENPF_read_memtype 33 +struct xenpf_read_memtype { + /* IN variables. */ + uint32_t reg; + /* OUT variables. */ + xen_pfn_t mfn; + uint64_t nr_mfns; + uint32_t type; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_read_memtype_t); + +#define XENPF_microcode_update 35 +struct xenpf_microcode_update { + /* IN variables. */ + GUEST_HANDLE(void) data; /* Pointer to microcode data */ + uint32_t length; /* Length of microcode data. */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_microcode_update_t); + +#define XENPF_platform_quirk 39 +#define QUIRK_NOIRQBALANCING 1 /* Do not restrict IO-APIC RTE targets */ +#define QUIRK_IOAPIC_BAD_REGSEL 2 /* IO-APIC REGSEL forgets its value */ +#define QUIRK_IOAPIC_GOOD_REGSEL 3 /* IO-APIC REGSEL behaves properly */ +struct xenpf_platform_quirk { + /* IN variables. */ + uint32_t quirk_id; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_platform_quirk_t); + +#define XENPF_efi_runtime_call 49 +#define XEN_EFI_get_time 1 +#define XEN_EFI_set_time 2 +#define XEN_EFI_get_wakeup_time 3 +#define XEN_EFI_set_wakeup_time 4 +#define XEN_EFI_get_next_high_monotonic_count 5 +#define XEN_EFI_get_variable 6 +#define XEN_EFI_set_variable 7 +#define XEN_EFI_get_next_variable_name 8 +#define XEN_EFI_query_variable_info 9 +#define XEN_EFI_query_capsule_capabilities 10 +#define XEN_EFI_update_capsule 11 + +struct xenpf_efi_runtime_call { + uint32_t function; + /* + * This field is generally used for per sub-function flags (defined + * below), except for the XEN_EFI_get_next_high_monotonic_count case, + * where it holds the single returned value. + */ + uint32_t misc; + xen_ulong_t status; + union { +#define XEN_EFI_GET_TIME_SET_CLEARS_NS 0x00000001 + struct { + struct xenpf_efi_time { + uint16_t year; + uint8_t month; + uint8_t day; + uint8_t hour; + uint8_t min; + uint8_t sec; + uint32_t ns; + int16_t tz; + uint8_t daylight; + } time; + uint32_t resolution; + uint32_t accuracy; + } get_time; + + struct xenpf_efi_time set_time; + +#define XEN_EFI_GET_WAKEUP_TIME_ENABLED 0x00000001 +#define XEN_EFI_GET_WAKEUP_TIME_PENDING 0x00000002 + struct xenpf_efi_time get_wakeup_time; + +#define XEN_EFI_SET_WAKEUP_TIME_ENABLE 0x00000001 +#define XEN_EFI_SET_WAKEUP_TIME_ENABLE_ONLY 0x00000002 + struct xenpf_efi_time set_wakeup_time; + +#define XEN_EFI_VARIABLE_NON_VOLATILE 0x00000001 +#define XEN_EFI_VARIABLE_BOOTSERVICE_ACCESS 0x00000002 +#define XEN_EFI_VARIABLE_RUNTIME_ACCESS 0x00000004 + struct { + GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ + xen_ulong_t size; + GUEST_HANDLE(void) data; + struct xenpf_efi_guid { + uint32_t data1; + uint16_t data2; + uint16_t data3; + uint8_t data4[8]; + } vendor_guid; + } get_variable, set_variable; + + struct { + xen_ulong_t size; + GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ + struct xenpf_efi_guid vendor_guid; + } get_next_variable_name; + + struct { + uint32_t attr; + uint64_t max_store_size; + uint64_t remain_store_size; + uint64_t max_size; + } query_variable_info; + + struct { + GUEST_HANDLE(void) capsule_header_array; + xen_ulong_t capsule_count; + uint64_t max_capsule_size; + uint32_t reset_type; + } query_capsule_capabilities; + + struct { + GUEST_HANDLE(void) capsule_header_array; + xen_ulong_t capsule_count; + uint64_t sg_list; /* machine address */ + } update_capsule; + } u; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_efi_runtime_call); + +#define XEN_FW_EFI_VERSION 0 +#define XEN_FW_EFI_CONFIG_TABLE 1 +#define XEN_FW_EFI_VENDOR 2 +#define XEN_FW_EFI_MEM_INFO 3 +#define XEN_FW_EFI_RT_VERSION 4 + +#define XENPF_firmware_info 50 +#define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */ +#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */ +#define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */ +#define XEN_FW_EFI_INFO 4 /* from EFI */ +#define XEN_FW_KBD_SHIFT_FLAGS 5 /* Int16, Fn02: Get keyboard shift flags. */ + +struct xenpf_firmware_info { + /* IN variables. */ + uint32_t type; + uint32_t index; + /* OUT variables. */ + union { + struct { + /* Int13, Fn48: Check Extensions Present. */ + uint8_t device; /* %dl: bios device number */ + uint8_t version; /* %ah: major version */ + uint16_t interface_support; /* %cx: support bitmap */ + /* Int13, Fn08: Legacy Get Device Parameters. */ + uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */ + uint8_t legacy_max_head; /* %dh: max head # */ + uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */ + /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */ + /* NB. First uint16_t of buffer must be set to buffer size. */ + GUEST_HANDLE(void) edd_params; + } disk_info; /* XEN_FW_DISK_INFO */ + struct { + uint8_t device; /* bios device number */ + uint32_t mbr_signature; /* offset 0x1b8 in mbr */ + } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */ + struct { + /* Int10, AX=4F15: Get EDID info. */ + uint8_t capabilities; + uint8_t edid_transfer_time; + /* must refer to 128-byte buffer */ + GUEST_HANDLE(uchar) edid; + } vbeddc_info; /* XEN_FW_VBEDDC_INFO */ + + union xenpf_efi_info { + uint32_t version; + struct { + uint64_t addr; /* EFI_CONFIGURATION_TABLE */ + uint32_t nent; + } cfg; + struct { + uint32_t revision; + uint32_t bufsz; /* input, in bytes */ + GUEST_HANDLE(void) name; + /* UCS-2/UTF-16 string */ + } vendor; + struct { + uint64_t addr; + uint64_t size; + uint64_t attr; + uint32_t type; + } mem; + } efi_info; /* XEN_FW_EFI_INFO */ + + uint8_t kbd_shift_flags; /* XEN_FW_KBD_SHIFT_FLAGS */ + } u; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_firmware_info_t); + +#define XENPF_enter_acpi_sleep 51 +struct xenpf_enter_acpi_sleep { + /* IN variables */ + uint16_t val_a; /* PM1a control / sleep type A. */ + uint16_t val_b; /* PM1b control / sleep type B. */ + uint32_t sleep_state; /* Which state to enter (Sn). */ +#define XENPF_ACPI_SLEEP_EXTENDED 0x00000001 + uint32_t flags; /* XENPF_ACPI_SLEEP_*. */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_enter_acpi_sleep_t); + +#define XENPF_change_freq 52 +struct xenpf_change_freq { + /* IN variables */ + uint32_t flags; /* Must be zero. */ + uint32_t cpu; /* Physical cpu. */ + uint64_t freq; /* New frequency (Hz). */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_change_freq_t); + +/* + * Get idle times (nanoseconds since boot) for physical CPUs specified in the + * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is + * indexed by CPU number; only entries with the corresponding @cpumap_bitmap + * bit set are written to. On return, @cpumap_bitmap is modified so that any + * non-existent CPUs are cleared. Such CPUs have their @idletime array entry + * cleared. + */ +#define XENPF_getidletime 53 +struct xenpf_getidletime { + /* IN/OUT variables */ + /* IN: CPUs to interrogate; OUT: subset of IN which are present */ + GUEST_HANDLE(uchar) cpumap_bitmap; + /* IN variables */ + /* Size of cpumap bitmap. */ + uint32_t cpumap_nr_cpus; + /* Must be indexable for every cpu in cpumap_bitmap. */ + GUEST_HANDLE(uint64_t) idletime; + /* OUT variables */ + /* System time when the idletime snapshots were taken. */ + uint64_t now; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t); + +#define XENPF_set_processor_pminfo 54 + +/* ability bits */ +#define XEN_PROCESSOR_PM_CX 1 +#define XEN_PROCESSOR_PM_PX 2 +#define XEN_PROCESSOR_PM_TX 4 + +/* cmd type */ +#define XEN_PM_CX 0 +#define XEN_PM_PX 1 +#define XEN_PM_TX 2 +#define XEN_PM_PDC 3 +/* Px sub info type */ +#define XEN_PX_PCT 1 +#define XEN_PX_PSS 2 +#define XEN_PX_PPC 4 +#define XEN_PX_PSD 8 + +struct xen_power_register { + uint32_t space_id; + uint32_t bit_width; + uint32_t bit_offset; + uint32_t access_size; + uint64_t address; +}; + +struct xen_processor_csd { + uint32_t domain; /* domain number of one dependent group */ + uint32_t coord_type; /* coordination type */ + uint32_t num; /* number of processors in same domain */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_processor_csd); + +struct xen_processor_cx { + struct xen_power_register reg; /* GAS for Cx trigger register */ + uint8_t type; /* cstate value, c0: 0, c1: 1, ... */ + uint32_t latency; /* worst latency (ms) to enter/exit this cstate */ + uint32_t power; /* average power consumption(mW) */ + uint32_t dpcnt; /* number of dependency entries */ + GUEST_HANDLE(xen_processor_csd) dp; /* NULL if no dependency */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_processor_cx); + +struct xen_processor_flags { + uint32_t bm_control:1; + uint32_t bm_check:1; + uint32_t has_cst:1; + uint32_t power_setup_done:1; + uint32_t bm_rld_set:1; +}; + +struct xen_processor_power { + uint32_t count; /* number of C state entries in array below */ + struct xen_processor_flags flags; /* global flags of this processor */ + GUEST_HANDLE(xen_processor_cx) states; /* supported c states */ +}; + +struct xen_pct_register { + uint8_t descriptor; + uint16_t length; + uint8_t space_id; + uint8_t bit_width; + uint8_t bit_offset; + uint8_t reserved; + uint64_t address; +}; + +struct xen_processor_px { + uint64_t core_frequency; /* megahertz */ + uint64_t power; /* milliWatts */ + uint64_t transition_latency; /* microseconds */ + uint64_t bus_master_latency; /* microseconds */ + uint64_t control; /* control value */ + uint64_t status; /* success indicator */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_processor_px); + +struct xen_psd_package { + uint64_t num_entries; + uint64_t revision; + uint64_t domain; + uint64_t coord_type; + uint64_t num_processors; +}; + +struct xen_processor_performance { + uint32_t flags; /* flag for Px sub info type */ + uint32_t platform_limit; /* Platform limitation on freq usage */ + struct xen_pct_register control_register; + struct xen_pct_register status_register; + uint32_t state_count; /* total available performance states */ + GUEST_HANDLE(xen_processor_px) states; + struct xen_psd_package domain_info; + uint32_t shared_type; /* coordination type of this processor */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_processor_performance); + +struct xenpf_set_processor_pminfo { + /* IN variables */ + uint32_t id; /* ACPI CPU ID */ + uint32_t type; /* {XEN_PM_CX, XEN_PM_PX} */ + union { + struct xen_processor_power power;/* Cx: _CST/_CSD */ + struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */ + GUEST_HANDLE(uint32_t) pdc; + }; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo); + +#define XENPF_get_cpuinfo 55 +struct xenpf_pcpuinfo { + /* IN */ + uint32_t xen_cpuid; + /* OUT */ + /* The maxium cpu_id that is present */ + uint32_t max_present; +#define XEN_PCPU_FLAGS_ONLINE 1 + /* Correponding xen_cpuid is not present*/ +#define XEN_PCPU_FLAGS_INVALID 2 + uint32_t flags; + uint32_t apic_id; + uint32_t acpi_id; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo); + +#define XENPF_cpu_online 56 +#define XENPF_cpu_offline 57 +struct xenpf_cpu_ol { + uint32_t cpuid; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol); + +#define XENPF_cpu_hotadd 58 +struct xenpf_cpu_hotadd { + uint32_t apic_id; + uint32_t acpi_id; + uint32_t pxm; +}; + +#define XENPF_mem_hotadd 59 +struct xenpf_mem_hotadd { + uint64_t spfn; + uint64_t epfn; + uint32_t pxm; + uint32_t flags; +}; + +#define XENPF_core_parking 60 +struct xenpf_core_parking { + /* IN variables */ +#define XEN_CORE_PARKING_SET 1 +#define XEN_CORE_PARKING_GET 2 + uint32_t type; + /* IN variables: set cpu nums expected to be idled */ + /* OUT variables: get cpu nums actually be idled */ + uint32_t idle_nums; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_core_parking); + +#define XENPF_get_symbol 63 +struct xenpf_symdata { + /* IN/OUT variables */ + uint32_t namelen; /* size of 'name' buffer */ + + /* IN/OUT variables */ + uint32_t symnum; /* IN: Symbol to read */ + /* OUT: Next available symbol. If same as IN */ + /* then we reached the end */ + + /* OUT variables */ + GUEST_HANDLE(char) name; + uint64_t address; + char type; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_symdata); + +struct xen_platform_op { + uint32_t cmd; + uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ + union { + struct xenpf_settime32 settime32; + struct xenpf_settime64 settime64; + struct xenpf_add_memtype add_memtype; + struct xenpf_del_memtype del_memtype; + struct xenpf_read_memtype read_memtype; + struct xenpf_microcode_update microcode; + struct xenpf_platform_quirk platform_quirk; + struct xenpf_efi_runtime_call efi_runtime_call; + struct xenpf_firmware_info firmware_info; + struct xenpf_enter_acpi_sleep enter_acpi_sleep; + struct xenpf_change_freq change_freq; + struct xenpf_getidletime getidletime; + struct xenpf_set_processor_pminfo set_pminfo; + struct xenpf_pcpuinfo pcpu_info; + struct xenpf_cpu_ol cpu_ol; + struct xenpf_cpu_hotadd cpu_add; + struct xenpf_mem_hotadd mem_add; + struct xenpf_core_parking core_parking; + struct xenpf_symdata symdata; + uint8_t pad[128]; + } u; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_platform_op_t); + +#endif /* __XEN_PUBLIC_PLATFORM_H__ */ diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h new file mode 100644 index 0000000..a4c4d73 --- /dev/null +++ b/include/xen/interface/sched.h @@ -0,0 +1,183 @@ +/****************************************************************************** + * sched.h + * + * Scheduler state interactions + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_SCHED_H__ +#define __XEN_PUBLIC_SCHED_H__ + +#include + +/* + * Guest Scheduler Operations + * + * The SCHEDOP interface provides mechanisms for a guest to interact + * with the scheduler, including yield, blocking and shutting itself + * down. + */ + +/* + * The prototype for this hypercall is: + * long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...) + * + * @cmd == SCHEDOP_??? (scheduler operation). + * @arg == Operation-specific extra argument(s), as described below. + * ... == Additional Operation-specific extra arguments, described below. + * + * Versions of Xen prior to 3.0.2 provided only the following legacy version + * of this hypercall, supporting only the commands yield, block and shutdown: + * long sched_op(int cmd, unsigned long arg) + * @cmd == SCHEDOP_??? (scheduler operation). + * @arg == 0 (SCHEDOP_yield and SCHEDOP_block) + * == SHUTDOWN_* code (SCHEDOP_shutdown) + * + * This legacy version is available to new guests as: + * long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg) + */ + +/* + * Voluntarily yield the CPU. + * @arg == NULL. + */ +#define SCHEDOP_yield 0 + +/* + * Block execution of this VCPU until an event is received for processing. + * If called with event upcalls masked, this operation will atomically + * reenable event delivery and check for pending events before blocking the + * VCPU. This avoids a "wakeup waiting" race. + * @arg == NULL. + */ +#define SCHEDOP_block 1 + +/* + * Halt execution of this domain (all VCPUs) and notify the system controller. + * @arg == pointer to sched_shutdown structure. + * + * If the sched_shutdown_t reason is SHUTDOWN_suspend then + * x86 PV guests must also set RDX (EDX for 32-bit guests) to the MFN + * of the guest's start info page. RDX/EDX is the third hypercall + * argument. + * + * In addition, which reason is SHUTDOWN_suspend this hypercall + * returns 1 if suspend was cancelled or the domain was merely + * checkpointed, and 0 if it is resuming in a new domain. + */ +#define SCHEDOP_shutdown 2 + +/* + * Poll a set of event-channel ports. Return when one or more are pending. An + * optional timeout may be specified. + * @arg == pointer to sched_poll structure. + */ +#define SCHEDOP_poll 3 + +/* + * Declare a shutdown for another domain. The main use of this function is + * in interpreting shutdown requests and reasons for fully-virtualized + * domains. A para-virtualized domain may use SCHEDOP_shutdown directly. + * @arg == pointer to sched_remote_shutdown structure. + */ +#define SCHEDOP_remote_shutdown 4 + +/* + * Latch a shutdown code, so that when the domain later shuts down it + * reports this code to the control tools. + * @arg == sched_shutdown, as for SCHEDOP_shutdown. + */ +#define SCHEDOP_shutdown_code 5 + +/* + * Setup, poke and destroy a domain watchdog timer. + * @arg == pointer to sched_watchdog structure. + * With id == 0, setup a domain watchdog timer to cause domain shutdown + * after timeout, returns watchdog id. + * With id != 0 and timeout == 0, destroy domain watchdog timer. + * With id != 0 and timeout != 0, poke watchdog timer and set new timeout. + */ +#define SCHEDOP_watchdog 6 + +/* + * Override the current vcpu affinity by pinning it to one physical cpu or + * undo this override restoring the previous affinity. + * @arg == pointer to sched_pin_override structure. + * + * A negative pcpu value will undo a previous pin override and restore the + * previous cpu affinity. + * This call is allowed for the hardware domain only and requires the cpu + * to be part of the domain's cpupool. + */ +#define SCHEDOP_pin_override 7 + +struct sched_shutdown { + unsigned int reason; /* SHUTDOWN_* => shutdown reason */ +}; +DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown); + +struct sched_poll { + GUEST_HANDLE(evtchn_port_t) ports; + unsigned int nr_ports; + uint64_t timeout; +}; +DEFINE_GUEST_HANDLE_STRUCT(sched_poll); + +struct sched_remote_shutdown { + domid_t domain_id; /* Remote domain ID */ + unsigned int reason; /* SHUTDOWN_* => shutdown reason */ +}; +DEFINE_GUEST_HANDLE_STRUCT(sched_remote_shutdown); + +struct sched_watchdog { + uint32_t id; /* watchdog ID */ + uint32_t timeout; /* timeout */ +}; +DEFINE_GUEST_HANDLE_STRUCT(sched_watchdog); + +struct sched_pin_override { + int32_t pcpu; +}; +DEFINE_GUEST_HANDLE_STRUCT(sched_pin_override); + +/* + * Reason codes for SCHEDOP_shutdown. These may be interpreted by control + * software to determine the appropriate action. For the most part, Xen does + * not care about the shutdown code. + */ +#define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up and kill. */ +#define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */ +#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ +#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ +#define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */ + +/* + * Domain asked to perform 'soft reset' for it. The expected behavior is to + * reset internal Xen state for the domain returning it to the point where it + * was created but leaving the domain's memory contents and vCPU contexts + * intact. This will allow the domain to start over and set up all Xen specific + * interfaces again. + */ +#define SHUTDOWN_soft_reset 5 +#define SHUTDOWN_MAX 5 /* Maximum valid shutdown reason. */ + +#endif /* __XEN_PUBLIC_SCHED_H__ */ diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h new file mode 100644 index 0000000..8bfb242 --- /dev/null +++ b/include/xen/interface/xen.h @@ -0,0 +1,782 @@ +/****************************************************************************** + * xen.h + * + * Guest OS interface to Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_XEN_H__ +#define __XEN_PUBLIC_XEN_H__ + +#include + +/* + * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS). + */ + +/* + * x86_32: EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5. + * EAX = return value + * (argument registers may be clobbered on return) + * x86_64: RAX = vector; RDI, RSI, RDX, R10, R8, R9 = args 1, 2, 3, 4, 5, 6. + * RAX = return value + * (argument registers not clobbered on return; RCX, R11 are) + */ +#define __HYPERVISOR_set_trap_table 0 +#define __HYPERVISOR_mmu_update 1 +#define __HYPERVISOR_set_gdt 2 +#define __HYPERVISOR_stack_switch 3 +#define __HYPERVISOR_set_callbacks 4 +#define __HYPERVISOR_fpu_taskswitch 5 +#define __HYPERVISOR_sched_op_compat 6 +#define __HYPERVISOR_platform_op 7 +#define __HYPERVISOR_set_debugreg 8 +#define __HYPERVISOR_get_debugreg 9 +#define __HYPERVISOR_update_descriptor 10 +#define __HYPERVISOR_memory_op 12 +#define __HYPERVISOR_multicall 13 +#define __HYPERVISOR_update_va_mapping 14 +#define __HYPERVISOR_set_timer_op 15 +#define __HYPERVISOR_event_channel_op_compat 16 +#define __HYPERVISOR_xen_version 17 +#define __HYPERVISOR_console_io 18 +#define __HYPERVISOR_physdev_op_compat 19 +#define __HYPERVISOR_grant_table_op 20 +#define __HYPERVISOR_vm_assist 21 +#define __HYPERVISOR_update_va_mapping_otherdomain 22 +#define __HYPERVISOR_iret 23 /* x86 only */ +#define __HYPERVISOR_vcpu_op 24 +#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ +#define __HYPERVISOR_mmuext_op 26 +#define __HYPERVISOR_xsm_op 27 +#define __HYPERVISOR_nmi_op 28 +#define __HYPERVISOR_sched_op 29 +#define __HYPERVISOR_callback_op 30 +#define __HYPERVISOR_xenoprof_op 31 +#define __HYPERVISOR_event_channel_op 32 +#define __HYPERVISOR_physdev_op 33 +#define __HYPERVISOR_hvm_op 34 +#define __HYPERVISOR_sysctl 35 +#define __HYPERVISOR_domctl 36 +#define __HYPERVISOR_kexec_op 37 +#define __HYPERVISOR_tmem_op 38 +#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ +#define __HYPERVISOR_xenpmu_op 40 +#define __HYPERVISOR_dm_op 41 + +/* Architecture-specific hypercall definitions. */ +#define __HYPERVISOR_arch_0 48 +#define __HYPERVISOR_arch_1 49 +#define __HYPERVISOR_arch_2 50 +#define __HYPERVISOR_arch_3 51 +#define __HYPERVISOR_arch_4 52 +#define __HYPERVISOR_arch_5 53 +#define __HYPERVISOR_arch_6 54 +#define __HYPERVISOR_arch_7 55 + +/* + * VIRTUAL INTERRUPTS + * + * Virtual interrupts that a guest OS may receive from Xen. + * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a + * global VIRQ. The former can be bound once per VCPU and cannot be re-bound. + * The latter can be allocated only once per guest: they must initially be + * allocated to VCPU0 but can subsequently be re-bound. + */ +#define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */ +#define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */ +#define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */ +#define VIRQ_DOM_EXC 3 /* G. (DOM0) Exceptional event for some domain. */ +#define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */ +#define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */ +#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */ +#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */ +#define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */ +#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occured */ +#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */ +#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */ +#define VIRQ_XENPMU 13 /* PMC interrupt */ + +/* Architecture-specific VIRQ definitions. */ +#define VIRQ_ARCH_0 16 +#define VIRQ_ARCH_1 17 +#define VIRQ_ARCH_2 18 +#define VIRQ_ARCH_3 19 +#define VIRQ_ARCH_4 20 +#define VIRQ_ARCH_5 21 +#define VIRQ_ARCH_6 22 +#define VIRQ_ARCH_7 23 + +#define NR_VIRQS 24 + +/* + * enum neg_errnoval HYPERVISOR_mmu_update(const struct mmu_update reqs[], + * unsigned count, unsigned *done_out, + * unsigned foreigndom) + * @reqs is an array of mmu_update_t structures ((ptr, val) pairs). + * @count is the length of the above array. + * @pdone is an output parameter indicating number of completed operations + * @foreigndom[15:0]: FD, the expected owner of data pages referenced in this + * hypercall invocation. Can be DOMID_SELF. + * @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced + * in this hypercall invocation. The value of this field + * (x) encodes the PFD as follows: + * x == 0 => PFD == DOMID_SELF + * x != 0 => PFD == x - 1 + * + * Sub-commands: ptr[1:0] specifies the appropriate MMU_* command. + * ------------- + * ptr[1:0] == MMU_NORMAL_PT_UPDATE: + * Updates an entry in a page table belonging to PFD. If updating an L1 table, + * and the new table entry is valid/present, the mapped frame must belong to + * FD. If attempting to map an I/O page then the caller assumes the privilege + * of the FD. + * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. + * FD == DOMID_XEN: Map restricted areas of Xen's heap space. + * ptr[:2] -- Machine address of the page-table entry to modify. + * val -- Value to write. + * + * There also certain implicit requirements when using this hypercall. The + * pages that make up a pagetable must be mapped read-only in the guest. + * This prevents uncontrolled guest updates to the pagetable. Xen strictly + * enforces this, and will disallow any pagetable update which will end up + * mapping pagetable page RW, and will disallow using any writable page as a + * pagetable. In practice it means that when constructing a page table for a + * process, thread, etc, we MUST be very dilligient in following these rules: + * 1). Start with top-level page (PGD or in Xen language: L4). Fill out + * the entries. + * 2). Keep on going, filling out the upper (PUD or L3), and middle (PMD + * or L2). + * 3). Start filling out the PTE table (L1) with the PTE entries. Once + * done, make sure to set each of those entries to RO (so writeable bit + * is unset). Once that has been completed, set the PMD (L2) for this + * PTE table as RO. + * 4). When completed with all of the PMD (L2) entries, and all of them have + * been set to RO, make sure to set RO the PUD (L3). Do the same + * operation on PGD (L4) pagetable entries that have a PUD (L3) entry. + * 5). Now before you can use those pages (so setting the cr3), you MUST also + * pin them so that the hypervisor can verify the entries. This is done + * via the HYPERVISOR_mmuext_op(MMUEXT_PIN_L4_TABLE, guest physical frame + * number of the PGD (L4)). And this point the HYPERVISOR_mmuext_op( + * MMUEXT_NEW_BASEPTR, guest physical frame number of the PGD (L4)) can be + * issued. + * For 32-bit guests, the L4 is not used (as there is less pagetables), so + * instead use L3. + * At this point the pagetables can be modified using the MMU_NORMAL_PT_UPDATE + * hypercall. Also if so desired the OS can also try to write to the PTE + * and be trapped by the hypervisor (as the PTE entry is RO). + * + * To deallocate the pages, the operations are the reverse of the steps + * mentioned above. The argument is MMUEXT_UNPIN_TABLE for all levels and the + * pagetable MUST not be in use (meaning that the cr3 is not set to it). + * + * ptr[1:0] == MMU_MACHPHYS_UPDATE: + * Updates an entry in the machine->pseudo-physical mapping table. + * ptr[:2] -- Machine address within the frame whose mapping to modify. + * The frame must belong to the FD, if one is specified. + * val -- Value to write into the mapping entry. + * + * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD: + * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed + * with those in @val. + * + * @val is usually the machine frame number along with some attributes. + * The attributes by default follow the architecture defined bits. Meaning that + * if this is a X86_64 machine and four page table layout is used, the layout + * of val is: + * - 63 if set means No execute (NX) + * - 46-13 the machine frame number + * - 12 available for guest + * - 11 available for guest + * - 10 available for guest + * - 9 available for guest + * - 8 global + * - 7 PAT (PSE is disabled, must use hypercall to make 4MB or 2MB pages) + * - 6 dirty + * - 5 accessed + * - 4 page cached disabled + * - 3 page write through + * - 2 userspace accessible + * - 1 writeable + * - 0 present + * + * The one bits that does not fit with the default layout is the PAGE_PSE + * also called PAGE_PAT). The MMUEXT_[UN]MARK_SUPER arguments to the + * HYPERVISOR_mmuext_op serve as mechanism to set a pagetable to be 4MB + * (or 2MB) instead of using the PAGE_PSE bit. + * + * The reason that the PAGE_PSE (bit 7) is not being utilized is due to Xen + * using it as the Page Attribute Table (PAT) bit - for details on it please + * refer to Intel SDM 10.12. The PAT allows to set the caching attributes of + * pages instead of using MTRRs. + * + * The PAT MSR is as follows (it is a 64-bit value, each entry is 8 bits): + * PAT4 PAT0 + * +-----+-----+----+----+----+-----+----+----+ + * | UC | UC- | WC | WB | UC | UC- | WC | WB | <= Linux + * +-----+-----+----+----+----+-----+----+----+ + * | UC | UC- | WT | WB | UC | UC- | WT | WB | <= BIOS (default when machine boots) + * +-----+-----+----+----+----+-----+----+----+ + * | rsv | rsv | WP | WC | UC | UC- | WT | WB | <= Xen + * +-----+-----+----+----+----+-----+----+----+ + * + * The lookup of this index table translates to looking up + * Bit 7, Bit 4, and Bit 3 of val entry: + * + * PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3). + * + * If all bits are off, then we are using PAT0. If bit 3 turned on, + * then we are using PAT1, if bit 3 and bit 4, then PAT2.. + * + * As you can see, the Linux PAT1 translates to PAT4 under Xen. Which means + * that if a guest that follows Linux's PAT setup and would like to set Write + * Combined on pages it MUST use PAT4 entry. Meaning that Bit 7 (PAGE_PAT) is + * set. For example, under Linux it only uses PAT0, PAT1, and PAT2 for the + * caching as: + * + * WB = none (so PAT0) + * WC = PWT (bit 3 on) + * UC = PWT | PCD (bit 3 and 4 are on). + * + * To make it work with Xen, it needs to translate the WC bit as so: + * + * PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3 + * + * And to translate back it would: + * + * PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7. + */ +#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ +#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ +#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */ +#define MMU_PT_UPDATE_NO_TRANSLATE 3 /* checked '*ptr = val'. ptr is MA. */ + +/* + * MMU EXTENDED OPERATIONS + * + * enum neg_errnoval HYPERVISOR_mmuext_op(mmuext_op_t uops[], + * unsigned int count, + * unsigned int *pdone, + * unsigned int foreigndom) + */ +/* HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures. + * A foreigndom (FD) can be specified (or DOMID_SELF for none). + * Where the FD has some effect, it is described below. + * + * cmd: MMUEXT_(UN)PIN_*_TABLE + * mfn: Machine frame number to be (un)pinned as a p.t. page. + * The frame must belong to the FD, if one is specified. + * + * cmd: MMUEXT_NEW_BASEPTR + * mfn: Machine frame number of new page-table base to install in MMU. + * + * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only] + * mfn: Machine frame number of new page-table base to install in MMU + * when in user space. + * + * cmd: MMUEXT_TLB_FLUSH_LOCAL + * No additional arguments. Flushes local TLB. + * + * cmd: MMUEXT_INVLPG_LOCAL + * linear_addr: Linear address to be flushed from the local TLB. + * + * cmd: MMUEXT_TLB_FLUSH_MULTI + * vcpumask: Pointer to bitmap of VCPUs to be flushed. + * + * cmd: MMUEXT_INVLPG_MULTI + * linear_addr: Linear address to be flushed. + * vcpumask: Pointer to bitmap of VCPUs to be flushed. + * + * cmd: MMUEXT_TLB_FLUSH_ALL + * No additional arguments. Flushes all VCPUs' TLBs. + * + * cmd: MMUEXT_INVLPG_ALL + * linear_addr: Linear address to be flushed from all VCPUs' TLBs. + * + * cmd: MMUEXT_FLUSH_CACHE + * No additional arguments. Writes back and flushes cache contents. + * + * cmd: MMUEXT_FLUSH_CACHE_GLOBAL + * No additional arguments. Writes back and flushes cache contents + * on all CPUs in the system. + * + * cmd: MMUEXT_SET_LDT + * linear_addr: Linear address of LDT base (NB. must be page-aligned). + * nr_ents: Number of entries in LDT. + * + * cmd: MMUEXT_CLEAR_PAGE + * mfn: Machine frame number to be cleared. + * + * cmd: MMUEXT_COPY_PAGE + * mfn: Machine frame number of the destination page. + * src_mfn: Machine frame number of the source page. + * + * cmd: MMUEXT_[UN]MARK_SUPER + * mfn: Machine frame number of head of superpage to be [un]marked. + */ +#define MMUEXT_PIN_L1_TABLE 0 +#define MMUEXT_PIN_L2_TABLE 1 +#define MMUEXT_PIN_L3_TABLE 2 +#define MMUEXT_PIN_L4_TABLE 3 +#define MMUEXT_UNPIN_TABLE 4 +#define MMUEXT_NEW_BASEPTR 5 +#define MMUEXT_TLB_FLUSH_LOCAL 6 +#define MMUEXT_INVLPG_LOCAL 7 +#define MMUEXT_TLB_FLUSH_MULTI 8 +#define MMUEXT_INVLPG_MULTI 9 +#define MMUEXT_TLB_FLUSH_ALL 10 +#define MMUEXT_INVLPG_ALL 11 +#define MMUEXT_FLUSH_CACHE 12 +#define MMUEXT_SET_LDT 13 +#define MMUEXT_NEW_USER_BASEPTR 15 +#define MMUEXT_CLEAR_PAGE 16 +#define MMUEXT_COPY_PAGE 17 +#define MMUEXT_FLUSH_CACHE_GLOBAL 18 +#define MMUEXT_MARK_SUPER 19 +#define MMUEXT_UNMARK_SUPER 20 + +#ifndef __ASSEMBLY__ +struct mmuext_op { + unsigned int cmd; + union { + /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR + * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */ + xen_pfn_t mfn; + /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ + unsigned long linear_addr; + } arg1; + union { + /* SET_LDT */ + unsigned int nr_ents; + /* TLB_FLUSH_MULTI, INVLPG_MULTI */ + void *vcpumask; + /* COPY_PAGE */ + xen_pfn_t src_mfn; + } arg2; +}; +DEFINE_GUEST_HANDLE_STRUCT(mmuext_op); +#endif + +/* These are passed as 'flags' to update_va_mapping. They can be ORed. */ +/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap. */ +/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer. */ +#define UVMF_NONE (0UL<<0) /* No flushing at all. */ +#define UVMF_TLB_FLUSH (1UL<<0) /* Flush entire TLB(s). */ +#define UVMF_INVLPG (2UL<<0) /* Flush only one entry. */ +#define UVMF_FLUSHTYPE_MASK (3UL<<0) +#define UVMF_MULTI (0UL<<2) /* Flush subset of TLBs. */ +#define UVMF_LOCAL (0UL<<2) /* Flush local TLB. */ +#define UVMF_ALL (1UL<<2) /* Flush all TLBs. */ + +/* + * Commands to HYPERVISOR_console_io(). + */ +#define CONSOLEIO_write 0 +#define CONSOLEIO_read 1 + +/* + * Commands to HYPERVISOR_vm_assist(). + */ +#define VMASST_CMD_enable 0 +#define VMASST_CMD_disable 1 + +/* x86/32 guests: simulate full 4GB segment limits. */ +#define VMASST_TYPE_4gb_segments 0 + +/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */ +#define VMASST_TYPE_4gb_segments_notify 1 + +/* + * x86 guests: support writes to bottom-level PTEs. + * NB1. Page-directory entries cannot be written. + * NB2. Guest must continue to remove all writable mappings of PTEs. + */ +#define VMASST_TYPE_writable_pagetables 2 + +/* x86/PAE guests: support PDPTs above 4GB. */ +#define VMASST_TYPE_pae_extended_cr3 3 + +/* + * x86 guests: Sane behaviour for virtual iopl + * - virtual iopl updated from do_iret() hypercalls. + * - virtual iopl reported in bounce frames. + * - guest kernels assumed to be level 0 for the purpose of iopl checks. + */ +#define VMASST_TYPE_architectural_iopl 4 + +/* + * All guests: activate update indicator in vcpu_runstate_info + * Enable setting the XEN_RUNSTATE_UPDATE flag in guest memory mapped + * vcpu_runstate_info during updates of the runstate information. + */ +#define VMASST_TYPE_runstate_update_flag 5 + +#define MAX_VMASST_TYPE 5 + +#ifndef __ASSEMBLY__ + +typedef uint16_t domid_t; + +/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */ +#define DOMID_FIRST_RESERVED (0x7FF0U) + +/* DOMID_SELF is used in certain contexts to refer to oneself. */ +#define DOMID_SELF (0x7FF0U) + +/* + * DOMID_IO is used to restrict page-table updates to mapping I/O memory. + * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO + * is useful to ensure that no mappings to the OS's own heap are accidentally + * installed. (e.g., in Linux this could cause havoc as reference counts + * aren't adjusted on the I/O-mapping code path). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can + * be specified by any calling domain. + */ +#define DOMID_IO (0x7FF1U) + +/* + * DOMID_XEN is used to allow privileged domains to map restricted parts of + * Xen's heap space (e.g., the machine_to_phys table). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if + * the caller is privileged. + */ +#define DOMID_XEN (0x7FF2U) + +/* DOMID_COW is used as the owner of sharable pages */ +#define DOMID_COW (0x7FF3U) + +/* DOMID_INVALID is used to identify pages with unknown owner. */ +#define DOMID_INVALID (0x7FF4U) + +/* Idle domain. */ +#define DOMID_IDLE (0x7FFFU) + +/* + * Send an array of these to HYPERVISOR_mmu_update(). + * NB. The fields are natural pointer/address size for this architecture. + */ +struct mmu_update { + uint64_t ptr; /* Machine address of PTE. */ + uint64_t val; /* New contents of PTE. */ +}; +DEFINE_GUEST_HANDLE_STRUCT(mmu_update); + +/* + * Send an array of these to HYPERVISOR_multicall(). + * NB. The fields are logically the natural register size for this + * architecture. In cases where xen_ulong_t is larger than this then + * any unused bits in the upper portion must be zero. + */ +struct multicall_entry { + xen_ulong_t op; + xen_long_t result; + xen_ulong_t args[6]; +}; +DEFINE_GUEST_HANDLE_STRUCT(multicall_entry); + +struct vcpu_time_info { + /* + * Updates to the following values are preceded and followed + * by an increment of 'version'. The guest can therefore + * detect updates by looking for changes to 'version'. If the + * least-significant bit of the version number is set then an + * update is in progress and the guest must wait to read a + * consistent set of values. The correct way to interact with + * the version number is similar to Linux's seqlock: see the + * implementations of read_seqbegin/read_seqretry. + */ + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; /* TSC at last update of time vals. */ + uint64_t system_time; /* Time, in nanosecs, since boot. */ + /* + * Current system time: + * system_time + ((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul + * CPU frequency (Hz): + * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift + */ + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + int8_t pad1[3]; +}; /* 32 bytes */ + +struct vcpu_info { + /* + * 'evtchn_upcall_pending' is written non-zero by Xen to indicate + * a pending notification for a particular VCPU. It is then cleared + * by the guest OS /before/ checking for pending work, thus avoiding + * a set-and-check race. Note that the mask is only accessed by Xen + * on the CPU that is currently hosting the VCPU. This means that the + * pending and mask flags can be updated by the guest without special + * synchronisation (i.e., no need for the x86 LOCK prefix). + * This may seem suboptimal because if the pending flag is set by + * a different CPU then an IPI may be scheduled even when the mask + * is set. However, note: + * 1. The task of 'interrupt holdoff' is covered by the per-event- + * channel mask bits. A 'noisy' event that is continually being + * triggered can be masked at source at this very precise + * granularity. + * 2. The main purpose of the per-VCPU mask is therefore to restrict + * reentrant execution: whether for concurrency control, or to + * prevent unbounded stack usage. Whatever the purpose, we expect + * that the mask will be asserted only for short periods at a time, + * and so the likelihood of a 'spurious' IPI is suitably small. + * The mask is read before making an event upcall to the guest: a + * non-zero mask therefore guarantees that the VCPU will not receive + * an upcall activation. The mask is cleared when the VCPU requests + * to block: this avoids wakeup-waiting races. + */ + uint8_t evtchn_upcall_pending; + uint8_t evtchn_upcall_mask; + xen_ulong_t evtchn_pending_sel; + struct arch_vcpu_info arch; + struct pvclock_vcpu_time_info time; +}; /* 64 bytes (x86) */ + +/* + * Xen/kernel shared data -- pointer provided in start_info. + * NB. We expect that this struct is smaller than a page. + */ +struct shared_info { + struct vcpu_info vcpu_info[MAX_VIRT_CPUS]; + + /* + * A domain can create "event channels" on which it can send and receive + * asynchronous event notifications. There are three classes of event that + * are delivered by this mechanism: + * 1. Bi-directional inter- and intra-domain connections. Domains must + * arrange out-of-band to set up a connection (usually by allocating + * an unbound 'listener' port and avertising that via a storage service + * such as xenstore). + * 2. Physical interrupts. A domain with suitable hardware-access + * privileges can bind an event-channel port to a physical interrupt + * source. + * 3. Virtual interrupts ('events'). A domain can bind an event-channel + * port to a virtual interrupt source, such as the virtual-timer + * device or the emergency console. + * + * Event channels are addressed by a "port index". Each channel is + * associated with two bits of information: + * 1. PENDING -- notifies the domain that there is a pending notification + * to be processed. This bit is cleared by the guest. + * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING + * will cause an asynchronous upcall to be scheduled. This bit is only + * updated by the guest. It is read-only within Xen. If a channel + * becomes pending while the channel is masked then the 'edge' is lost + * (i.e., when the channel is unmasked, the guest must manually handle + * pending notifications as no upcall will be scheduled by Xen). + * + * To expedite scanning of pending notifications, any 0->1 pending + * transition on an unmasked channel causes a corresponding bit in a + * per-vcpu selector word to be set. Each bit in the selector covers a + * 'C long' in the PENDING bitfield array. + */ + xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8]; + xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8]; + + /* + * Wallclock time: updated only by control software. Guests should base + * their gettimeofday() syscall on this wallclock-base value. + */ + struct pvclock_wall_clock wc; + + struct arch_shared_info arch; + +}; + +/* + * Start-of-day memory layout + * + * 1. The domain is started within contiguous virtual-memory region. + * 2. The contiguous region begins and ends on an aligned 4MB boundary. + * 3. This the order of bootstrap elements in the initial virtual region: + * a. relocated kernel image + * b. initial ram disk [mod_start, mod_len] + * (may be omitted) + * c. list of allocated page frames [mfn_list, nr_pages] + * (unless relocated due to XEN_ELFNOTE_INIT_P2M) + * d. start_info_t structure [register ESI (x86)] + * in case of dom0 this page contains the console info, too + * e. unless dom0: xenstore ring page + * f. unless dom0: console ring page + * g. bootstrap page tables [pt_base, CR3 (x86)] + * h. bootstrap stack [register ESP (x86)] + * 4. Bootstrap elements are packed together, but each is 4kB-aligned. + * 5. The list of page frames forms a contiguous 'pseudo-physical' memory + * layout for the domain. In particular, the bootstrap virtual-memory + * region is a 1:1 mapping to the first section of the pseudo-physical map. + * 6. All bootstrap elements are mapped read-writable for the guest OS. The + * only exception is the bootstrap page table, which is mapped read-only. + * 7. There is guaranteed to be at least 512kB padding after the final + * bootstrap element. If necessary, the bootstrap virtual region is + * extended by an extra 4MB to ensure this. + */ + +#define MAX_GUEST_CMDLINE 1024 +struct start_info { + /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ + char magic[32]; /* "xen--". */ + unsigned long nr_pages; /* Total pages allocated to this domain. */ + unsigned long shared_info; /* MACHINE address of shared info struct. */ + uint32_t flags; /* SIF_xxx flags. */ + xen_pfn_t store_mfn; /* MACHINE page number of shared page. */ + uint32_t store_evtchn; /* Event channel for store communication. */ + union { + struct { + xen_pfn_t mfn; /* MACHINE page number of console page. */ + uint32_t evtchn; /* Event channel for console page. */ + } domU; + struct { + uint32_t info_off; /* Offset of console_info struct. */ + uint32_t info_size; /* Size of console_info struct from start.*/ + } dom0; + } console; + /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ + unsigned long pt_base; /* VIRTUAL address of page directory. */ + unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */ + unsigned long mfn_list; /* VIRTUAL address of page-frame list. */ + unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ + unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ + int8_t cmd_line[MAX_GUEST_CMDLINE]; + /* The pfn range here covers both page table and p->m table frames. */ + unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table. */ + unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table. */ +}; + +/* These flags are passed in the 'flags' field of start_info_t. */ +#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ +#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ +#define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */ +#define SIF_MOD_START_PFN (1<<3) /* Is mod_start a PFN? */ +#define SIF_VIRT_P2M_4TOOLS (1<<4) /* Do Xen tools understand a virt. mapped */ + /* P->M making the 3 level tree obsolete? */ +#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */ + +/* + * A multiboot module is a package containing modules very similar to a + * multiboot module array. The only differences are: + * - the array of module descriptors is by convention simply at the beginning + * of the multiboot module, + * - addresses in the module descriptors are based on the beginning of the + * multiboot module, + * - the number of modules is determined by a termination descriptor that has + * mod_start == 0. + * + * This permits to both build it statically and reference it in a configuration + * file, and let the PV guest easily rebase the addresses to virtual addresses + * and at the same time count the number of modules. + */ +struct xen_multiboot_mod_list { + /* Address of first byte of the module */ + uint32_t mod_start; + /* Address of last byte of the module (inclusive) */ + uint32_t mod_end; + /* Address of zero-terminated command line */ + uint32_t cmdline; + /* Unused, must be zero */ + uint32_t pad; +}; +/* + * The console structure in start_info.console.dom0 + * + * This structure includes a variety of information required to + * have a working VGA/VESA console. + */ +struct dom0_vga_console_info { + uint8_t video_type; +#define XEN_VGATYPE_TEXT_MODE_3 0x03 +#define XEN_VGATYPE_VESA_LFB 0x23 +#define XEN_VGATYPE_EFI_LFB 0x70 + + union { + struct { + /* Font height, in pixels. */ + uint16_t font_height; + /* Cursor location (column, row). */ + uint16_t cursor_x, cursor_y; + /* Number of rows and columns (dimensions in characters). */ + uint16_t rows, columns; + } text_mode_3; + + struct { + /* Width and height, in pixels. */ + uint16_t width, height; + /* Bytes per scan line. */ + uint16_t bytes_per_line; + /* Bits per pixel. */ + uint16_t bits_per_pixel; + /* LFB physical address, and size (in units of 64kB). */ + uint32_t lfb_base; + uint32_t lfb_size; + /* RGB mask offsets and sizes, as defined by VBE 1.2+ */ + uint8_t red_pos, red_size; + uint8_t green_pos, green_size; + uint8_t blue_pos, blue_size; + uint8_t rsvd_pos, rsvd_size; + + /* VESA capabilities (offset 0xa, VESA command 0x4f00). */ + uint32_t gbl_caps; + /* Mode attributes (offset 0x0, VESA command 0x4f01). */ + uint16_t mode_attrs; + } vesa_lfb; + } u; +}; + +typedef uint64_t cpumap_t; + +typedef uint8_t xen_domain_handle_t[16]; + +/* Turn a plain number into a C unsigned long constant. */ +#define __mk_unsigned_long(x) x ## UL +#define mk_unsigned_long(x) __mk_unsigned_long(x) + +#define TMEM_SPEC_VERSION 1 + +struct tmem_op { + uint32_t cmd; + int32_t pool_id; + union { + struct { /* for cmd == TMEM_NEW_POOL */ + uint64_t uuid[2]; + uint32_t flags; + } new; + struct { + uint64_t oid[3]; + uint32_t index; + uint32_t tmem_offset; + uint32_t pfn_offset; + uint32_t len; + GUEST_HANDLE(void) gmfn; /* guest machine page frame */ + } gen; + } u; +}; + +DEFINE_GUEST_HANDLE(u64); + +#else /* __ASSEMBLY__ */ + +/* In assembly code we cannot use C numeric constant suffixes. */ +#define mk_unsigned_long(x) x + +#endif /* !__ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_XEN_H__ */ -- 1.9.1