Blame view
drivers/lguest/core.c
10.3 KB
2e04ef769
|
1 2 |
/*P:400 * This contains run_guest() which actually calls into the Host<->Guest |
f938d2c89
|
3 |
* Switcher and analyzes the return, such as determining if the Guest wants the |
2e04ef769
|
4 5 |
* Host to do something. This file also contains useful helper routines. :*/ |
d7e28ffe6
|
6 7 8 9 10 11 12 13 |
#include <linux/module.h> #include <linux/stringify.h> #include <linux/stddef.h> #include <linux/io.h> #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/cpu.h> #include <linux/freezer.h> |
625efab1c
|
14 |
#include <linux/highmem.h> |
5a0e3ad6a
|
15 |
#include <linux/slab.h> |
d7e28ffe6
|
16 |
#include <asm/paravirt.h> |
d7e28ffe6
|
17 18 19 |
#include <asm/pgtable.h> #include <asm/uaccess.h> #include <asm/poll.h> |
d7e28ffe6
|
20 |
#include <asm/asm-offsets.h> |
d7e28ffe6
|
21 |
#include "lg.h" |
d7e28ffe6
|
22 23 24 |
/* The vm area reserved at SWITCHER_ADDR by map_switcher(). */
static struct vm_struct *switcher_vma;

/* Array of pointers to the pages which back the Switcher mapping. */
static struct page **switcher_page;

/* This One Big lock protects all inter-guest data structures. */
DEFINE_MUTEX(lguest_lock);
d7e28ffe6
|
27 |
|
2e04ef769
|
28 29 |
/*H:010 * We need to set up the Switcher at a high virtual address. Remember the |
bff672e63
|
30 31 32 33 34 35 36 37 |
* Switcher is a few hundred bytes of assembler code which actually changes the * CPU to run the Guest, and then changes back to the Host when a trap or * interrupt happens. * * The Switcher code must be at the same virtual address in the Guest as the * Host since it will be running as the switchover occurs. * * Trying to map memory at a particular address is an unusual thing to do, so |
2e04ef769
|
38 39 |
* it's not a simple one-liner. */ |
d7e28ffe6
|
40 41 42 43 |
static __init int map_switcher(void) { int i, err; struct page **pagep; |
bff672e63
|
44 45 46 47 48 49 50 |
/* * Map the Switcher in to high memory. * * It turns out that if we choose the address 0xFFC00000 (4MB under the * top virtual address), it makes setting up the page tables really * easy. */ |
2e04ef769
|
51 52 53 54 |
/* * We allocate an array of struct page pointers. map_vm_area() wants * this, rather than just an array of pages. */ |
d7e28ffe6
|
55 56 57 58 59 60 |
switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES, GFP_KERNEL); if (!switcher_page) { err = -ENOMEM; goto out; } |
2e04ef769
|
61 62 63 64 |
/* * Now we actually allocate the pages. The Guest will see these pages, * so we make sure they're zeroed. */ |
d7e28ffe6
|
65 |
for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { |
6c189d831
|
66 67 |
switcher_page[i] = alloc_page(GFP_KERNEL|__GFP_ZERO); if (!switcher_page[i]) { |
d7e28ffe6
|
68 69 70 |
err = -ENOMEM; goto free_some_pages; } |
d7e28ffe6
|
71 |
} |
2e04ef769
|
72 73 |
/* * First we check that the Switcher won't overlap the fixmap area at |
f14ae652b
|
74 |
* the top of memory. It's currently nowhere near, but it could have |
2e04ef769
|
75 76 |
* very strange effects if it ever happened. */ |
f14ae652b
|
77 78 79 80 81 82 |
if (SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){ err = -ENOMEM; printk("lguest: mapping switcher would thwack fixmap "); goto free_pages; } |
2e04ef769
|
83 84 |
/* * Now we reserve the "virtual memory area" we want: 0xFFC00000 |
bff672e63
|
85 |
* (SWITCHER_ADDR). We might not get it in theory, but in practice |
f14ae652b
|
86 |
* it's worked so far. The end address needs +1 because __get_vm_area |
2e04ef769
|
87 88 |
* allocates an extra guard page, so we need space for that. */ |
d7e28ffe6
|
89 |
switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, |
f14ae652b
|
90 91 |
VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE); |
d7e28ffe6
|
92 93 94 95 96 97 |
if (!switcher_vma) { err = -ENOMEM; printk("lguest: could not map switcher pages high "); goto free_pages; } |
2e04ef769
|
98 99 |
/* * This code actually sets up the pages we've allocated to appear at |
bff672e63
|
100 101 102 |
* SWITCHER_ADDR. map_vm_area() takes the vma we allocated above, the * kind of pages we're mapping (kernel pages), and a pointer to our * array of struct pages. It increments that pointer, but we don't |
2e04ef769
|
103 104 |
* care. */ |
d7e28ffe6
|
105 |
pagep = switcher_page; |
ed1dc7781
|
106 |
err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep); |
d7e28ffe6
|
107 108 109 110 111 |
if (err) { printk("lguest: map_vm_area failed: %i ", err); goto free_vma; } |
bff672e63
|
112 |
|
2e04ef769
|
113 114 |
/* * Now the Switcher is mapped at the right address, we can't fail! |
9f54288de
|
115 |
* Copy in the compiled-in Switcher code (from x86/switcher_32.S). |
2e04ef769
|
116 |
*/ |
d7e28ffe6
|
117 118 |
memcpy(switcher_vma->addr, start_switcher_text, end_switcher_text - start_switcher_text); |
d7e28ffe6
|
119 120 121 |
printk(KERN_INFO "lguest: mapped switcher at %p ", switcher_vma->addr); |
bff672e63
|
122 |
/* And we succeeded... */ |
d7e28ffe6
|
123 124 125 126 127 128 129 130 131 132 133 134 135 |
return 0; free_vma: vunmap(switcher_vma->addr); free_pages: i = TOTAL_SWITCHER_PAGES; free_some_pages: for (--i; i >= 0; i--) __free_pages(switcher_page[i], 0); kfree(switcher_page); out: return err; } |
bff672e63
|
136 |
/*:*/ |
d7e28ffe6
|
137 |
|
2e04ef769
|
138 |
/* Cleaning up the mapping when the module is unloaded is almost... too easy. */ |
d7e28ffe6
|
139 140 141 |
static void unmap_switcher(void) { unsigned int i; |
bff672e63
|
142 |
/* vunmap() undoes *both* map_vm_area() and __get_vm_area(). */ |
d7e28ffe6
|
143 |
vunmap(switcher_vma->addr); |
bff672e63
|
144 |
/* Now we just need to free the pages we copied the switcher into */ |
d7e28ffe6
|
145 146 |
for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) __free_pages(switcher_page[i], 0); |
0a707210a
|
147 |
kfree(switcher_page); |
d7e28ffe6
|
148 |
} |
e1e72965e
|
149 |
/*H:032 |
dde797899
|
150 151 |
* Dealing With Guest Memory. * |
e1e72965e
|
152 153 154 |
* Before we go too much further into the Host, we need to grok the routines * we use to deal with Guest memory. * |
dde797899
|
155 |
* When the Guest gives us (what it thinks is) a physical address, we can use |
3c6b5bfa3
|
156 157 |
* the normal copy_from_user() & copy_to_user() on the corresponding place in * the memory region allocated by the Launcher. |
dde797899
|
158 159 160 161 |
* * But we can't trust the Guest: it might be trying to access the Launcher * code. We have to check that the range is below the pfn_limit the Launcher * gave us. We have to make sure that addr + len doesn't give us a false |
2e04ef769
|
162 163 |
* positive by overflowing, too. */ |
df1693abc
|
164 165 |
bool lguest_address_ok(const struct lguest *lg, unsigned long addr, unsigned long len) |
d7e28ffe6
|
166 167 168 |
{ return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr); } |
2e04ef769
|
169 170 |
/* * This routine copies memory from the Guest. Here we can see how useful the |
2d37f94a2
|
171 |
* kill_guest() routine we met in the Launcher can be: we return a random |
2e04ef769
|
172 173 |
* value (all zeroes) instead of needing to return an error. */ |
382ac6b3f
|
174 |
void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes) |
d7e28ffe6
|
175 |
{ |
382ac6b3f
|
176 177 |
if (!lguest_address_ok(cpu->lg, addr, bytes) || copy_from_user(b, cpu->lg->mem_base + addr, bytes) != 0) { |
d7e28ffe6
|
178 179 |
/* copy_from_user should do this, but as we rely on it... */ memset(b, 0, bytes); |
382ac6b3f
|
180 |
kill_guest(cpu, "bad read address %#lx len %u", addr, bytes); |
d7e28ffe6
|
181 182 |
} } |
a6bd8e130
|
183 |
/* This is the write (copy into Guest) version. */ |
382ac6b3f
|
184 |
void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b, |
2d37f94a2
|
185 |
unsigned bytes) |
d7e28ffe6
|
186 |
{ |
382ac6b3f
|
187 188 189 |
if (!lguest_address_ok(cpu->lg, addr, bytes) || copy_to_user(cpu->lg->mem_base + addr, b, bytes) != 0) kill_guest(cpu, "bad write address %#lx len %u", addr, bytes); |
d7e28ffe6
|
190 |
} |
2d37f94a2
|
191 |
/*:*/ |
d7e28ffe6
|
192 |
|
2e04ef769
|
193 194 |
/*H:030 * Let's jump straight to the main loop which runs the Guest. |
bff672e63
|
195 |
* Remember, this is called by the Launcher reading /dev/lguest, and we keep |
2e04ef769
|
196 197 |
* going around and around until something interesting happens. */ |
d0953d42c
|
198 |
/*
 * run_guest - keep running the Guest on @cpu until something needs attention.
 * @cpu:  the Guest virtual CPU to run
 * @user: user-space location which receives a pending notification value
 *
 * Returns:
 *   sizeof(cpu->pending_notify) after storing cpu->pending_notify to @user,
 *   -EFAULT if that store fails,
 *   -ERESTARTSYS if a signal is pending for the current task,
 *   -ERESTART if the Guest is dead but marked for reboot,
 *   -ENOENT if the Guest is otherwise dead.
 */
int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
{
	/* We stop running once the Guest is dead. */
	while (!cpu->lg->dead) {
		unsigned int pending_irq;
		bool more;

		/* First we run any hypercalls the Guest wants done. */
		if (cpu->hcall)
			do_hypercalls(cpu);

		/*
		 * It's possible the Guest did a NOTIFY hypercall to the
		 * Launcher.  Does it just need to write to a registered
		 * eventfd (ie. the appropriate virtqueue thread)?  If not,
		 * we tell the main Launcher.
		 */
		if (cpu->pending_notify && !send_notify_to_eventfd(cpu)) {
			if (put_user(cpu->pending_notify, user))
				return -EFAULT;
			return sizeof(cpu->pending_notify);
		}

		/*
		 * All long-lived kernel loops need to check with this horrible
		 * thing called the freezer.  If the Host is trying to suspend,
		 * it stops us.
		 */
		try_to_freeze();

		/* Check for signals */
		if (signal_pending(current))
			return -ERESTARTSYS;

		/*
		 * Check if there are any interrupts which can be delivered now:
		 * if so, this sets up the handler to be executed when we next
		 * run the Guest.
		 */
		pending_irq = interrupt_pending(cpu, &more);
		if (pending_irq < LGUEST_IRQS)
			try_deliver_interrupt(cpu, pending_irq, more);

		/*
		 * Just make absolutely sure the Guest is still alive.  One of
		 * those hypercalls could have been fatal, for example.
		 */
		if (cpu->lg->dead)
			break;

		/*
		 * If the Guest asked to be stopped, we sleep.  The Guest's
		 * clock timer will wake us.
		 */
		if (cpu->halted) {
			set_current_state(TASK_INTERRUPTIBLE);
			/*
			 * Just before we sleep, make sure no interrupt snuck in
			 * which we should be doing.
			 */
			if (interrupt_pending(cpu, &more) >= LGUEST_IRQS)
				schedule();
			else
				set_current_state(TASK_RUNNING);
			continue;
		}

		/*
		 * OK, now we're ready to jump into the Guest.  First we put up
		 * the "Do Not Disturb" sign:
		 */
		local_irq_disable();

		/* Actually run the Guest until something happens. */
		lguest_arch_run_guest(cpu);

		/* Now we're ready to be interrupted or moved to other CPUs */
		local_irq_enable();

		/* Now we deal with whatever happened to the Guest. */
		lguest_arch_handle_trap(cpu);
	}

	/*
	 * Special case: Guest is 'dead' but wants a reboot.  Otherwise the
	 * Guest is dead => "No such file or directory".
	 */
	return cpu->lg->dead == ERR_PTR(-ERESTART) ? -ERESTART : -ENOENT;
}
bff672e63
|
286 287 288 289 290 291 292 293 |
/*H:000 * Welcome to the Host! * * By this point your brain has been tickled by the Guest code and numbed by * the Launcher code; prepare for it to be stretched by the Host code. This is * the heart. Let's begin at the initialization routine for the Host's lg * module. */ |
d7e28ffe6
|
294 295 296 |
/*
 * init - module entry point: bring up each piece of the Host in turn.
 *
 * Returns 0 on success, -EPERM if get_kernel_rpl() is non-zero, or the
 * error from whichever setup step failed.  A failing step unwinds the steps
 * that completed before it, in reverse order.
 */
static int __init init(void)
{
	int err;

	/* Lguest can't run under Xen, VMI or itself.  It does Tricky Stuff. */
	if (get_kernel_rpl() != 0) {
		printk(KERN_ERR "lguest is afraid of being a guest\n");
		return -EPERM;
	}

	/* First we put the Switcher up in very high virtual memory. */
	err = map_switcher();
	if (err)
		goto out;

	/* Now we set up the pagetable implementation for the Guests. */
	err = init_pagetables(switcher_page, SHARED_SWITCHER_PAGES);
	if (err)
		goto unmap;

	/* We might need to reserve an interrupt vector. */
	err = init_interrupts();
	if (err)
		goto free_pgtables;

	/* /dev/lguest needs to be registered. */
	err = lguest_device_init();
	if (err)
		goto free_interrupts;

	/* Finally we do some architecture-specific setup. */
	lguest_arch_host_init();

	/* All good! */
	return 0;

	/* Error unwinding: each label undoes one earlier step. */
free_interrupts:
	free_interrupts();
free_pgtables:
	free_pagetables();
unmap:
	unmap_switcher();
out:
	return err;
}
bff672e63
|
337 |
/* Cleaning up is just the same code, backwards. With a little French. */ |
d7e28ffe6
|
338 339 340 |
/*
 * fini - module exit point: release everything init() set up.
 *
 * The first four calls undo init()'s steps in reverse order.
 * NOTE(review): lguest_arch_host_fini() runs last here although
 * lguest_arch_host_init() was the final step of init() - presumably
 * intentional; confirm before reordering.
 */
static void __exit fini(void)
{
	/* Take away /dev/lguest. */
	lguest_device_remove();

	/* Undo init_interrupts() and init_pagetables(). */
	free_interrupts();
	free_pagetables();

	/* Undo map_switcher(). */
	unmap_switcher();

	/* Architecture-specific teardown. */
	lguest_arch_host_fini();
}
625efab1c
|
347 |
/*:*/ |
d7e28ffe6
|
348 |
|
2e04ef769
|
349 350 351 352 |
/* * The Host side of lguest can be a module. This is a nice way for people to * play with it. */ |
d7e28ffe6
|
353 354 355 356 |
/* Module entry and exit points, plus the module metadata. */
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");