drivers/lguest/page_tables.c

  /*P:700
   * The pagetable code, on the other hand, still shows the scars of
   * previous encounters.  It's functional, and as neat as it can be in the
   * circumstances, but be wary, for these things are subtle and break easily.
   * The Guest provides a virtual to physical mapping, but we can neither trust
   * it nor use it: we verify and convert it here then point the CPU to the
   * converted Guest pages when running the Guest.
  :*/
  
  /* Copyright (C) Rusty Russell IBM Corporation 2006.
   * GPL v2 and any later version */
  #include <linux/mm.h>
  #include <linux/gfp.h>
  #include <linux/types.h>
  #include <linux/spinlock.h>
  #include <linux/random.h>
  #include <linux/percpu.h>
  #include <asm/tlbflush.h>
  #include <asm/uaccess.h>
  #include "lg.h"
  /*M:008
   * We hold a reference to pages, which prevents them from being swapped.
   * It'd be nice to have a callback in the "struct mm_struct" when Linux wants
   * to swap out.  If we had this, and a shrinker callback to trim PTE pages, we
   * could probably consider launching Guests as non-root.
  :*/

  /*H:300
   * The Page Table Code
   *
   * We use two-level page tables for the Guest, or three-level with PAE.  If
   * you're not entirely comfortable with virtual addresses, physical addresses
   * and page tables then I recommend you review arch/x86/lguest/boot.c's "Page
   * Table Handling" (with diagrams!).
   *
   * The Guest keeps page tables, but we maintain the actual ones here: these are
   * called "shadow" page tables.  Which is a very Guest-centric name: these are
   * the real page tables the CPU uses, although we keep them up to date to
   * reflect the Guest's.  (See what I mean about weird naming?  Since when do
   * shadows reflect anything?)
   *
   * Anyway, this is the most complicated part of the Host code.  There are seven
   * parts to this:
   *  (i) Looking up a page table entry when the Guest faults,
   *  (ii) Making sure the Guest stack is mapped,
   *  (iii) Setting up a page table entry when the Guest tells us one has changed,
   *  (iv) Switching page tables,
   *  (v) Flushing (throwing away) page tables,
   *  (vi) Mapping the Switcher when the Guest is about to run,
   *  (vii) Setting up the page tables initially.
  :*/
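
  /*
   * A concrete illustration (not from the code itself): with 4k pages and no
   * PAE, a 32-bit virtual address like 0xC0101234 splits into:
   *
   *	pgd_index = vaddr >> 22           = 0x300 (top 10 bits)
   *	pte_index = (vaddr >> 12) & 0x3FF = 0x101 (next 10 bits)
   *	offset    = vaddr & 0xFFF         = 0x234 (bottom 12 bits)
   *
   * With PAE the split is 2/9/9/12 bits instead: a tiny 4-entry top level,
   * then 512-entry PMD and PTE pages.  The helpers below hide the difference.
   */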

  /*
   * The Switcher uses the complete top PTE page.  That's 1024 PTE entries (4MB)
   * or 512 PTE entries with PAE (2MB).
   */
  #define SWITCHER_PGD_INDEX (PTRS_PER_PGD - 1)

  /*
   * For PAE we need the PMD index as well. We use the last 2MB, so we
   * will need the last pmd entry of the last pmd page.
   */
  #ifdef CONFIG_X86_PAE
  #define SWITCHER_PMD_INDEX 	(PTRS_PER_PMD - 1)
  #define RESERVE_MEM 		2U
  #define CHECK_GPGD_MASK		_PAGE_PRESENT
  #else
  #define RESERVE_MEM 		4U
  #define CHECK_GPGD_MASK		_PAGE_TABLE
  #endif
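
  /*
   * To make that concrete (illustration only): without PAE, PTRS_PER_PGD is
   * 1024, so SWITCHER_PGD_INDEX is 1023 and the Switcher owns the top 4MB of
   * virtual address space, from 0xFFC00000 up.  With PAE it's the last pmd
   * entry (511) of the last pmd page: the top 2MB.
   */
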
  /*
   * We actually need a separate PTE page for each CPU.  Remember that after the
   * Switcher code itself comes two pages for each CPU, and we don't want this
   * CPU's guest to see the pages of any other CPU.
   */
  static DEFINE_PER_CPU(pte_t *, switcher_pte_pages);
  #define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu)
  /*H:320
   * The page table code is curly enough to need helper functions to keep it
   * clear and clean.  The kernel itself provides many of them; one advantage
   * of insisting that the Guest and Host use the same CONFIG_PAE setting.
   *
   * There are two functions which return pointers to the shadow (aka "real")
   * page tables.
   *
   * spgd_addr() takes the virtual address and returns a pointer to the top-level
   * page directory entry (PGD) for that address.  Since we keep track of several
   * page tables, the "i" argument tells us which one we're interested in (it's
   * usually the current one).
   */
  static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr)
  {
  	unsigned int index = pgd_index(vaddr);

  #ifndef CONFIG_X86_PAE
  	/* We kill any Guest trying to touch the Switcher addresses. */
  	if (index >= SWITCHER_PGD_INDEX) {
  		kill_guest(cpu, "attempt to access switcher pages");
  		index = 0;
  	}
  #endif
  	/* Return a pointer to the index'th pgd entry for the i'th page table. */
  	return &cpu->lg->pgdirs[i].pgdir[index];
  }
  #ifdef CONFIG_X86_PAE
  /*
   * This routine then takes the PGD entry given above, which contains the
   * address of the PMD page.  It then returns a pointer to the PMD entry for the
   * given address.
   */
  static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
  {
  	unsigned int index = pmd_index(vaddr);
  	pmd_t *page;
  
  	/* We kill any Guest trying to touch the Switcher addresses. */
  	if (pgd_index(vaddr) == SWITCHER_PGD_INDEX &&
  					index >= SWITCHER_PMD_INDEX) {
  		kill_guest(cpu, "attempt to access switcher pages");
  		index = 0;
  	}
  
  	/* You should never call this if the PGD entry wasn't valid */
  	BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));
  	page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
  
  	return &page[index];
  }
  #endif
  /*
   * This routine then takes the page directory entry returned above, which
   * contains the address of the page table entry (PTE) page.  It then returns a
   * pointer to the PTE entry for the given address.
   */
  static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
  {
  #ifdef CONFIG_X86_PAE
  	pmd_t *pmd = spmd_addr(cpu, spgd, vaddr);
  	pte_t *page = __va(pmd_pfn(*pmd) << PAGE_SHIFT);
  
  	/* You should never call this if the PMD entry wasn't valid */
  	BUG_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT));
  #else
  	pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
  	/* You should never call this if the PGD entry wasn't valid */
  	BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));
  #endif
  	return &page[pte_index(vaddr)];
  }
  /*
   * These functions are just like the above, except they access the Guest
   * page tables.  Hence they return a Guest address.
   */
  static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr)
  {
  	unsigned int index = vaddr >> (PGDIR_SHIFT);
  	return cpu->lg->pgdirs[cpu->cpu_pgd].gpgdir + index * sizeof(pgd_t);
  }
  #ifdef CONFIG_X86_PAE
  /* Follow the PGD to the PMD. */
  static unsigned long gpmd_addr(pgd_t gpgd, unsigned long vaddr)
  {
  	unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT;
  	BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
  	return gpage + pmd_index(vaddr) * sizeof(pmd_t);
  }

  /* Follow the PMD to the PTE. */
  static unsigned long gpte_addr(struct lg_cpu *cpu,
  			       pmd_t gpmd, unsigned long vaddr)
  {
  	unsigned long gpage = pmd_pfn(gpmd) << PAGE_SHIFT;

  	BUG_ON(!(pmd_flags(gpmd) & _PAGE_PRESENT));
  	return gpage + pte_index(vaddr) * sizeof(pte_t);
  }
  #else
  /* Follow the PGD to the PTE (no mid-level for !PAE). */
  static unsigned long gpte_addr(struct lg_cpu *cpu,
  				pgd_t gpgd, unsigned long vaddr)
  {
  	unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT;
  
  	BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
  	return gpage + pte_index(vaddr) * sizeof(pte_t);
  }
  #endif
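
  /*
   * For example (made-up numbers): if the Guest's top level sits at guest
   * physical address 0x3000 and pgd_index(vaddr) is 0x300, gpgd_addr()
   * returns 0x3000 + 0x300 * sizeof(pgd_t).  Walking the Guest's tables is
   * pure address arithmetic; the callers fetch the entries with lgread().
   */
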
  /*:*/
  /*M:007
   * get_pfn is slow: we could probably try to grab batches of pages here as
   * an optimization (ie. pre-faulting).
  :*/

  /*H:350
   * This routine takes a page number given by the Guest and converts it to
   * an actual, physical page number.  It can fail for several reasons: the
   * virtual address might not be mapped by the Launcher, the write flag is set
   * and the page is read-only, or the write flag was set and the page was
   * shared so had to be copied, but we ran out of memory.
   *
   * This holds a reference to the page, so release_pte() is careful to put that
   * back.
   */
  static unsigned long get_pfn(unsigned long virtpfn, int write)
  {
  	struct page *page;
  
  	/* gup me one page at this address please! */
  	if (get_user_pages_fast(virtpfn << PAGE_SHIFT, 1, write, &page) == 1)
  		return page_to_pfn(page);
  	/* This value indicates failure. */
  	return -1UL;
  }
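
  /*
   * The lifetime, as a sketch (not real code): get_pfn() is the "get" and
   * release_pte() further down is the matching "put".
   *
   *	pfn = get_pfn(base + pte_pfn(gpte), write);	<- refcount goes up
   *	... the Guest uses the page via the shadow PTE ...
   *	release_pte(*spte);				<- refcount goes down
   *
   * While we hold that reference, Linux won't swap the page out.
   */
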
  /*H:340
   * Converting a Guest page table entry to a shadow (ie. real) page table
   * entry can be a little tricky.  The flags are (almost) the same, but the
   * Guest PTE contains a virtual page number: the CPU needs the real page
   * number.
   */
  static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write)
  {
  	unsigned long pfn, base, flags;

  	/*
  	 * The Guest sets the global flag, because it thinks that it is using
  	 * PGE.  We only told it to use PGE so it would tell us whether it was
  	 * flushing a kernel mapping or a userspace mapping.  We don't actually
  	 * use the global bit, so throw it away.
  	 */
  	flags = (pte_flags(gpte) & ~_PAGE_GLOBAL);

  	/* The Guest's pages are offset inside the Launcher. */
  	base = (unsigned long)cpu->lg->mem_base / PAGE_SIZE;

  	/*
  	 * We need a temporary "unsigned long" variable to hold the answer from
  	 * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't
  	 * fit in spte.pfn.  get_pfn() finds the real physical number of the
  	 * page, given the virtual number.
  	 */
  	pfn = get_pfn(base + pte_pfn(gpte), write);
  	if (pfn == -1UL) {
  		kill_guest(cpu, "failed to get page %lu", pte_pfn(gpte));
  		/*
  		 * When we destroy the Guest, we'll go through the shadow page
  		 * tables and release_pte() them.  Make sure we don't think
  		 * this one is valid!
  		 */
  		flags = 0;
  	}
  	/* Now we assemble our shadow PTE from the page number and flags. */
  	return pfn_pte(pfn, __pgprot(flags));
  }
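
  /*
   * A worked example (hypothetical numbers): say the Guest PTE holds page
   * 0x10 with _PAGE_PRESENT|_PAGE_RW, and the Launcher mapped Guest memory
   * at 0x40000000, so base is 0x40000.  gpte_to_spte() then asks get_pfn()
   * which real page backs the Launcher's virtual page 0x40010, and the
   * shadow PTE gets that hardware page number with the same flags (minus
   * _PAGE_GLOBAL).
   */
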
  /*H:460 And to complete the chain, release_pte() looks like this: */
  static void release_pte(pte_t pte)
  {
  	/*
  	 * Remember that get_user_pages_fast() took a reference to the page, in
  	 * get_pfn()?  We have to put it back now.
  	 */
  	if (pte_flags(pte) & _PAGE_PRESENT)
  		put_page(pte_page(pte));
  }
  /*:*/

  static void check_gpte(struct lg_cpu *cpu, pte_t gpte)
  {
  	if ((pte_flags(gpte) & _PAGE_PSE) ||
  	    pte_pfn(gpte) >= cpu->lg->pfn_limit)
  		kill_guest(cpu, "bad page table entry");
  }
  static void check_gpgd(struct lg_cpu *cpu, pgd_t gpgd)
  {
  	if ((pgd_flags(gpgd) & ~CHECK_GPGD_MASK) ||
  	   (pgd_pfn(gpgd) >= cpu->lg->pfn_limit))
  		kill_guest(cpu, "bad page directory entry");
  }
  #ifdef CONFIG_X86_PAE
  static void check_gpmd(struct lg_cpu *cpu, pmd_t gpmd)
  {
  	if ((pmd_flags(gpmd) & ~_PAGE_TABLE) ||
  	   (pmd_pfn(gpmd) >= cpu->lg->pfn_limit))
  		kill_guest(cpu, "bad page middle directory entry");
  }
  #endif
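
  /*
   * For instance (illustration only): a Guest PTE with _PAGE_PSE set (a 4MB
   * "huge page" mapping, which we don't support) or with a page number at or
   * above pfn_limit (ie. pointing into the Launcher itself) fails the check,
   * and the Guest dies.
   */
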
  /*H:330
   * (i) Looking up a page table entry when the Guest faults.
   *
   * We saw this call in run_guest(): when we see a page fault in the Guest, we
   * come here.  That's because we only set up the shadow page tables lazily as
   * they're needed, so we get page faults all the time and quietly fix them up
   * and return to the Guest without it knowing.
   *
   * If we fixed up the fault (ie. we mapped the address), this routine returns
   * true.  Otherwise, it was a real fault and we need to tell the Guest.
   */
  bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
  {
  	pgd_t gpgd;
  	pgd_t *spgd;
  	unsigned long gpte_ptr;
  	pte_t gpte;
  	pte_t *spte;

  	/* Mid level for PAE. */
  #ifdef CONFIG_X86_PAE
  	pmd_t *spmd;
  	pmd_t gpmd;
  #endif
  	/* First step: get the top-level Guest page table entry. */
  	if (unlikely(cpu->linear_pages)) {
  		/* Faking up a linear mapping. */
  		gpgd = __pgd(CHECK_GPGD_MASK);
  	} else {
  		gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
  		/* Toplevel not present?  We can't map it in. */
  		if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
  			return false;
  	}

  	/* Now look at the matching shadow entry. */
  	spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
  	if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) {
  		/* No shadow entry: allocate a new shadow PTE page. */
  		unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
  		/*
  		 * This is not really the Guest's fault, but killing it is
  		 * simple for this corner case.
  		 */
  		if (!ptepage) {
  			kill_guest(cpu, "out of memory allocating pte page");
  			return false;
  		}
  		/* We check that the Guest pgd is OK. */
  		check_gpgd(cpu, gpgd);
  		/*
  		 * And we copy the flags to the shadow PGD entry.  The page
  		 * number in the shadow PGD is the page we just allocated.
  		 */
  		set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags(gpgd)));
  	}
  #ifdef CONFIG_X86_PAE
  	if (unlikely(cpu->linear_pages)) {
  		/* Faking up a linear mapping. */
  		gpmd = __pmd(_PAGE_TABLE);
  	} else {
  		gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
  		/* Middle level not present?  We can't map it in. */
  		if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
  			return false;
  	}
  
  	/* Now look at the matching shadow entry. */
  	spmd = spmd_addr(cpu, *spgd, vaddr);
  
  	if (!(pmd_flags(*spmd) & _PAGE_PRESENT)) {
  		/* No shadow entry: allocate a new shadow PTE page. */
  		unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
  		/*
  		 * This is not really the Guest's fault, but killing it is
  		 * simple for this corner case.
  		 */
  		if (!ptepage) {
  			kill_guest(cpu, "out of memory allocating pte page");
  			return false;
  		}
  
  		/* We check that the Guest pmd is OK. */
  		check_gpmd(cpu, gpmd);
  		/*
  		 * And we copy the flags to the shadow PMD entry.  The page
  		 * number in the shadow PMD is the page we just allocated.
  		 */
  		set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd)));
  	}

  	/*
  	 * OK, now we look at the lower level in the Guest page table: keep its
  	 * address, because we might update it later.
  	 */
  	gpte_ptr = gpte_addr(cpu, gpmd, vaddr);
  #else
  	/*
  	 * OK, now we look at the lower level in the Guest page table: keep its
  	 * address, because we might update it later.
  	 */
  	gpte_ptr = gpte_addr(cpu, gpgd, vaddr);
  #endif

  	if (unlikely(cpu->linear_pages)) {
  		/* Linear?  Make up a PTE which points to same page. */
  		gpte = __pte((vaddr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
  	} else {
  		/* Read the actual PTE value. */
  		gpte = lgread(cpu, gpte_ptr, pte_t);
  	}

  	/* If this page isn't in the Guest page tables, we can't page it in. */
  	if (!(pte_flags(gpte) & _PAGE_PRESENT))
  		return false;

  	/*
  	 * Check they're not trying to write to a page the Guest wants
  	 * read-only (bit 2 of errcode == write).
  	 */
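  	/*
  	 * (For reference: in the x86 fault error code, 1 means the fault was
  	 * on a present page, 2 means it was a write, 4 means it came from
  	 * userspace; hence the errcode & 2 and errcode & 4 tests below.)
  	 */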
  	if ((errcode & 2) && !(pte_flags(gpte) & _PAGE_RW))
  		return false;

  	/* User access to a kernel-only page? (bit 3 == user access) */
  	if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER))
  		return false;

  	/*
  	 * Check that the Guest PTE flags are OK, and the page number is below
  	 * the pfn_limit (ie. not mapping the Launcher binary).
  	 */
  	check_gpte(cpu, gpte);

  	/* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */
  	gpte = pte_mkyoung(gpte);
  	if (errcode & 2)
  		gpte = pte_mkdirty(gpte);

  	/* Get the pointer to the shadow PTE entry we're going to set. */
  	spte = spte_addr(cpu, *spgd, vaddr);
  
  	/*
  	 * If there was a valid shadow PTE entry here before, we release it.
  	 * This can happen with a write to a previously read-only entry.
  	 */
  	release_pte(*spte);
  	/*
  	 * If this is a write, we insist that the Guest page is writable (the
  	 * final arg to gpte_to_spte()).
  	 */
  	if (pte_dirty(gpte))
  		*spte = gpte_to_spte(cpu, gpte, 1);
  	else
  		/*
  		 * If this is a read, don't set the "writable" bit in the page
  		 * table entry, even if the Guest says it's writable.  That way
  		 * we will come back here when a write does actually occur, so
  		 * we can update the Guest's _PAGE_DIRTY flag.
  		 */
  		set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0));

  	/*
  	 * Finally, we write the Guest PTE entry back: we've set the
  	 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags.
  	 */
  	if (likely(!cpu->linear_pages))
  		lgwrite(cpu, gpte_ptr, pte_t, gpte);

  	/*
  	 * The fault is fixed, the page table is populated, the mapping
  	 * manipulated, the result returned and the code complete.  A small
  	 * delay and a trace of alliteration are the only indications the Guest
  	 * has that a page fault occurred at all.
  	 */
  	return true;
  }
  /*H:360
   * (ii) Making sure the Guest stack is mapped.
   *
   * Remember that direct traps into the Guest need a mapped Guest kernel stack.
   * pin_stack_pages() calls us here: we could simply call demand_page(), but as
   * we've seen that logic is quite long, and usually the stack pages are already
   * mapped, so it's overkill.
   *
   * This is a quick version which answers the question: is this virtual address
   * mapped by the shadow page tables, and is it writable?
   */
  static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr)
  {
  	pgd_t *spgd;
  	unsigned long flags;
  #ifdef CONFIG_X86_PAE
  	pmd_t *spmd;
  #endif
  	/* Look at the current top level entry: is it present? */
  	spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
  	if (!(pgd_flags(*spgd) & _PAGE_PRESENT))
  		return false;

  #ifdef CONFIG_X86_PAE
  	spmd = spmd_addr(cpu, *spgd, vaddr);
  	if (!(pmd_flags(*spmd) & _PAGE_PRESENT))
  		return false;
  #endif
  	/*
  	 * Check the flags on the pte entry itself: it must be present and
  	 * writable.
  	 */
  	flags = pte_flags(*(spte_addr(cpu, *spgd, vaddr)));

  	return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
  }
  /*
   * So, when pin_stack_pages() asks us to pin a page, we check if it's already
   * in the page tables, and if not, we call demand_page() with error code 2
   * (meaning "write").
   */
  void pin_page(struct lg_cpu *cpu, unsigned long vaddr)
  {
  	if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2))
  		kill_guest(cpu, "bad stack page %#lx", vaddr);
  }
  /*:*/

  #ifdef CONFIG_X86_PAE
  static void release_pmd(pmd_t *spmd)
  {
  	/* If the entry's not present, there's nothing to release. */
  	if (pmd_flags(*spmd) & _PAGE_PRESENT) {
  		unsigned int i;
  		pte_t *ptepage = __va(pmd_pfn(*spmd) << PAGE_SHIFT);
  		/* For each entry in the page, we might need to release it. */
  		for (i = 0; i < PTRS_PER_PTE; i++)
  			release_pte(ptepage[i]);
  		/* Now we can free the page of PTEs */
  		free_page((long)ptepage);
  		/* And zero out the PMD entry so we never release it twice. */
  		set_pmd(spmd, __pmd(0));
  	}
  }
  
  static void release_pgd(pgd_t *spgd)
  {
  	/* If the entry's not present, there's nothing to release. */
  	if (pgd_flags(*spgd) & _PAGE_PRESENT) {
  		unsigned int i;
  		pmd_t *pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
  
  		for (i = 0; i < PTRS_PER_PMD; i++)
  			release_pmd(&pmdpage[i]);
  
  		/* Now we can free the page of PMDs */
  		free_page((long)pmdpage);
  		/* And zero out the PGD entry so we never release it twice. */
  		set_pgd(spgd, __pgd(0));
  	}
  }
  
  #else /* !CONFIG_X86_PAE */
  /*H:450
   * If we chase down the release_pgd() code, the non-PAE version looks like
   * this.  The PAE version is almost identical, but instead of calling
   * release_pte it calls release_pmd(), which looks much like this.
   */
  static void release_pgd(pgd_t *spgd)
  {
  	/* If the entry's not present, there's nothing to release. */
  	if (pgd_flags(*spgd) & _PAGE_PRESENT) {
  		unsigned int i;
  		/*
  		 * Converting the pfn to find the actual PTE page is easy: turn
  		 * the page number into a physical address, then convert to a
  		 * virtual address (easy for kernel pages like this one).
  		 */
  		pte_t *ptepage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
  		/* For each entry in the page, we might need to release it. */
  		for (i = 0; i < PTRS_PER_PTE; i++)
  			release_pte(ptepage[i]);
  		/* Now we can free the page of PTEs */
  		free_page((long)ptepage);
  		/* And zero out the PGD entry so we never release it twice. */
  		*spgd = __pgd(0);
  	}
  }
  #endif
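
  /*
   * So a full teardown chains together (sketch): release_all_pagetables()
   * below walks every shadow top level, release_pgd() frees each PTE page
   * that level points to, and release_pte() puts back the page reference
   * get_pfn() took.  Nothing is freed twice, because each level zeroes the
   * entry it has just released.
   */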
  
  /*H:445
   * We saw flush_user_mappings() twice: once from the flush_user_mappings()
   * hypercall and once in new_pgdir() when we re-used a top-level pgdir page.
   * It simply releases every PTE page from 0 up to the Guest's kernel address.
   */
  static void flush_user_mappings(struct lguest *lg, int idx)
  {
  	unsigned int i;
  	/* Release every pgd entry up to the kernel's address. */
  	for (i = 0; i < pgd_index(lg->kernel_address); i++)
  		release_pgd(lg->pgdirs[idx].pgdir + i);
  }
  /*H:440
   * (v) Flushing (throwing away) page tables,
   *
   * The Guest has a hypercall to throw away the page tables: it's used when a
   * large number of mappings have been changed.
   */
  void guest_pagetable_flush_user(struct lg_cpu *cpu)
  {
  	/* Drop the userspace part of the current page table. */
  	flush_user_mappings(cpu->lg, cpu->cpu_pgd);
  }
  /*:*/

  /* We walk down the guest page tables to get a guest-physical address */
  unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
  {
  	pgd_t gpgd;
  	pte_t gpte;
  #ifdef CONFIG_X86_PAE
  	pmd_t gpmd;
  #endif
  
  	/* Still not set up?  Just map 1:1. */
  	if (unlikely(cpu->linear_pages))
  		return vaddr;
  	/* First step: get the top-level Guest page table entry. */
  	gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
  	/* Toplevel not present?  We can't map it in. */
  	if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) {
  		kill_guest(cpu, "Bad address %#lx", vaddr);
  		return -1UL;
  	}

  #ifdef CONFIG_X86_PAE
  	gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
  	if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
  		kill_guest(cpu, "Bad address %#lx", vaddr);
  	gpte = lgread(cpu, gpte_addr(cpu, gpmd, vaddr), pte_t);
  #else
  	gpte = lgread(cpu, gpte_addr(cpu, gpgd, vaddr), pte_t);
  #endif
  	if (!(pte_flags(gpte) & _PAGE_PRESENT))
  		kill_guest(cpu, "Bad address %#lx", vaddr);
  
  	return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK);
  }
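
  /*
   * A worked example (made-up numbers): if the Guest PTE for vaddr
   * 0x1234567 holds guest page 0x89A, guest_pa() returns
   * 0x89A * 4096 + 0x567 = 0x89A567: the page number times the page size,
   * plus the offset within the page.
   */
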
  /*
   * We keep several page tables.  This is a simple routine to find the page
   * table (if any) corresponding to this top-level address the Guest has given
   * us.
   */
  static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
  {
  	unsigned int i;
  	for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
  		if (lg->pgdirs[i].pgdir && lg->pgdirs[i].gpgdir == pgtable)
  			break;
  	return i;
  }
  /*H:435
   * And this is us, creating the new page directory.  If we really do
   * allocate a new one (and so the kernel parts are not there), we set
   * blank_pgdir.
   */
  static unsigned int new_pgdir(struct lg_cpu *cpu,
  			      unsigned long gpgdir,
  			      int *blank_pgdir)
  {
  	unsigned int next;
  #ifdef CONFIG_X86_PAE
  	pmd_t *pmd_table;
  #endif

  	/*
  	 * We pick one entry at random to throw out.  Choosing the Least
  	 * Recently Used might be better, but this is easy.
  	 */
  	next = random32() % ARRAY_SIZE(cpu->lg->pgdirs);
  	/* If it's never been allocated at all before, try now. */
  	if (!cpu->lg->pgdirs[next].pgdir) {
  		cpu->lg->pgdirs[next].pgdir =
  					(pgd_t *)get_zeroed_page(GFP_KERNEL);
  		/* If the allocation fails, just keep using the one we have */
  		if (!cpu->lg->pgdirs[next].pgdir)
  			next = cpu->cpu_pgd;
  		else {
  #ifdef CONFIG_X86_PAE
  			/*
  			 * In PAE mode, allocate a pmd page and populate the
  			 * last pgd entry.
  			 */
  			pmd_table = (pmd_t *)get_zeroed_page(GFP_KERNEL);
  			if (!pmd_table) {
  				free_page((long)cpu->lg->pgdirs[next].pgdir);
  				/* Don't leave a stale pointer to the freed page. */
  				cpu->lg->pgdirs[next].pgdir = NULL;
  				next = cpu->cpu_pgd;
  			} else {
  				set_pgd(cpu->lg->pgdirs[next].pgdir +
  					SWITCHER_PGD_INDEX,
  					__pgd(__pa(pmd_table) | _PAGE_PRESENT));
  				/*
  				 * This is a blank page, so there are no kernel
  				 * mappings: caller must map the stack!
  				 */
  				*blank_pgdir = 1;
  			}
  #else
  			*blank_pgdir = 1;
  #endif
  		}
  	}
  	/* Record which Guest toplevel this shadows. */
  	cpu->lg->pgdirs[next].gpgdir = gpgdir;
  	/* Release all the non-kernel mappings. */
  	flush_user_mappings(cpu->lg, next);
  
  	return next;
  }
  /*H:470
   * Finally, a routine which throws away everything: all PGD entries in all
   * the shadow page tables, including the Guest's kernel mappings.  This is used
   * when we destroy the Guest.
   */
  static void release_all_pagetables(struct lguest *lg)
  {
  	unsigned int i, j;
  	/* Every shadow pagetable this Guest has */
  	for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
  		if (lg->pgdirs[i].pgdir) {
  #ifdef CONFIG_X86_PAE
  			pgd_t *spgd;
  			pmd_t *pmdpage;
  			unsigned int k;
  
  			/* Get the last pmd page. */
  			spgd = lg->pgdirs[i].pgdir + SWITCHER_PGD_INDEX;
  			pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
  			/*
  			 * And release the pmd entries of that pmd page,
  			 * except for the switcher pmd.
  			 */
  			for (k = 0; k < SWITCHER_PMD_INDEX; k++)
  				release_pmd(&pmdpage[k]);
  #endif
  			/* Every PGD entry except the Switcher at the top */
  			for (j = 0; j < SWITCHER_PGD_INDEX; j++)
  				release_pgd(lg->pgdirs[i].pgdir + j);
  		}
  }
  /*
   * We also throw away everything when a Guest tells us it's changed a kernel
   * mapping.  Since kernel mappings are in every page table, it's easiest to
   * throw them all away.  This traps the Guest in amber for a while as
   * everything faults back in, but it's rare.
   */
  void guest_pagetable_clear_all(struct lg_cpu *cpu)
  {
  	release_all_pagetables(cpu->lg);
  	/* We need the Guest kernel stack mapped again. */
  	pin_stack_pages(cpu);
  }
  
  /*H:430
   * (iv) Switching page tables
   *
   * Now we've seen all the page table setting and manipulation, let's see
   * what happens when the Guest changes page tables (ie. changes the top-level
   * pgdir).  This occurs on almost every context switch.
   */
  void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
  {
  	int newpgdir, repin = 0;
  
  	/*
  	 * The very first time they call this, we're actually running without
  	 * any page tables; we've been making it up.  Throw them away now.
  	 */
  	if (unlikely(cpu->linear_pages)) {
  		release_all_pagetables(cpu->lg);
  		cpu->linear_pages = false;
  		/* Force allocation of a new pgdir. */
  		newpgdir = ARRAY_SIZE(cpu->lg->pgdirs);
  	} else {
  		/* Look to see if we have this one already. */
  		newpgdir = find_pgdir(cpu->lg, pgtable);
  	}
  
  	/*
  	 * If not, we allocate or mug an existing one: if it's a fresh one,
  	 * repin gets set to 1.
  	 */
  	if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs))
  		newpgdir = new_pgdir(cpu, pgtable, &repin);
  	/* Change the current pgd index to the new one. */
  	cpu->cpu_pgd = newpgdir;
  	/* If it was completely blank, we map in the Guest kernel stack */
  	if (repin)
  		pin_stack_pages(cpu);
  }
  /*:*/
  
  /*M:009
   * Since we throw away all mappings when a kernel mapping changes, our
   * performance sucks for Guests using highmem.  In fact, a Guest with
   * PAGE_OFFSET 0xc0000000 (the default) and more than about 700MB of RAM is
   * usually slower than a Guest with less memory.
   *
   * This, of course, cannot be fixed.  It would take some kind of... well, I
   * don't know, but the term "puissant code-fu" comes to mind.
  :*/

  /*H:420
   * This is the routine which actually sets the page table entry for the
   * "idx"'th shadow page table.
   *
   * Normally, we can just throw out the old entry and replace it with 0: if they
   * use it demand_page() will put the new entry in.  We need to do this anyway:
   * The Guest expects _PAGE_ACCESSED to be set on its PTE the first time a page
   * is read from, and _PAGE_DIRTY when it's written to.
   *
   * But Avi Kivity pointed out that most Operating Systems (Linux included) set
   * these bits on PTEs immediately anyway.  This is done to save the CPU from
   * having to update them, but it helps us the same way: if they set
   * _PAGE_ACCESSED then we can put a read-only PTE entry in immediately, and if
   * they set _PAGE_DIRTY then we can put a writable PTE entry in immediately.
   */
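
  /*
   * As a table (just a summary of the above):
   *
   *	Guest PTE flags			what we shadow
   *	_PAGE_DIRTY or _PAGE_ACCESSED	the real entry, via gpte_to_spte()
   *	neither				__pte(0), until demand_page() fills it
   */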
  static void do_set_pte(struct lg_cpu *cpu, int idx,
  		       unsigned long vaddr, pte_t gpte)
  {
  	/* Look up the matching shadow page directory entry. */
  	pgd_t *spgd = spgd_addr(cpu, idx, vaddr);
  #ifdef CONFIG_X86_PAE
  	pmd_t *spmd;
  #endif
  
  	/* If the top level isn't present, there's no entry to update. */
  	if (pgd_flags(*spgd) & _PAGE_PRESENT) {
  #ifdef CONFIG_X86_PAE
  		spmd = spmd_addr(cpu, *spgd, vaddr);
  		if (pmd_flags(*spmd) & _PAGE_PRESENT) {
  #endif
  			/* Otherwise, start by releasing the existing entry. */
  			pte_t *spte = spte_addr(cpu, *spgd, vaddr);
  			release_pte(*spte);
  			/*
  			 * If they're setting this entry as dirty or accessed,
  			 * we might as well put that entry they've given us in
  			 * now.  This shaves 10% off a copy-on-write
  			 * micro-benchmark.
  			 */
  			if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
  				check_gpte(cpu, gpte);
  				set_pte(spte,
  					gpte_to_spte(cpu, gpte,
  						pte_flags(gpte) & _PAGE_DIRTY));
  			} else {
  				/*
  				 * Otherwise kill it and we can demand_page()
  				 * it in later.
  				 */
  				set_pte(spte, __pte(0));
  			}
  #ifdef CONFIG_X86_PAE
  		}
  #endif
  	}
  }
  /*H:410
   * Updating a PTE entry is a little trickier.
   *
   * We keep track of several different page tables (the Guest uses one for each
   * process, so it makes sense to cache at least a few).  Each of these have
   * identical kernel parts: ie. every mapping above PAGE_OFFSET is the same for
   * all processes.  So when the page table above that address changes, we update
   * all the page tables, not just the current one.  This is rare.
   *
   * The benefit is that when we have to track a new page table, we can keep all
   * the kernel mappings.  This speeds up context switch immensely.
   */
  void guest_set_pte(struct lg_cpu *cpu,
  		   unsigned long gpgdir, unsigned long vaddr, pte_t gpte)
  {
  	/*
  	 * Kernel mappings must be changed on all top levels.  Slow, but doesn't
  	 * happen often.
  	 */
  	if (vaddr >= cpu->lg->kernel_address) {
  		unsigned int i;
  		for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++)
  			if (cpu->lg->pgdirs[i].pgdir)
  				do_set_pte(cpu, i, vaddr, gpte);
  	} else {
  		/* Is this page table one we have a shadow for? */
  		int pgdir = find_pgdir(cpu->lg, gpgdir);
  		if (pgdir != ARRAY_SIZE(cpu->lg->pgdirs))
  			/* If so, do the update. */
  			do_set_pte(cpu, pgdir, vaddr, gpte);
  	}
  }
  /*H:400
   * (iii) Setting up a page table entry when the Guest tells us one has changed.
bff672e63   Rusty Russell   lguest: documenta...
885
886
887
888
889
890
891
892
893
894
   *
   * Just like we did in interrupts_and_traps.c, it makes sense for us to deal
   * with the other side of page tables while we're here: what happens when the
   * Guest asks for a page table to be updated?
   *
   * We already saw that demand_page() will fill in the shadow page tables when
   * needed, so we can simply remove shadow page table entries whenever the Guest
   * tells us they've changed.  When the Guest tries to use the new entry it will
   * fault and demand_page() will fix it up.
   *
fd589a8f0   Anand Gadiyar   trivial: fix typo...
895
   * So with that in mind here's our code to update a (top-level) PGD entry:
bff672e63   Rusty Russell   lguest: documenta...
896
   */
void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx)
{
	int pgdir;

	if (idx >= SWITCHER_PGD_INDEX)
		return;

	/* If they're talking about a page table we have a shadow for... */
	pgdir = find_pgdir(lg, gpgdir);
	if (pgdir < ARRAY_SIZE(lg->pgdirs))
		/* ... throw it away. */
		release_pgd(lg->pgdirs[pgdir].pgdir + idx);
}
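
/*
 * Note the early return: SWITCHER_PGD_INDEX is the top-level slot we
 * reserve for the Switcher, so we silently ignore any Guest attempt to
 * replace it.  Everything below that is fair game: throwing the shadow
 * entry away is always safe, since demand_page() will rebuild it from
 * the Guest's tables on the next fault.
 */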

#ifdef CONFIG_X86_PAE
/*
 * For setting a mid-level (PMD) entry, we just throw everything away.
 * It's easy.
 */
void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx)
{
	guest_pagetable_clear_all(&lg->cpus[0]);
}
#endif
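
/*
 * Coarse, but defensible: guest_pagetable_clear_all() drops every shadow
 * mapping for the Guest, and the next faults repopulate just what's
 * needed.  Guests rewrite PMD entries rarely enough that tracking them
 * individually isn't worth the bookkeeping.
 */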

/*H:500
 * (vii) Setting up the page tables initially.
 *
 * When a Guest is first created, we initialize a shadow page table which
 * we will populate on future faults.  The Guest doesn't have any actual
 * pagetables yet, so we set linear_pages to tell demand_page() to fake it
 * for the moment.
 */
int init_guest_pagetable(struct lguest *lg)
{
	struct lg_cpu *cpu = &lg->cpus[0];
	int allocated = 0;

	/* lg (and lg->cpus[]) starts zeroed: this allocates a new pgdir */
	cpu->cpu_pgd = new_pgdir(cpu, 0, &allocated);
	if (!allocated)
		return -ENOMEM;

	/* We start with a linear mapping until the Guest initializes. */
	cpu->linear_pages = true;
	return 0;
}
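
/*
 * "Linear" here means demand_page() pretends the Guest gave us an
 * identity mapping: roughly (a sketch, not the exact code),
 *
 *	gpte = __pte((vaddr & PAGE_MASK) | _PAGE_PRESENT | _PAGE_RW);
 *
 * so guest-virtual equals guest-physical until the Guest loads a real
 * page table of its own.
 */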

/*H:508 When the Guest calls LHCALL_LGUEST_INIT we do more setup. */
void page_table_guest_data_init(struct lg_cpu *cpu)
{
	/* We get the kernel address: above this is all kernel memory. */
	if (get_user(cpu->lg->kernel_address,
		     &cpu->lg->lguest_data->kernel_address)
		/*
		 * We tell the Guest that it can't use the top 2 or 4 MB
		 * of virtual addresses used by the Switcher.
		 */
		|| put_user(RESERVE_MEM * 1024 * 1024,
			    &cpu->lg->lguest_data->reserve_mem)) {
		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
		return;
	}

	/*
	 * In flush_user_mappings() we loop from 0 to
	 * "pgd_index(lg->kernel_address)".  This assumes it won't hit the
	 * Switcher mappings, so check that now.
	 */
#ifdef CONFIG_X86_PAE
	if (pgd_index(cpu->lg->kernel_address) == SWITCHER_PGD_INDEX &&
		pmd_index(cpu->lg->kernel_address) == SWITCHER_PMD_INDEX)
#else
	if (pgd_index(cpu->lg->kernel_address) >= SWITCHER_PGD_INDEX)
#endif
		kill_guest(cpu, "bad kernel address %#lx",
				 cpu->lg->kernel_address);
}
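
/*
 * A worked example of that check for the non-PAE case: with two-level
 * paging each PGD slot covers 4 MB, so pgd_index(v) is just v >> 22.
 * If SWITCHER_PGD_INDEX is the top slot (1023), a kernel_address of
 * 0xFF400000 gives index 1021 and passes, while 0xFFC00000 gives 1023
 * and collides with the Switcher, so we kill the Guest.  (The exact
 * index values here are illustrative; the constants live in lg.h.)
 */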

/* When a Guest dies, our cleanup is fairly simple. */
void free_guest_pagetable(struct lguest *lg)
{
	unsigned int i;

	/* Throw away all page table pages. */
	release_all_pagetables(lg);
	/* Now free the top levels: free_page() can handle 0 just fine. */
	for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
		free_page((long)lg->pgdirs[i].pgdir);
}

/*H:480
 * (vi) Mapping the Switcher when the Guest is about to run.
 *
 * The Switcher and the two pages for this CPU need to be visible in the
 * Guest (and not the pages for other CPUs).  We have the appropriate PTE pages
 * for each CPU already set up, we just need to hook them in now that we know
 * which Guest is about to run on this CPU.
 */
void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
{
	pte_t *switcher_pte_page = __this_cpu_read(switcher_pte_pages);
	pte_t regs_pte;

#ifdef CONFIG_X86_PAE
	pmd_t switcher_pmd;
	pmd_t *pmd_table;

	switcher_pmd = pfn_pmd(__pa(switcher_pte_page) >> PAGE_SHIFT,
			       PAGE_KERNEL_EXEC);

	/*
	 * Figure out where the pmd page is, by reading the PGD, and converting
	 * it to a virtual address.
	 */
	pmd_table = __va(pgd_pfn(cpu->lg->
			pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX])
								<< PAGE_SHIFT);
	/* Now write it into the shadow page table. */
	set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd);
#else
	pgd_t switcher_pgd;

	/*
	 * Make the last PGD entry for this Guest point to the Switcher's PTE
	 * page for this CPU (with appropriate flags).
	 */
	switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL_EXEC);

	cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;

#endif
	/*
	 * We also change the Switcher PTE page.  When we're running the Guest,
	 * we want the Guest's "regs" page to appear where the first Switcher
	 * page for this CPU is.  This is an optimization: when the Switcher
	 * saves the Guest registers, it saves them into the first page of this
	 * CPU's "struct lguest_pages": if we make sure the Guest's register
	 * page is already mapped there, we don't have to copy them out
	 * again.
	 */
	regs_pte = pfn_pte(__pa(cpu->regs_page) >> PAGE_SHIFT, PAGE_KERNEL);
	set_pte(&switcher_pte_page[pte_index((unsigned long)pages)], regs_pte);
}
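
/*
 * In case pte_index() looks magic: it's just the middle bits of the
 * address, (v >> PAGE_SHIFT) & (PTRS_PER_PTE - 1).  Since "pages" is the
 * virtual address of this CPU's "struct lguest_pages" inside the
 * Switcher region, that picks out exactly the slot where the Switcher
 * expects to find the register save area.
 */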

/*:*/

static void free_switcher_pte_pages(void)
{
	unsigned int i;

	for_each_possible_cpu(i)
		free_page((long)switcher_pte_page(i));
}

/*H:520
 * Setting up the Switcher PTE page for a given CPU is fairly easy, given
 * the CPU number and the "struct page"s for the Switcher code itself.
 *
 * Currently the Switcher is less than a page long, so "pages" is always 1.
 */
static __init void populate_switcher_pte_page(unsigned int cpu,
					      struct page *switcher_page[],
					      unsigned int pages)
{
	unsigned int i;
	pte_t *pte = switcher_pte_page(cpu);

	/* The first entries are easy: they map the Switcher code. */
	for (i = 0; i < pages; i++) {
		set_pte(&pte[i], mk_pte(switcher_page[i],
				__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
	}

	/* The only other thing we map is this CPU's pair of pages. */
	i = pages + cpu*2;

	/* First page (Guest registers) is writable from the Guest */
	set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]),
			 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW)));

	/*
	 * The second page contains the "struct lguest_ro_state", and is
	 * read-only.
	 */
	set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]),
			   __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
}
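
/*
 * The resulting layout of this CPU's Switcher PTE page, spelled out:
 *
 *	pte[0 .. pages-1]        Switcher code/data         (read-only)
 *	pte[pages + cpu*2]       Guest register page        (writable)
 *	pte[pages + cpu*2 + 1]   "struct lguest_ro_state"   (read-only)
 *
 * Every other entry stays zero, so the Guest sees nothing else here.
 */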

/*
 * We've made it through the page table code.  Perhaps our tired brains are
 * still processing the details, or perhaps we're simply glad it's over.
 *
 * If nothing else, note that all this complexity in keeping shadow page
 * tables in sync with the Guest's page tables is there for one reason: for
 * most Guests this page table dance determines how bad performance will be.
 * This is why Xen uses exotic direct Guest pagetable manipulation, and why
 * both Intel and AMD have implemented shadow page table support directly
 * into hardware.
 *
 * There is just one file remaining in the Host.
 */

/*H:510
 * At boot or module load time, init_pagetables() allocates and populates
 * the Switcher PTE page for each CPU.
 */
__init int init_pagetables(struct page **switcher_page, unsigned int pages)
{
	unsigned int i;

	for_each_possible_cpu(i) {
		switcher_pte_page(i) = (pte_t *)get_zeroed_page(GFP_KERNEL);
		if (!switcher_pte_page(i)) {
			free_switcher_pte_pages();
			return -ENOMEM;
		}
		populate_switcher_pte_page(i, switcher_page, pages);
	}
	return 0;
}
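
/*
 * Note the unwind: if any per-CPU allocation fails, we free whatever we
 * already allocated before returning -ENOMEM (free_page() handles the
 * still-NULL pages of CPUs we never reached just fine).  The caller, the
 * module init in core.c, therefore has nothing pagetable-related to
 * clean up on failure.
 */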

/*:*/

/* Cleaning up simply involves freeing the PTE page for each CPU. */
void free_pagetables(void)
{
	free_switcher_pte_pages();
}