Commit eb64c3c6cdb8fa8a4d324eb71a9033b62e150918

Authored by Linus Torvalds

Merge tag 'stable/for-linus-3.19-rc0b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull additional xen update from David Vrabel:
 "Xen: additional features for 3.19-rc0

   - Linear p2m for x86 PV guests, which simplifies the p2m code,
     improves performance, and will allow for > 512 GB PV guests in the
     future.

  A last-minute, configuration-specific issue was discovered with this
  change, which is why it was not included in my previous pull request.
  This has now been fixed and tested"

* tag 'stable/for-linus-3.19-rc0b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen: switch to post-init routines in xen mmu.c earlier
  Revert "swiotlb-xen: pass dev_addr to swiotlb_tbl_unmap_single"
  xen: annotate xen_set_identity_and_remap_chunk() with __init
  xen: introduce helper functions to do safe read and write accesses
  xen: Speed up set_phys_to_machine() by using read-only mappings
  xen: switch to linear virtual mapped sparse p2m list
  xen: Hide get_phys_to_machine() to be able to tune common path
  x86: Introduce function to get pmd entry pointer
  xen: Delay invalidating extra memory
  xen: Delay m2p_override initialization
  xen: Delay remapping memory of pv-domain
  xen: use common page allocation function in p2m.c
  xen: Make functions static
  xen: fix some style issues in p2m.c

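The headline item above is the switch from a three-level p2m tree to a flat, virtually mapped p2m array. As a rough illustration of the lookup model (a minimal user-space sketch, not kernel code; the demo_* names and bit value are invented for this example), the new scheme boils down to an array index with a fallback for pfns past the populated range, mirroring __pfn_to_mfn() in the arch/x86/include/asm/xen/page.h hunk below:

/*
 * Minimal sketch of the "linear p2m" lookup model (illustrative only;
 * demo_* names are not kernel symbols and the bit value is assumed).
 */
#include <stdio.h>

#define DEMO_IDENTITY_BIT (1UL << 62)

static unsigned long demo_p2m[8];          /* stands in for xen_p2m_addr[] */
static unsigned long demo_p2m_size = 8;    /* stands in for xen_p2m_size   */

static unsigned long demo_pfn_to_mfn(unsigned long pfn)
{
        if (pfn < demo_p2m_size)
                return demo_p2m[pfn];              /* plain array lookup */
        return pfn | DEMO_IDENTITY_BIT;            /* identity past the end */
}

int main(void)
{
        demo_p2m[3] = 0x1234;                      /* pretend pfn 3 -> mfn 0x1234 */
        printf("pfn 3 -> mfn %lx\n", demo_pfn_to_mfn(3));
        printf("pfn 100 -> %lx (identity)\n", demo_pfn_to_mfn(100));
        return 0;
}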
Showing 7 changed files

arch/x86/include/asm/pgtable_types.h
... ... @@ -452,6 +452,7 @@
452 452 extern pte_t *lookup_address(unsigned long address, unsigned int *level);
453 453 extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
454 454 unsigned int *level);
  455 +extern pmd_t *lookup_pmd_address(unsigned long address);
455 456 extern phys_addr_t slow_virt_to_phys(void *__address);
456 457 extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
457 458 unsigned numpages, unsigned long page_flags);
arch/x86/include/asm/xen/page.h
... ... @@ -41,10 +41,12 @@
41 41  
42 42 extern unsigned long *machine_to_phys_mapping;
43 43 extern unsigned long machine_to_phys_nr;
  44 +extern unsigned long *xen_p2m_addr;
  45 +extern unsigned long xen_p2m_size;
  46 +extern unsigned long xen_max_p2m_pfn;
44 47  
45 48 extern unsigned long get_phys_to_machine(unsigned long pfn);
46 49 extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
47   -extern bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn);
48 50 extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
49 51 extern unsigned long set_phys_range_identity(unsigned long pfn_s,
50 52 unsigned long pfn_e);
51 53  
52 54  
... ... @@ -52,17 +54,52 @@
52 54 extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
53 55 struct gnttab_map_grant_ref *kmap_ops,
54 56 struct page **pages, unsigned int count);
55   -extern int m2p_add_override(unsigned long mfn, struct page *page,
56   - struct gnttab_map_grant_ref *kmap_op);
57 57 extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
58 58 struct gnttab_map_grant_ref *kmap_ops,
59 59 struct page **pages, unsigned int count);
60   -extern int m2p_remove_override(struct page *page,
61   - struct gnttab_map_grant_ref *kmap_op,
62   - unsigned long mfn);
63   -extern struct page *m2p_find_override(unsigned long mfn);
64 60 extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
65 61  
  62 +/*
  63 + * Helper functions to write or read unsigned long values to/from
  64 + * memory, when the access may fault.
  65 + */
  66 +static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val)
  67 +{
  68 + return __put_user(val, (unsigned long __user *)addr);
  69 +}
  70 +
  71 +static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val)
  72 +{
  73 + return __get_user(*val, (unsigned long __user *)addr);
  74 +}
  75 +
  76 +/*
  77 + * When to use pfn_to_mfn(), __pfn_to_mfn() or get_phys_to_machine():
  78 + * - pfn_to_mfn() returns either INVALID_P2M_ENTRY or the mfn. No indicator
  79 + * bits (identity or foreign) are set.
  80 + * - __pfn_to_mfn() returns the found entry of the p2m table. A possibly set
  81 + * identity or foreign indicator will be still set. __pfn_to_mfn() is
  82 + * encapsulating get_phys_to_machine() which is called in special cases only.
  83 + * - get_phys_to_machine() is to be called by __pfn_to_mfn() only in special
  84 + * cases needing an extended handling.
  85 + */
  86 +static inline unsigned long __pfn_to_mfn(unsigned long pfn)
  87 +{
  88 + unsigned long mfn;
  89 +
  90 + if (pfn < xen_p2m_size)
  91 + mfn = xen_p2m_addr[pfn];
  92 + else if (unlikely(pfn < xen_max_p2m_pfn))
  93 + return get_phys_to_machine(pfn);
  94 + else
  95 + return IDENTITY_FRAME(pfn);
  96 +
  97 + if (unlikely(mfn == INVALID_P2M_ENTRY))
  98 + return get_phys_to_machine(pfn);
  99 +
  100 + return mfn;
  101 +}
  102 +
66 103 static inline unsigned long pfn_to_mfn(unsigned long pfn)
67 104 {
68 105 unsigned long mfn;
... ... @@ -70,7 +107,7 @@
70 107 if (xen_feature(XENFEAT_auto_translated_physmap))
71 108 return pfn;
72 109  
73   - mfn = get_phys_to_machine(pfn);
  110 + mfn = __pfn_to_mfn(pfn);
74 111  
75 112 if (mfn != INVALID_P2M_ENTRY)
76 113 mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
... ... @@ -83,7 +120,7 @@
83 120 if (xen_feature(XENFEAT_auto_translated_physmap))
84 121 return 1;
85 122  
86   - return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY;
  123 + return __pfn_to_mfn(pfn) != INVALID_P2M_ENTRY;
87 124 }
88 125  
89 126 static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn)
... ... @@ -102,7 +139,7 @@
102 139 * In such cases it doesn't matter what we return (we return garbage),
103 140 * but we must handle the fault without crashing!
104 141 */
105   - ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
  142 + ret = xen_safe_read_ulong(&machine_to_phys_mapping[mfn], &pfn);
106 143 if (ret < 0)
107 144 return ~0;
108 145  
... ... @@ -117,7 +154,7 @@
117 154 return mfn;
118 155  
119 156 pfn = mfn_to_pfn_no_overrides(mfn);
120   - if (get_phys_to_machine(pfn) != mfn) {
  157 + if (__pfn_to_mfn(pfn) != mfn) {
121 158 /*
122 159 * If this appears to be a foreign mfn (because the pfn
123 160 * doesn't map back to the mfn), then check the local override
... ... @@ -133,8 +170,7 @@
133 170 * entry doesn't map back to the mfn and m2p_override doesn't have a
134 171 * valid entry for it.
135 172 */
136   - if (pfn == ~0 &&
137   - get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
  173 + if (pfn == ~0 && __pfn_to_mfn(mfn) == IDENTITY_FRAME(mfn))
138 174 pfn = mfn;
139 175  
140 176 return pfn;
... ... @@ -180,7 +216,7 @@
180 216 return mfn;
181 217  
182 218 pfn = mfn_to_pfn(mfn);
183   - if (get_phys_to_machine(pfn) != mfn)
  219 + if (__pfn_to_mfn(pfn) != mfn)
184 220 return -1; /* force !pfn_valid() */
185 221 return pfn;
186 222 }
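The comment block added above distinguishes the raw p2m entry returned by __pfn_to_mfn(), which may still carry an identity or foreign indicator bit, from the cleaned value returned by pfn_to_mfn(). A tiny hedged illustration of that masking step (user-space only; the DEMO_* bit positions are assumed stand-ins for IDENTITY_FRAME_BIT/FOREIGN_FRAME_BIT, not taken from the headers):

#include <stdio.h>

#define DEMO_FOREIGN_BIT  (1UL << 63)   /* assumed stand-in for FOREIGN_FRAME_BIT  */
#define DEMO_IDENTITY_BIT (1UL << 62)   /* assumed stand-in for IDENTITY_FRAME_BIT */

int main(void)
{
        /* What __pfn_to_mfn() might hand back for an identity-mapped pfn. */
        unsigned long raw = 0x42UL | DEMO_IDENTITY_BIT;
        /* What pfn_to_mfn() returns after stripping the indicator bits. */
        unsigned long mfn = raw & ~(DEMO_FOREIGN_BIT | DEMO_IDENTITY_BIT);

        printf("raw entry %lx -> mfn %lx\n", raw, mfn);
        return 0;
}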
arch/x86/mm/pageattr.c
... ... @@ -384,6 +384,26 @@
384 384 }
385 385  
386 386 /*
  387 + * Lookup the PMD entry for a virtual address. Return a pointer to the entry
  388 + * or NULL if not present.
  389 + */
  390 +pmd_t *lookup_pmd_address(unsigned long address)
  391 +{
  392 + pgd_t *pgd;
  393 + pud_t *pud;
  394 +
  395 + pgd = pgd_offset_k(address);
  396 + if (pgd_none(*pgd))
  397 + return NULL;
  398 +
  399 + pud = pud_offset(pgd, address);
  400 + if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud))
  401 + return NULL;
  402 +
  403 + return pmd_offset(pud, address);
  404 +}
  405 +
  406 +/*
387 407 * This is necessary because __pa() does not work on some
388 408 * kinds of memory, like vmalloc() or the alloc_remap()
389 409 * areas on 32-bit NUMA systems. The percpu areas can
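The lookup_pmd_address() helper added above is consumed later in this series by the p2m code to swap read-only PMD entries for writable ones. As a conceptual, user-space analogue of "walk the upper levels and return a pointer into the next level, or NULL when a level is absent" (demo_* names are invented for this sketch):

#include <stddef.h>
#include <stdio.h>

static long demo_leaf[4];
static long *demo_dir[4];               /* NULL slot plays the role of pgd_none()/pud_none() */

static long *demo_lookup(unsigned int top, unsigned int idx)
{
        if (demo_dir[top] == NULL)      /* upper level not present -> NULL, like the helper */
                return NULL;
        return &demo_dir[top][idx];     /* pointer into the next level, like pmd_offset()   */
}

int main(void)
{
        demo_dir[1] = demo_leaf;
        printf("present: %p, missing: %p\n",
               (void *)demo_lookup(1, 2), (void *)demo_lookup(0, 0));
        return 0;
}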
arch/x86/xen/mmu.c
... ... @@ -387,7 +387,7 @@
387 387 unsigned long mfn;
388 388  
389 389 if (!xen_feature(XENFEAT_auto_translated_physmap))
390   - mfn = get_phys_to_machine(pfn);
  390 + mfn = __pfn_to_mfn(pfn);
391 391 else
392 392 mfn = pfn;
393 393 /*
394 394  
395 395  
396 396  
397 397  
... ... @@ -1113,20 +1113,16 @@
1113 1113 * instead of somewhere later and be confusing. */
1114 1114 xen_mc_flush();
1115 1115 }
1116   -static void __init xen_pagetable_p2m_copy(void)
  1116 +
  1117 +static void __init xen_pagetable_p2m_free(void)
1117 1118 {
1118 1119 unsigned long size;
1119 1120 unsigned long addr;
1120   - unsigned long new_mfn_list;
1121 1121  
1122   - if (xen_feature(XENFEAT_auto_translated_physmap))
1123   - return;
1124   -
1125 1122 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1126 1123  
1127   - new_mfn_list = xen_revector_p2m_tree();
1128 1124 /* No memory or already called. */
1129   - if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list)
  1125 + if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list)
1130 1126 return;
1131 1127  
1132 1128 /* using __ka address and sticking INVALID_P2M_ENTRY! */
... ... @@ -1144,8 +1140,6 @@
1144 1140  
1145 1141 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1146 1142 memblock_free(__pa(xen_start_info->mfn_list), size);
1147   - /* And revector! Bye bye old array */
1148   - xen_start_info->mfn_list = new_mfn_list;
1149 1143  
1150 1144 /* At this stage, cleanup_highmap has already cleaned __ka space
1151 1145 * from _brk_limit way up to the max_pfn_mapped (which is the end of
1152 1146  
1153 1147  
1154 1148  
1155 1149  
1156 1150  
... ... @@ -1169,17 +1163,35 @@
1169 1163 }
1170 1164 #endif
1171 1165  
1172   -static void __init xen_pagetable_init(void)
  1166 +static void __init xen_pagetable_p2m_setup(void)
1173 1167 {
1174   - paging_init();
  1168 + if (xen_feature(XENFEAT_auto_translated_physmap))
  1169 + return;
  1170 +
  1171 + xen_vmalloc_p2m_tree();
  1172 +
1175 1173 #ifdef CONFIG_X86_64
1176   - xen_pagetable_p2m_copy();
  1174 + xen_pagetable_p2m_free();
1177 1175 #endif
  1176 + /* And revector! Bye bye old array */
  1177 + xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
  1178 +}
  1179 +
  1180 +static void __init xen_pagetable_init(void)
  1181 +{
  1182 + paging_init();
  1183 + xen_post_allocator_init();
  1184 +
  1185 + xen_pagetable_p2m_setup();
  1186 +
1178 1187 /* Allocate and initialize top and mid mfn levels for p2m structure */
1179 1188 xen_build_mfn_list_list();
1180 1189  
  1190 + /* Remap memory freed due to conflicts with E820 map */
  1191 + if (!xen_feature(XENFEAT_auto_translated_physmap))
  1192 + xen_remap_memory();
  1193 +
1181 1194 xen_setup_shared_info();
1182   - xen_post_allocator_init();
1183 1195 }
1184 1196 static void xen_write_cr2(unsigned long cr2)
1185 1197 {
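For scale, a hedged back-of-the-envelope calculation of what xen_pagetable_p2m_free() hands back via memblock_free() once the p2m has been revectored to the new virtual mapping (the 4 GiB / 1,048,576-page domain size is an assumption for illustration, not a figure from the commit):

#include <stdio.h>

int main(void)
{
        unsigned long nr_pages = 1UL << 20;                     /* assumed 4 GiB PV domain  */
        unsigned long size = nr_pages * sizeof(unsigned long);  /* old flat mfn_list: 8 MiB */

        printf("old mfn_list size: %lu KiB\n", size / 1024);
        return 0;
}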
arch/x86/xen/p2m.c
... ... @@ -3,21 +3,22 @@
3 3 * guests themselves, but it must also access and update the p2m array
4 4 * during suspend/resume when all the pages are reallocated.
5 5 *
6   - * The p2m table is logically a flat array, but we implement it as a
7   - * three-level tree to allow the address space to be sparse.
  6 + * The logical flat p2m table is mapped to a linear kernel memory area.
  7 + * For accesses by Xen a three-level tree linked via mfns only is set up to
  8 + * allow the address space to be sparse.
8 9 *
9   - * Xen
10   - * |
11   - * p2m_top p2m_top_mfn
12   - * / \ / \
13   - * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn
14   - * / \ / \ / /
15   - * p2m p2m p2m p2m p2m p2m p2m ...
  10 + * Xen
  11 + * |
  12 + * p2m_top_mfn
  13 + * / \
  14 + * p2m_mid_mfn p2m_mid_mfn
  15 + * / /
  16 + * p2m p2m p2m ...
16 17 *
17 18 * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
18 19 *
19   - * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the
20   - * maximum representable pseudo-physical address space is:
  20 + * The p2m_top_mfn level is limited to 1 page, so the maximum representable
  21 + * pseudo-physical address space is:
21 22 * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
22 23 *
23 24 * P2M_PER_PAGE depends on the architecture, as a mfn is always
... ... @@ -30,6 +31,9 @@
30 31 * leaf entries, or for the top root, or middle one, for which there is a void
31 32 * entry, we assume it is "missing". So (for example)
32 33 * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
  34 + * We have a dedicated page p2m_missing with all entries being
  35 + * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m
  36 + * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns.
33 37 *
34 38 * We also have the possibility of setting 1-1 mappings on certain regions, so
35 39 * that:
36 40  
37 41  
... ... @@ -39,122 +43,20 @@
39 43 * PCI BARs, or ACPI spaces), we can create mappings easily because we
40 44 * get the PFN value to match the MFN.
41 45 *
42   - * For this to work efficiently we have one new page p2m_identity and
43   - * allocate (via reserved_brk) any other pages we need to cover the sides
44   - * (1GB or 4MB boundary violations). All entries in p2m_identity are set to
45   - * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
46   - * no other fancy value).
  46 + * For this to work efficiently we have one new page p2m_identity. All entries
  47 + * in p2m_identity are set to INVALID_P2M_ENTRY type (Xen toolstack only
  48 + * recognizes that and MFNs, no other fancy value).
47 49 *
48 50 * On lookup we spot that the entry points to p2m_identity and return the
49 51 * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
50 52 * If the entry points to an allocated page, we just proceed as before and
51   - * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
  53 + * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
52 54 * appropriate functions (pfn_to_mfn).
53 55 *
54 56 * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
55 57 * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
56 58 * non-identity pfn. To protect ourselves against we elect to set (and get) the
57 59 * IDENTITY_FRAME_BIT on all identity mapped PFNs.
58   - *
59   - * This simplistic diagram is used to explain the more subtle piece of code.
60   - * There is also a digram of the P2M at the end that can help.
61   - * Imagine your E820 looking as so:
62   - *
63   - * 1GB 2GB 4GB
64   - * /-------------------+---------\/----\ /----------\ /---+-----\
65   - * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
66   - * \-------------------+---------/\----/ \----------/ \---+-----/
67   - * ^- 1029MB ^- 2001MB
68   - *
69   - * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100),
70   - * 2048MB = 524288 (0x80000)]
71   - *
72   - * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
73   - * is actually not present (would have to kick the balloon driver to put it in).
74   - *
75   - * When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
76   - * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
77   - * of the PFN and the end PFN (263424 and 512256 respectively). The first step
78   - * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
79   - * covers 512^2 of page estate (1GB) and in case the start or end PFN is not
80   - * aligned on 512^2*PAGE_SIZE (1GB) we reserve_brk new middle and leaf pages as
81   - * required to split any existing p2m_mid_missing middle pages.
82   - *
83   - * With the E820 example above, 263424 is not 1GB aligned so we allocate a
84   - * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
85   - * Each entry in the allocate page is "missing" (points to p2m_missing).
86   - *
87   - * Next stage is to determine if we need to do a more granular boundary check
88   - * on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
89   - * We check if the start pfn and end pfn violate that boundary check, and if
90   - * so reserve_brk a (p2m[x][y]) leaf page. This way we have a much finer
91   - * granularity of setting which PFNs are missing and which ones are identity.
92   - * In our example 263424 and 512256 both fail the check so we reserve_brk two
93   - * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing"
94   - * values) and assign them to p2m[1][2] and p2m[1][488] respectively.
95   - *
96   - * At this point we would at minimum reserve_brk one page, but could be up to
97   - * three. Each call to set_phys_range_identity has at maximum a three page
98   - * cost. If we were to query the P2M at this stage, all those entries from
99   - * start PFN through end PFN (so 1029MB -> 2001MB) would return
100   - * INVALID_P2M_ENTRY ("missing").
101   - *
102   - * The next step is to walk from the start pfn to the end pfn setting
103   - * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
104   - * If we find that the middle entry is pointing to p2m_missing we can swap it
105   - * over to p2m_identity - this way covering 4MB (or 2MB) PFN space (and
106   - * similarly swapping p2m_mid_missing for p2m_mid_identity for larger regions).
107   - * At this point we do not need to worry about boundary aligment (so no need to
108   - * reserve_brk a middle page, figure out which PFNs are "missing" and which
109   - * ones are identity), as that has been done earlier. If we find that the
110   - * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
111   - * that page (which covers 512 PFNs) and set the appropriate PFN with
112   - * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we
113   - * set from p2m[1][2][256->511] and p2m[1][488][0->256] with
114   - * IDENTITY_FRAME_BIT set.
115   - *
116   - * All other regions that are void (or not filled) either point to p2m_missing
117   - * (considered missing) or have the default value of INVALID_P2M_ENTRY (also
118   - * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
119   - * contain the INVALID_P2M_ENTRY value and are considered "missing."
120   - *
121   - * Finally, the region beyond the end of of the E820 (4 GB in this example)
122   - * is set to be identity (in case there are MMIO regions placed here).
123   - *
124   - * This is what the p2m ends up looking (for the E820 above) with this
125   - * fabulous drawing:
126   - *
127   - * p2m /--------------\
128   - * /-----\ | &mfn_list[0],| /-----------------\
129   - * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. |
130   - * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] |
131   - * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] |
132   - * |-----| \ | [p2m_identity]+\\ | .... |
133   - * | 2 |--\ \-------------------->| ... | \\ \----------------/
134   - * |-----| \ \---------------/ \\
135   - * | 3 |-\ \ \\ p2m_identity [1]
136   - * |-----| \ \-------------------->/---------------\ /-----------------\
137   - * | .. |\ | | [p2m_identity]+-->| ~0, ~0, ~0, ... |
138   - * \-----/ | | | [p2m_identity]+-->| ..., ~0 |
139   - * | | | .... | \-----------------/
140   - * | | +-[x], ~0, ~0.. +\
141   - * | | \---------------/ \
142   - * | | \-> /---------------\
143   - * | V p2m_mid_missing p2m_missing | IDENTITY[@0] |
144   - * | /-----------------\ /------------\ | IDENTITY[@256]|
145   - * | | [p2m_missing] +---->| ~0, ~0, ...| | ~0, ~0, .... |
146   - * | | [p2m_missing] +---->| ..., ~0 | \---------------/
147   - * | | ... | \------------/
148   - * | \-----------------/
149   - * |
150   - * | p2m_mid_identity
151   - * | /-----------------\
152   - * \-->| [p2m_identity] +---->[1]
153   - * | [p2m_identity] +---->[1]
154   - * | ... |
155   - * \-----------------/
156   - *
157   - * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
158 60 */
159 61  
160 62 #include <linux/init.h>
161 63  
... ... @@ -164,9 +66,11 @@
164 66 #include <linux/sched.h>
165 67 #include <linux/seq_file.h>
166 68 #include <linux/bootmem.h>
  69 +#include <linux/slab.h>
167 70  
168 71 #include <asm/cache.h>
169 72 #include <asm/setup.h>
  73 +#include <asm/uaccess.h>
170 74  
171 75 #include <asm/xen/page.h>
172 76 #include <asm/xen/hypercall.h>
173 77  
174 78  
175 79  
176 80  
177 81  
... ... @@ -178,32 +82,27 @@
178 82 #include "multicalls.h"
179 83 #include "xen-ops.h"
180 84  
  85 +#define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE)
  86 +
181 87 static void __init m2p_override_init(void);
182 88  
  89 +unsigned long *xen_p2m_addr __read_mostly;
  90 +EXPORT_SYMBOL_GPL(xen_p2m_addr);
  91 +unsigned long xen_p2m_size __read_mostly;
  92 +EXPORT_SYMBOL_GPL(xen_p2m_size);
183 93 unsigned long xen_max_p2m_pfn __read_mostly;
  94 +EXPORT_SYMBOL_GPL(xen_max_p2m_pfn);
184 95  
  96 +static DEFINE_SPINLOCK(p2m_update_lock);
  97 +
185 98 static unsigned long *p2m_mid_missing_mfn;
186 99 static unsigned long *p2m_top_mfn;
187 100 static unsigned long **p2m_top_mfn_p;
  101 +static unsigned long *p2m_missing;
  102 +static unsigned long *p2m_identity;
  103 +static pte_t *p2m_missing_pte;
  104 +static pte_t *p2m_identity_pte;
188 105  
189   -/* Placeholders for holes in the address space */
190   -static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
191   -static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
192   -
193   -static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
194   -
195   -static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
196   -static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE);
197   -
198   -RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
199   -
200   -/* For each I/O range remapped we may lose up to two leaf pages for the boundary
201   - * violations and three mid pages to cover up to 3GB. With
202   - * early_can_reuse_p2m_middle() most of the leaf pages will be reused by the
203   - * remapped region.
204   - */
205   -RESERVE_BRK(p2m_identity_remap, PAGE_SIZE * 2 * 3 * MAX_REMAP_RANGES);
206   -
207 106 static inline unsigned p2m_top_index(unsigned long pfn)
208 107 {
209 108 BUG_ON(pfn >= MAX_P2M_PFN);
... ... @@ -220,14 +119,6 @@
220 119 return pfn % P2M_PER_PAGE;
221 120 }
222 121  
223   -static void p2m_top_init(unsigned long ***top)
224   -{
225   - unsigned i;
226   -
227   - for (i = 0; i < P2M_TOP_PER_PAGE; i++)
228   - top[i] = p2m_mid_missing;
229   -}
230   -
231 122 static void p2m_top_mfn_init(unsigned long *top)
232 123 {
233 124 unsigned i;
234 125  
235 126  
236 127  
237 128  
238 129  
239 130  
... ... @@ -244,30 +135,45 @@
244 135 top[i] = p2m_mid_missing_mfn;
245 136 }
246 137  
247   -static void p2m_mid_init(unsigned long **mid, unsigned long *leaf)
  138 +static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf)
248 139 {
249 140 unsigned i;
250 141  
251 142 for (i = 0; i < P2M_MID_PER_PAGE; i++)
252   - mid[i] = leaf;
  143 + mid[i] = virt_to_mfn(leaf);
253 144 }
254 145  
255   -static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf)
  146 +static void p2m_init(unsigned long *p2m)
256 147 {
257 148 unsigned i;
258 149  
259   - for (i = 0; i < P2M_MID_PER_PAGE; i++)
260   - mid[i] = virt_to_mfn(leaf);
  150 + for (i = 0; i < P2M_PER_PAGE; i++)
  151 + p2m[i] = INVALID_P2M_ENTRY;
261 152 }
262 153  
263   -static void p2m_init(unsigned long *p2m)
  154 +static void p2m_init_identity(unsigned long *p2m, unsigned long pfn)
264 155 {
265 156 unsigned i;
266 157  
267   - for (i = 0; i < P2M_MID_PER_PAGE; i++)
268   - p2m[i] = INVALID_P2M_ENTRY;
  158 + for (i = 0; i < P2M_PER_PAGE; i++)
  159 + p2m[i] = IDENTITY_FRAME(pfn + i);
269 160 }
270 161  
  162 +static void * __ref alloc_p2m_page(void)
  163 +{
  164 + if (unlikely(!slab_is_available()))
  165 + return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
  166 +
  167 + return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
  168 +}
  169 +
  170 +/* Only to be called in case of a race for a page just allocated! */
  171 +static void free_p2m_page(void *p)
  172 +{
  173 + BUG_ON(!slab_is_available());
  174 + free_page((unsigned long)p);
  175 +}
  176 +
271 177 /*
272 178 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
273 179 *
274 180  
275 181  
276 182  
277 183  
278 184  
279 185  
280 186  
... ... @@ -280,40 +186,46 @@
280 186 */
281 187 void __ref xen_build_mfn_list_list(void)
282 188 {
283   - unsigned long pfn;
  189 + unsigned long pfn, mfn;
  190 + pte_t *ptep;
  191 + unsigned int level, topidx, mididx;
  192 + unsigned long *mid_mfn_p;
284 193  
285 194 if (xen_feature(XENFEAT_auto_translated_physmap))
286 195 return;
287 196  
288 197 /* Pre-initialize p2m_top_mfn to be completely missing */
289 198 if (p2m_top_mfn == NULL) {
290   - p2m_mid_missing_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
  199 + p2m_mid_missing_mfn = alloc_p2m_page();
291 200 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
292 201  
293   - p2m_top_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
  202 + p2m_top_mfn_p = alloc_p2m_page();
294 203 p2m_top_mfn_p_init(p2m_top_mfn_p);
295 204  
296   - p2m_top_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
  205 + p2m_top_mfn = alloc_p2m_page();
297 206 p2m_top_mfn_init(p2m_top_mfn);
298 207 } else {
299 208 /* Reinitialise, mfn's all change after migration */
300 209 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
301 210 }
302 211  
303   - for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
304   - unsigned topidx = p2m_top_index(pfn);
305   - unsigned mididx = p2m_mid_index(pfn);
306   - unsigned long **mid;
307   - unsigned long *mid_mfn_p;
  212 + for (pfn = 0; pfn < xen_max_p2m_pfn && pfn < MAX_P2M_PFN;
  213 + pfn += P2M_PER_PAGE) {
  214 + topidx = p2m_top_index(pfn);
  215 + mididx = p2m_mid_index(pfn);
308 216  
309   - mid = p2m_top[topidx];
310 217 mid_mfn_p = p2m_top_mfn_p[topidx];
  218 + ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn),
  219 + &level);
  220 + BUG_ON(!ptep || level != PG_LEVEL_4K);
  221 + mfn = pte_mfn(*ptep);
  222 + ptep = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));
311 223  
312 224 /* Don't bother allocating any mfn mid levels if
313 225 * they're just missing, just update the stored mfn,
314 226 * since all could have changed over a migrate.
315 227 */
316   - if (mid == p2m_mid_missing) {
  228 + if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) {
317 229 BUG_ON(mididx);
318 230 BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
319 231 p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
320 232  
... ... @@ -322,19 +234,14 @@
322 234 }
323 235  
324 236 if (mid_mfn_p == p2m_mid_missing_mfn) {
325   - /*
326   - * XXX boot-time only! We should never find
327   - * missing parts of the mfn tree after
328   - * runtime.
329   - */
330   - mid_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
  237 + mid_mfn_p = alloc_p2m_page();
331 238 p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
332 239  
333 240 p2m_top_mfn_p[topidx] = mid_mfn_p;
334 241 }
335 242  
336 243 p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
337   - mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]);
  244 + mid_mfn_p[mididx] = mfn;
338 245 }
339 246 }
340 247  
341 248  
342 249  
343 250  
344 251  
345 252  
346 253  
347 254  
348 255  
349 256  
350 257  
351 258  
352 259  
353 260  
354 261  
355 262  
356 263  
357 264  
358 265  
359 266  
360 267  
361 268  
362 269  
363 270  
364 271  
365 272  
366 273  
367 274  
368 275  
369 276  
370 277  
371 278  
372 279  
373 280  
374 281  
375 282  
376 283  
377 284  
378 285  
379 286  
... ... @@ -353,171 +260,235 @@
353 260 /* Set up p2m_top to point to the domain-builder provided p2m pages */
354 261 void __init xen_build_dynamic_phys_to_machine(void)
355 262 {
356   - unsigned long *mfn_list;
357   - unsigned long max_pfn;
358 263 unsigned long pfn;
359 264  
360 265 if (xen_feature(XENFEAT_auto_translated_physmap))
361 266 return;
362 267  
363   - mfn_list = (unsigned long *)xen_start_info->mfn_list;
364   - max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
365   - xen_max_p2m_pfn = max_pfn;
  268 + xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list;
  269 + xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE);
366 270  
367   - p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
368   - p2m_init(p2m_missing);
369   - p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
370   - p2m_init(p2m_identity);
  271 + for (pfn = xen_start_info->nr_pages; pfn < xen_p2m_size; pfn++)
  272 + xen_p2m_addr[pfn] = INVALID_P2M_ENTRY;
371 273  
372   - p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
373   - p2m_mid_init(p2m_mid_missing, p2m_missing);
374   - p2m_mid_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
375   - p2m_mid_init(p2m_mid_identity, p2m_identity);
  274 + xen_max_p2m_pfn = xen_p2m_size;
  275 +}
376 276  
377   - p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
378   - p2m_top_init(p2m_top);
  277 +#define P2M_TYPE_IDENTITY 0
  278 +#define P2M_TYPE_MISSING 1
  279 +#define P2M_TYPE_PFN 2
  280 +#define P2M_TYPE_UNKNOWN 3
379 281  
380   - /*
381   - * The domain builder gives us a pre-constructed p2m array in
382   - * mfn_list for all the pages initially given to us, so we just
383   - * need to graft that into our tree structure.
384   - */
385   - for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
386   - unsigned topidx = p2m_top_index(pfn);
387   - unsigned mididx = p2m_mid_index(pfn);
  282 +static int xen_p2m_elem_type(unsigned long pfn)
  283 +{
  284 + unsigned long mfn;
388 285  
389   - if (p2m_top[topidx] == p2m_mid_missing) {
390   - unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
391   - p2m_mid_init(mid, p2m_missing);
  286 + if (pfn >= xen_p2m_size)
  287 + return P2M_TYPE_IDENTITY;
392 288  
393   - p2m_top[topidx] = mid;
394   - }
  289 + mfn = xen_p2m_addr[pfn];
395 290  
396   - /*
397   - * As long as the mfn_list has enough entries to completely
398   - * fill a p2m page, pointing into the array is ok. But if
399   - * not the entries beyond the last pfn will be undefined.
400   - */
401   - if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) {
402   - unsigned long p2midx;
  291 + if (mfn == INVALID_P2M_ENTRY)
  292 + return P2M_TYPE_MISSING;
403 293  
404   - p2midx = max_pfn % P2M_PER_PAGE;
405   - for ( ; p2midx < P2M_PER_PAGE; p2midx++)
406   - mfn_list[pfn + p2midx] = INVALID_P2M_ENTRY;
407   - }
408   - p2m_top[topidx][mididx] = &mfn_list[pfn];
409   - }
  294 + if (mfn & IDENTITY_FRAME_BIT)
  295 + return P2M_TYPE_IDENTITY;
410 296  
411   - m2p_override_init();
  297 + return P2M_TYPE_PFN;
412 298 }
413   -#ifdef CONFIG_X86_64
414   -unsigned long __init xen_revector_p2m_tree(void)
  299 +
  300 +static void __init xen_rebuild_p2m_list(unsigned long *p2m)
415 301 {
416   - unsigned long va_start;
417   - unsigned long va_end;
  302 + unsigned int i, chunk;
418 303 unsigned long pfn;
419   - unsigned long pfn_free = 0;
420   - unsigned long *mfn_list = NULL;
421   - unsigned long size;
  304 + unsigned long *mfns;
  305 + pte_t *ptep;
  306 + pmd_t *pmdp;
  307 + int type;
422 308  
423   - va_start = xen_start_info->mfn_list;
424   - /*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long),
425   - * so make sure it is rounded up to that */
426   - size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
427   - va_end = va_start + size;
  309 + p2m_missing = alloc_p2m_page();
  310 + p2m_init(p2m_missing);
  311 + p2m_identity = alloc_p2m_page();
  312 + p2m_init(p2m_identity);
428 313  
429   - /* If we were revectored already, don't do it again. */
430   - if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET)
431   - return 0;
432   -
433   - mfn_list = alloc_bootmem_align(size, PAGE_SIZE);
434   - if (!mfn_list) {
435   - pr_warn("Could not allocate space for a new P2M tree!\n");
436   - return xen_start_info->mfn_list;
  314 + p2m_missing_pte = alloc_p2m_page();
  315 + paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT);
  316 + p2m_identity_pte = alloc_p2m_page();
  317 + paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT);
  318 + for (i = 0; i < PTRS_PER_PTE; i++) {
  319 + set_pte(p2m_missing_pte + i,
  320 + pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL_RO));
  321 + set_pte(p2m_identity_pte + i,
  322 + pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL_RO));
437 323 }
438   - /* Fill it out with INVALID_P2M_ENTRY value */
439   - memset(mfn_list, 0xFF, size);
440 324  
441   - for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) {
442   - unsigned topidx = p2m_top_index(pfn);
443   - unsigned mididx;
444   - unsigned long *mid_p;
  325 + for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) {
  326 + /*
  327 + * Try to map missing/identity PMDs or p2m-pages if possible.
  328 + * We have to respect the structure of the mfn_list_list
  329 + * which will be built just afterwards.
  330 + * Chunk size to test is one p2m page if we are in the middle
  331 + * of a mfn_list_list mid page and the complete mid page area
  332 + * if we are at index 0 of the mid page. Please note that a
  333 + * mid page might cover more than one PMD, e.g. on 32 bit PAE
  334 + * kernels.
  335 + */
  336 + chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ?
  337 + P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE;
445 338  
446   - if (!p2m_top[topidx])
447   - continue;
  339 + type = xen_p2m_elem_type(pfn);
  340 + i = 0;
  341 + if (type != P2M_TYPE_PFN)
  342 + for (i = 1; i < chunk; i++)
  343 + if (xen_p2m_elem_type(pfn + i) != type)
  344 + break;
  345 + if (i < chunk)
  346 + /* Reset to minimal chunk size. */
  347 + chunk = P2M_PER_PAGE;
448 348  
449   - if (p2m_top[topidx] == p2m_mid_missing)
  349 + if (type == P2M_TYPE_PFN || i < chunk) {
  350 + /* Use initial p2m page contents. */
  351 +#ifdef CONFIG_X86_64
  352 + mfns = alloc_p2m_page();
  353 + copy_page(mfns, xen_p2m_addr + pfn);
  354 +#else
  355 + mfns = xen_p2m_addr + pfn;
  356 +#endif
  357 + ptep = populate_extra_pte((unsigned long)(p2m + pfn));
  358 + set_pte(ptep,
  359 + pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL));
450 360 continue;
  361 + }
451 362  
452   - mididx = p2m_mid_index(pfn);
453   - mid_p = p2m_top[topidx][mididx];
454   - if (!mid_p)
  363 + if (chunk == P2M_PER_PAGE) {
  364 + /* Map complete missing or identity p2m-page. */
  365 + mfns = (type == P2M_TYPE_MISSING) ?
  366 + p2m_missing : p2m_identity;
  367 + ptep = populate_extra_pte((unsigned long)(p2m + pfn));
  368 + set_pte(ptep,
  369 + pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL_RO));
455 370 continue;
456   - if ((mid_p == p2m_missing) || (mid_p == p2m_identity))
457   - continue;
  371 + }
458 372  
459   - if ((unsigned long)mid_p == INVALID_P2M_ENTRY)
460   - continue;
  373 + /* Complete missing or identity PMD(s) can be mapped. */
  374 + ptep = (type == P2M_TYPE_MISSING) ?
  375 + p2m_missing_pte : p2m_identity_pte;
  376 + for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
  377 + pmdp = populate_extra_pmd(
  378 + (unsigned long)(p2m + pfn + i * PTRS_PER_PTE));
  379 + set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE));
  380 + }
  381 + }
  382 +}
461 383  
462   - /* The old va. Rebase it on mfn_list */
463   - if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) {
464   - unsigned long *new;
  384 +void __init xen_vmalloc_p2m_tree(void)
  385 +{
  386 + static struct vm_struct vm;
465 387  
466   - if (pfn_free > (size / sizeof(unsigned long))) {
467   - WARN(1, "Only allocated for %ld pages, but we want %ld!\n",
468   - size / sizeof(unsigned long), pfn_free);
469   - return 0;
470   - }
471   - new = &mfn_list[pfn_free];
  388 + vm.flags = VM_ALLOC;
  389 + vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn,
  390 + PMD_SIZE * PMDS_PER_MID_PAGE);
  391 + vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE);
  392 + pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size);
472 393  
473   - copy_page(new, mid_p);
474   - p2m_top[topidx][mididx] = &mfn_list[pfn_free];
  394 + xen_max_p2m_pfn = vm.size / sizeof(unsigned long);
475 395  
476   - pfn_free += P2M_PER_PAGE;
  396 + xen_rebuild_p2m_list(vm.addr);
477 397  
478   - }
479   - /* This should be the leafs allocated for identity from _brk. */
480   - }
481   - return (unsigned long)mfn_list;
  398 + xen_p2m_addr = vm.addr;
  399 + xen_p2m_size = xen_max_p2m_pfn;
482 400  
  401 + xen_inv_extra_mem();
  402 +
  403 + m2p_override_init();
483 404 }
484   -#else
485   -unsigned long __init xen_revector_p2m_tree(void)
486   -{
487   - return 0;
488   -}
489   -#endif
  405 +
490 406 unsigned long get_phys_to_machine(unsigned long pfn)
491 407 {
492   - unsigned topidx, mididx, idx;
  408 + pte_t *ptep;
  409 + unsigned int level;
493 410  
494   - if (unlikely(pfn >= MAX_P2M_PFN))
  411 + if (unlikely(pfn >= xen_p2m_size)) {
  412 + if (pfn < xen_max_p2m_pfn)
  413 + return xen_chk_extra_mem(pfn);
  414 +
495 415 return IDENTITY_FRAME(pfn);
  416 + }
496 417  
497   - topidx = p2m_top_index(pfn);
498   - mididx = p2m_mid_index(pfn);
499   - idx = p2m_index(pfn);
  418 + ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level);
  419 + BUG_ON(!ptep || level != PG_LEVEL_4K);
500 420  
501 421 /*
502 422 * The INVALID_P2M_ENTRY is filled in both p2m_*identity
503 423 * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
504 424 * would be wrong.
505 425 */
506   - if (p2m_top[topidx][mididx] == p2m_identity)
  426 + if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity)))
507 427 return IDENTITY_FRAME(pfn);
508 428  
509   - return p2m_top[topidx][mididx][idx];
  429 + return xen_p2m_addr[pfn];
510 430 }
511 431 EXPORT_SYMBOL_GPL(get_phys_to_machine);
512 432  
513   -static void *alloc_p2m_page(void)
  433 +/*
  434 + * Allocate new pmd(s). It is checked whether the old pmd is still in place.
  435 + * If not, nothing is changed. This is okay as the only reason for allocating
  436 + * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual
  437 + * pmd. In case of PAE/x86-32 there are multiple pmds to allocate!
  438 + */
  439 +static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg)
514 440 {
515   - return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
516   -}
  441 + pte_t *ptechk;
  442 + pte_t *pteret = ptep;
  443 + pte_t *pte_newpg[PMDS_PER_MID_PAGE];
  444 + pmd_t *pmdp;
  445 + unsigned int level;
  446 + unsigned long flags;
  447 + unsigned long vaddr;
  448 + int i;
517 449  
518   -static void free_p2m_page(void *p)
519   -{
520   - free_page((unsigned long)p);
  450 + /* Do all allocations first to bail out in error case. */
  451 + for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
  452 + pte_newpg[i] = alloc_p2m_page();
  453 + if (!pte_newpg[i]) {
  454 + for (i--; i >= 0; i--)
  455 + free_p2m_page(pte_newpg[i]);
  456 +
  457 + return NULL;
  458 + }
  459 + }
  460 +
  461 + vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1);
  462 +
  463 + for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
  464 + copy_page(pte_newpg[i], pte_pg);
  465 + paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT);
  466 +
  467 + pmdp = lookup_pmd_address(vaddr);
  468 + BUG_ON(!pmdp);
  469 +
  470 + spin_lock_irqsave(&p2m_update_lock, flags);
  471 +
  472 + ptechk = lookup_address(vaddr, &level);
  473 + if (ptechk == pte_pg) {
  474 + set_pmd(pmdp,
  475 + __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE));
  476 + if (vaddr == (addr & ~(PMD_SIZE - 1)))
  477 + pteret = pte_offset_kernel(pmdp, addr);
  478 + pte_newpg[i] = NULL;
  479 + }
  480 +
  481 + spin_unlock_irqrestore(&p2m_update_lock, flags);
  482 +
  483 + if (pte_newpg[i]) {
  484 + paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT);
  485 + free_p2m_page(pte_newpg[i]);
  486 + }
  487 +
  488 + vaddr += PMD_SIZE;
  489 + }
  490 +
  491 + return pteret;
521 492 }
522 493  
523 494 /*
524 495  
525 496  
526 497  
527 498  
528 499  
529 500  
530 501  
531 502  
532 503  
533 504  
534 505  
535 506  
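The chunk-size comment inside xen_rebuild_p2m_list() in the hunk above is easier to see with numbers. A hedged sketch using the usual x86-64 values P2M_PER_PAGE = 512 and P2M_MID_PER_PAGE = 512 (assumed here; the demo_* names are not kernel symbols):

#include <stdio.h>

#define DEMO_P2M_PER_PAGE      512UL    /* assumed x86-64 value */
#define DEMO_P2M_MID_PER_PAGE  512UL    /* assumed x86-64 value */

static unsigned long demo_chunk(unsigned long pfn)
{
        /* Whole mid-page area at a mid-page boundary, one p2m page otherwise. */
        return (pfn & (DEMO_P2M_PER_PAGE * DEMO_P2M_MID_PER_PAGE - 1)) ?
               DEMO_P2M_PER_PAGE : DEMO_P2M_PER_PAGE * DEMO_P2M_MID_PER_PAGE;
}

int main(void)
{
        printf("chunk at pfn 0:      %lu\n", demo_chunk(0));       /* 262144 */
        printf("chunk at pfn 512:    %lu\n", demo_chunk(512));     /* 512    */
        printf("chunk at pfn 262144: %lu\n", demo_chunk(262144));  /* 262144 */
        return 0;
}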
... ... @@ -530,58 +501,62 @@
530 501 static bool alloc_p2m(unsigned long pfn)
531 502 {
532 503 unsigned topidx, mididx;
533   - unsigned long ***top_p, **mid;
534 504 unsigned long *top_mfn_p, *mid_mfn;
535   - unsigned long *p2m_orig;
  505 + pte_t *ptep, *pte_pg;
  506 + unsigned int level;
  507 + unsigned long flags;
  508 + unsigned long addr = (unsigned long)(xen_p2m_addr + pfn);
  509 + unsigned long p2m_pfn;
536 510  
537 511 topidx = p2m_top_index(pfn);
538 512 mididx = p2m_mid_index(pfn);
539 513  
540   - top_p = &p2m_top[topidx];
541   - mid = ACCESS_ONCE(*top_p);
  514 + ptep = lookup_address(addr, &level);
  515 + BUG_ON(!ptep || level != PG_LEVEL_4K);
  516 + pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));
542 517  
543   - if (mid == p2m_mid_missing) {
544   - /* Mid level is missing, allocate a new one */
545   - mid = alloc_p2m_page();
546   - if (!mid)
  518 + if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) {
  519 + /* PMD level is missing, allocate a new one */
  520 + ptep = alloc_p2m_pmd(addr, ptep, pte_pg);
  521 + if (!ptep)
547 522 return false;
548   -
549   - p2m_mid_init(mid, p2m_missing);
550   -
551   - if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
552   - free_p2m_page(mid);
553 523 }
554 524  
555   - top_mfn_p = &p2m_top_mfn[topidx];
556   - mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);
  525 + if (p2m_top_mfn) {
  526 + top_mfn_p = &p2m_top_mfn[topidx];
  527 + mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);
557 528  
558   - BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
  529 + BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
559 530  
560   - if (mid_mfn == p2m_mid_missing_mfn) {
561   - /* Separately check the mid mfn level */
562   - unsigned long missing_mfn;
563   - unsigned long mid_mfn_mfn;
564   - unsigned long old_mfn;
  531 + if (mid_mfn == p2m_mid_missing_mfn) {
  532 + /* Separately check the mid mfn level */
  533 + unsigned long missing_mfn;
  534 + unsigned long mid_mfn_mfn;
  535 + unsigned long old_mfn;
565 536  
566   - mid_mfn = alloc_p2m_page();
567   - if (!mid_mfn)
568   - return false;
  537 + mid_mfn = alloc_p2m_page();
  538 + if (!mid_mfn)
  539 + return false;
569 540  
570   - p2m_mid_mfn_init(mid_mfn, p2m_missing);
  541 + p2m_mid_mfn_init(mid_mfn, p2m_missing);
571 542  
572   - missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
573   - mid_mfn_mfn = virt_to_mfn(mid_mfn);
574   - old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
575   - if (old_mfn != missing_mfn) {
576   - free_p2m_page(mid_mfn);
577   - mid_mfn = mfn_to_virt(old_mfn);
578   - } else {
579   - p2m_top_mfn_p[topidx] = mid_mfn;
  543 + missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
  544 + mid_mfn_mfn = virt_to_mfn(mid_mfn);
  545 + old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
  546 + if (old_mfn != missing_mfn) {
  547 + free_p2m_page(mid_mfn);
  548 + mid_mfn = mfn_to_virt(old_mfn);
  549 + } else {
  550 + p2m_top_mfn_p[topidx] = mid_mfn;
  551 + }
580 552 }
  553 + } else {
  554 + mid_mfn = NULL;
581 555 }
582 556  
583   - p2m_orig = ACCESS_ONCE(p2m_top[topidx][mididx]);
584   - if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) {
  557 + p2m_pfn = pte_pfn(ACCESS_ONCE(*ptep));
  558 + if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) ||
  559 + p2m_pfn == PFN_DOWN(__pa(p2m_missing))) {
585 560 /* p2m leaf page is missing */
586 561 unsigned long *p2m;
587 562  
588 563  
589 564  
590 565  
591 566  
592 567  
593 568  
594 569  
595 570  
... ... @@ -589,183 +564,36 @@
589 564 if (!p2m)
590 565 return false;
591 566  
592   - p2m_init(p2m);
593   -
594   - if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig)
595   - free_p2m_page(p2m);
  567 + if (p2m_pfn == PFN_DOWN(__pa(p2m_missing)))
  568 + p2m_init(p2m);
596 569 else
597   - mid_mfn[mididx] = virt_to_mfn(p2m);
598   - }
  570 + p2m_init_identity(p2m, pfn);
599 571  
600   - return true;
601   -}
  572 + spin_lock_irqsave(&p2m_update_lock, flags);
602 573  
603   -static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary)
604   -{
605   - unsigned topidx, mididx, idx;
606   - unsigned long *p2m;
607   -
608   - topidx = p2m_top_index(pfn);
609   - mididx = p2m_mid_index(pfn);
610   - idx = p2m_index(pfn);
611   -
612   - /* Pfff.. No boundary cross-over, lets get out. */
613   - if (!idx && check_boundary)
614   - return false;
615   -
616   - WARN(p2m_top[topidx][mididx] == p2m_identity,
617   - "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n",
618   - topidx, mididx);
619   -
620   - /*
621   - * Could be done by xen_build_dynamic_phys_to_machine..
622   - */
623   - if (p2m_top[topidx][mididx] != p2m_missing)
624   - return false;
625   -
626   - /* Boundary cross-over for the edges: */
627   - p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
628   -
629   - p2m_init(p2m);
630   -
631   - p2m_top[topidx][mididx] = p2m;
632   -
633   - return true;
634   -}
635   -
636   -static bool __init early_alloc_p2m_middle(unsigned long pfn)
637   -{
638   - unsigned topidx = p2m_top_index(pfn);
639   - unsigned long **mid;
640   -
641   - mid = p2m_top[topidx];
642   - if (mid == p2m_mid_missing) {
643   - mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
644   -
645   - p2m_mid_init(mid, p2m_missing);
646   -
647   - p2m_top[topidx] = mid;
648   - }
649   - return true;
650   -}
651   -
652   -/*
653   - * Skim over the P2M tree looking at pages that are either filled with
654   - * INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and
655   - * replace the P2M leaf with a p2m_missing or p2m_identity.
656   - * Stick the old page in the new P2M tree location.
657   - */
658   -static bool __init early_can_reuse_p2m_middle(unsigned long set_pfn)
659   -{
660   - unsigned topidx;
661   - unsigned mididx;
662   - unsigned ident_pfns;
663   - unsigned inv_pfns;
664   - unsigned long *p2m;
665   - unsigned idx;
666   - unsigned long pfn;
667   -
668   - /* We only look when this entails a P2M middle layer */
669   - if (p2m_index(set_pfn))
670   - return false;
671   -
672   - for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) {
673   - topidx = p2m_top_index(pfn);
674   -
675   - if (!p2m_top[topidx])
676   - continue;
677   -
678   - if (p2m_top[topidx] == p2m_mid_missing)
679   - continue;
680   -
681   - mididx = p2m_mid_index(pfn);
682   - p2m = p2m_top[topidx][mididx];
683   - if (!p2m)
684   - continue;
685   -
686   - if ((p2m == p2m_missing) || (p2m == p2m_identity))
687   - continue;
688   -
689   - if ((unsigned long)p2m == INVALID_P2M_ENTRY)
690   - continue;
691   -
692   - ident_pfns = 0;
693   - inv_pfns = 0;
694   - for (idx = 0; idx < P2M_PER_PAGE; idx++) {
695   - /* IDENTITY_PFNs are 1:1 */
696   - if (p2m[idx] == IDENTITY_FRAME(pfn + idx))
697   - ident_pfns++;
698   - else if (p2m[idx] == INVALID_P2M_ENTRY)
699   - inv_pfns++;
700   - else
701   - break;
  574 + if (pte_pfn(*ptep) == p2m_pfn) {
  575 + set_pte(ptep,
  576 + pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL));
  577 + if (mid_mfn)
  578 + mid_mfn[mididx] = virt_to_mfn(p2m);
  579 + p2m = NULL;
702 580 }
703   - if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE))
704   - goto found;
705   - }
706   - return false;
707   -found:
708   - /* Found one, replace old with p2m_identity or p2m_missing */
709   - p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing);
710 581  
711   - /* Reset where we want to stick the old page in. */
712   - topidx = p2m_top_index(set_pfn);
713   - mididx = p2m_mid_index(set_pfn);
  582 + spin_unlock_irqrestore(&p2m_update_lock, flags);
714 583  
715   - /* This shouldn't happen */
716   - if (WARN_ON(p2m_top[topidx] == p2m_mid_missing))
717   - early_alloc_p2m_middle(set_pfn);
718   -
719   - if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing))
720   - return false;
721   -
722   - p2m_init(p2m);
723   - p2m_top[topidx][mididx] = p2m;
724   -
725   - return true;
726   -}
727   -bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
728   -{
729   - if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
730   - if (!early_alloc_p2m_middle(pfn))
731   - return false;
732   -
733   - if (early_can_reuse_p2m_middle(pfn))
734   - return __set_phys_to_machine(pfn, mfn);
735   -
736   - if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/))
737   - return false;
738   -
739   - if (!__set_phys_to_machine(pfn, mfn))
740   - return false;
  584 + if (p2m)
  585 + free_p2m_page(p2m);
741 586 }
742 587  
743 588 return true;
744 589 }
745 590  
746   -static void __init early_split_p2m(unsigned long pfn)
747   -{
748   - unsigned long mididx, idx;
749   -
750   - mididx = p2m_mid_index(pfn);
751   - idx = p2m_index(pfn);
752   -
753   - /*
754   - * Allocate new middle and leaf pages if this pfn lies in the
755   - * middle of one.
756   - */
757   - if (mididx || idx)
758   - early_alloc_p2m_middle(pfn);
759   - if (idx)
760   - early_alloc_p2m(pfn, false);
761   -}
762   -
763 591 unsigned long __init set_phys_range_identity(unsigned long pfn_s,
764 592 unsigned long pfn_e)
765 593 {
766 594 unsigned long pfn;
767 595  
768   - if (unlikely(pfn_s >= MAX_P2M_PFN))
  596 + if (unlikely(pfn_s >= xen_p2m_size))
769 597 return 0;
770 598  
771 599 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
772 600  
773 601  
774 602  
775 603  
776 604  
777 605  
778 606  
779 607  
780 608  
781 609  
782 610  
783 611  
... ... @@ -774,101 +602,51 @@
774 602 if (pfn_s > pfn_e)
775 603 return 0;
776 604  
777   - if (pfn_e > MAX_P2M_PFN)
778   - pfn_e = MAX_P2M_PFN;
  605 + if (pfn_e > xen_p2m_size)
  606 + pfn_e = xen_p2m_size;
779 607  
780   - early_split_p2m(pfn_s);
781   - early_split_p2m(pfn_e);
  608 + for (pfn = pfn_s; pfn < pfn_e; pfn++)
  609 + xen_p2m_addr[pfn] = IDENTITY_FRAME(pfn);
782 610  
783   - for (pfn = pfn_s; pfn < pfn_e;) {
784   - unsigned topidx = p2m_top_index(pfn);
785   - unsigned mididx = p2m_mid_index(pfn);
786   -
787   - if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
788   - break;
789   - pfn++;
790   -
791   - /*
792   - * If the PFN was set to a middle or leaf identity
793   - * page the remainder must also be identity, so skip
794   - * ahead to the next middle or leaf entry.
795   - */
796   - if (p2m_top[topidx] == p2m_mid_identity)
797   - pfn = ALIGN(pfn, P2M_MID_PER_PAGE * P2M_PER_PAGE);
798   - else if (p2m_top[topidx][mididx] == p2m_identity)
799   - pfn = ALIGN(pfn, P2M_PER_PAGE);
800   - }
801   -
802   - WARN((pfn - pfn_s) != (pfn_e - pfn_s),
803   - "Identity mapping failed. We are %ld short of 1-1 mappings!\n",
804   - (pfn_e - pfn_s) - (pfn - pfn_s));
805   -
806 611 return pfn - pfn_s;
807 612 }
808 613  
809   -/* Try to install p2m mapping; fail if intermediate bits missing */
810 614 bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
811 615 {
812   - unsigned topidx, mididx, idx;
  616 + pte_t *ptep;
  617 + unsigned int level;
813 618  
814 619 /* don't track P2M changes in autotranslate guests */
815 620 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
816 621 return true;
817 622  
818   - if (unlikely(pfn >= MAX_P2M_PFN)) {
  623 + if (unlikely(pfn >= xen_p2m_size)) {
819 624 BUG_ON(mfn != INVALID_P2M_ENTRY);
820 625 return true;
821 626 }
822 627  
823   - topidx = p2m_top_index(pfn);
824   - mididx = p2m_mid_index(pfn);
825   - idx = p2m_index(pfn);
  628 + if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn)))
  629 + return true;
826 630  
827   - /* For sparse holes were the p2m leaf has real PFN along with
828   - * PCI holes, stick in the PFN as the MFN value.
829   - *
830   - * set_phys_range_identity() will have allocated new middle
831   - * and leaf pages as required so an existing p2m_mid_missing
832   - * or p2m_missing mean that whole range will be identity so
833   - * these can be switched to p2m_mid_identity or p2m_identity.
834   - */
835   - if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
836   - if (p2m_top[topidx] == p2m_mid_identity)
837   - return true;
  631 + ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level);
  632 + BUG_ON(!ptep || level != PG_LEVEL_4K);
838 633  
839   - if (p2m_top[topidx] == p2m_mid_missing) {
840   - WARN_ON(cmpxchg(&p2m_top[topidx], p2m_mid_missing,
841   - p2m_mid_identity) != p2m_mid_missing);
842   - return true;
843   - }
844   -
845   - if (p2m_top[topidx][mididx] == p2m_identity)
846   - return true;
847   -
848   - /* Swap over from MISSING to IDENTITY if needed. */
849   - if (p2m_top[topidx][mididx] == p2m_missing) {
850   - WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
851   - p2m_identity) != p2m_missing);
852   - return true;
853   - }
854   - }
855   -
856   - if (p2m_top[topidx][mididx] == p2m_missing)
  634 + if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_missing)))
857 635 return mfn == INVALID_P2M_ENTRY;
858 636  
859   - p2m_top[topidx][mididx][idx] = mfn;
  637 + if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity)))
  638 + return mfn == IDENTITY_FRAME(pfn);
860 639  
861   - return true;
  640 + return false;
862 641 }
863 642  
864 643 bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
865 644 {
866   - if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
  645 + if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
867 646 if (!alloc_p2m(pfn))
868 647 return false;
869 648  
870   - if (!__set_phys_to_machine(pfn, mfn))
871   - return false;
  649 + return __set_phys_to_machine(pfn, mfn);
872 650 }
873 651  
874 652 return true;
875 653  
... ... @@ -877,15 +655,16 @@
877 655 #define M2P_OVERRIDE_HASH_SHIFT 10
878 656 #define M2P_OVERRIDE_HASH (1 << M2P_OVERRIDE_HASH_SHIFT)
879 657  
880   -static RESERVE_BRK_ARRAY(struct list_head, m2p_overrides, M2P_OVERRIDE_HASH);
  658 +static struct list_head *m2p_overrides;
881 659 static DEFINE_SPINLOCK(m2p_override_lock);
882 660  
883 661 static void __init m2p_override_init(void)
884 662 {
885 663 unsigned i;
886 664  
887   - m2p_overrides = extend_brk(sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH,
888   - sizeof(unsigned long));
  665 + m2p_overrides = alloc_bootmem_align(
  666 + sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH,
  667 + sizeof(unsigned long));
889 668  
890 669 for (i = 0; i < M2P_OVERRIDE_HASH; i++)
891 670 INIT_LIST_HEAD(&m2p_overrides[i]);
892 671  
... ... @@ -896,68 +675,9 @@
896 675 return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT);
897 676 }
898 677  
899   -int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
900   - struct gnttab_map_grant_ref *kmap_ops,
901   - struct page **pages, unsigned int count)
902   -{
903   - int i, ret = 0;
904   - bool lazy = false;
905   - pte_t *pte;
906   -
907   - if (xen_feature(XENFEAT_auto_translated_physmap))
908   - return 0;
909   -
910   - if (kmap_ops &&
911   - !in_interrupt() &&
912   - paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
913   - arch_enter_lazy_mmu_mode();
914   - lazy = true;
915   - }
916   -
917   - for (i = 0; i < count; i++) {
918   - unsigned long mfn, pfn;
919   -
920   - /* Do not add to override if the map failed. */
921   - if (map_ops[i].status)
922   - continue;
923   -
924   - if (map_ops[i].flags & GNTMAP_contains_pte) {
925   - pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
926   - (map_ops[i].host_addr & ~PAGE_MASK));
927   - mfn = pte_mfn(*pte);
928   - } else {
929   - mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
930   - }
931   - pfn = page_to_pfn(pages[i]);
932   -
933   - WARN_ON(PagePrivate(pages[i]));
934   - SetPagePrivate(pages[i]);
935   - set_page_private(pages[i], mfn);
936   - pages[i]->index = pfn_to_mfn(pfn);
937   -
938   - if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
939   - ret = -ENOMEM;
940   - goto out;
941   - }
942   -
943   - if (kmap_ops) {
944   - ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
945   - if (ret)
946   - goto out;
947   - }
948   - }
949   -
950   -out:
951   - if (lazy)
952   - arch_leave_lazy_mmu_mode();
953   -
954   - return ret;
955   -}
956   -EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
957   -
958 678 /* Add an MFN override for a particular page */
959   -int m2p_add_override(unsigned long mfn, struct page *page,
960   - struct gnttab_map_grant_ref *kmap_op)
  679 +static int m2p_add_override(unsigned long mfn, struct page *page,
  680 + struct gnttab_map_grant_ref *kmap_op)
961 681 {
962 682 unsigned long flags;
963 683 unsigned long pfn;
... ... @@ -970,7 +690,7 @@
970 690 address = (unsigned long)__va(pfn << PAGE_SHIFT);
971 691 ptep = lookup_address(address, &level);
972 692 if (WARN(ptep == NULL || level != PG_LEVEL_4K,
973   - "m2p_add_override: pfn %lx not mapped", pfn))
  693 + "m2p_add_override: pfn %lx not mapped", pfn))
974 694 return -EINVAL;
975 695 }
976 696  
977 697  
978 698  
979 699  
... ... @@ -1004,19 +724,19 @@
1004 724 * because mfn_to_pfn (that ends up being called by GUPF) will
1005 725 * return the backend pfn rather than the frontend pfn. */
1006 726 pfn = mfn_to_pfn_no_overrides(mfn);
1007   - if (get_phys_to_machine(pfn) == mfn)
  727 + if (__pfn_to_mfn(pfn) == mfn)
1008 728 set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
1009 729  
1010 730 return 0;
1011 731 }
1012   -EXPORT_SYMBOL_GPL(m2p_add_override);
1013 732  
1014   -int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
1015   - struct gnttab_map_grant_ref *kmap_ops,
1016   - struct page **pages, unsigned int count)
  733 +int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
  734 + struct gnttab_map_grant_ref *kmap_ops,
  735 + struct page **pages, unsigned int count)
1017 736 {
1018 737 int i, ret = 0;
1019 738 bool lazy = false;
  739 + pte_t *pte;
1020 740  
1021 741 if (xen_feature(XENFEAT_auto_translated_physmap))
1022 742 return 0;
1023 743  
1024 744  
1025 745  
1026 746  
1027 747  
1028 748  
1029 749  
1030 750  
1031 751  
... ... @@ -1029,37 +749,77 @@
1029 749 }
1030 750  
1031 751 for (i = 0; i < count; i++) {
1032   - unsigned long mfn = get_phys_to_machine(page_to_pfn(pages[i]));
1033   - unsigned long pfn = page_to_pfn(pages[i]);
  752 + unsigned long mfn, pfn;
1034 753  
1035   - if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) {
1036   - ret = -EINVAL;
1037   - goto out;
  754 + /* Do not add to override if the map failed. */
  755 + if (map_ops[i].status)
  756 + continue;
  757 +
  758 + if (map_ops[i].flags & GNTMAP_contains_pte) {
  759 + pte = (pte_t *)(mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
  760 + (map_ops[i].host_addr & ~PAGE_MASK));
  761 + mfn = pte_mfn(*pte);
  762 + } else {
  763 + mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
1038 764 }
  765 + pfn = page_to_pfn(pages[i]);
1039 766  
1040   - set_page_private(pages[i], INVALID_P2M_ENTRY);
1041   - WARN_ON(!PagePrivate(pages[i]));
1042   - ClearPagePrivate(pages[i]);
1043   - set_phys_to_machine(pfn, pages[i]->index);
  767 + WARN_ON(PagePrivate(pages[i]));
  768 + SetPagePrivate(pages[i]);
  769 + set_page_private(pages[i], mfn);
  770 + pages[i]->index = pfn_to_mfn(pfn);
1044 771  
1045   - if (kmap_ops)
1046   - ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn);
1047   - if (ret)
  772 + if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
  773 + ret = -ENOMEM;
1048 774 goto out;
  775 + }
  776 +
  777 + if (kmap_ops) {
  778 + ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
  779 + if (ret)
  780 + goto out;
  781 + }
1049 782 }
1050 783  
1051 784 out:
1052 785 if (lazy)
1053 786 arch_leave_lazy_mmu_mode();
  787 +
1054 788 return ret;
1055 789 }
1056   -EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
  790 +EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
1057 791  
1058   -int m2p_remove_override(struct page *page,
1059   - struct gnttab_map_grant_ref *kmap_op,
1060   - unsigned long mfn)
  792 +static struct page *m2p_find_override(unsigned long mfn)
1061 793 {
1062 794 unsigned long flags;
  795 + struct list_head *bucket;
  796 + struct page *p, *ret;
  797 +
  798 + if (unlikely(!m2p_overrides))
  799 + return NULL;
  800 +
  801 + ret = NULL;
  802 + bucket = &m2p_overrides[mfn_hash(mfn)];
  803 +
  804 + spin_lock_irqsave(&m2p_override_lock, flags);
  805 +
  806 + list_for_each_entry(p, bucket, lru) {
  807 + if (page_private(p) == mfn) {
  808 + ret = p;
  809 + break;
  810 + }
  811 + }
  812 +
  813 + spin_unlock_irqrestore(&m2p_override_lock, flags);
  814 +
  815 + return ret;
  816 +}
  817 +
  818 +static int m2p_remove_override(struct page *page,
  819 + struct gnttab_map_grant_ref *kmap_op,
  820 + unsigned long mfn)
  821 +{
  822 + unsigned long flags;
1063 823 unsigned long pfn;
1064 824 unsigned long uninitialized_var(address);
1065 825 unsigned level;
... ... @@ -1072,7 +832,7 @@
1072 832 ptep = lookup_address(address, &level);
1073 833  
1074 834 if (WARN(ptep == NULL || level != PG_LEVEL_4K,
1075   - "m2p_remove_override: pfn %lx not mapped", pfn))
  835 + "m2p_remove_override: pfn %lx not mapped", pfn))
1076 836 return -EINVAL;
1077 837 }
1078 838  
... ... @@ -1102,9 +862,8 @@
1102 862 * hypercall actually returned an error.
1103 863 */
1104 864 if (kmap_op->handle == GNTST_general_error) {
1105   - printk(KERN_WARNING "m2p_remove_override: "
1106   - "pfn %lx mfn %lx, failed to modify kernel mappings",
1107   - pfn, mfn);
  865 + pr_warn("m2p_remove_override: pfn %lx mfn %lx, failed to modify kernel mappings",
  866 + pfn, mfn);
1108 867 put_balloon_scratch_page();
1109 868 return -1;
1110 869 }
1111 870  
... ... @@ -1112,14 +871,14 @@
1112 871 xen_mc_batch();
1113 872  
1114 873 mcs = __xen_mc_entry(
1115   - sizeof(struct gnttab_unmap_and_replace));
  874 + sizeof(struct gnttab_unmap_and_replace));
1116 875 unmap_op = mcs.args;
1117 876 unmap_op->host_addr = kmap_op->host_addr;
1118 877 unmap_op->new_addr = scratch_page_address;
1119 878 unmap_op->handle = kmap_op->handle;
1120 879  
1121 880 MULTI_grant_table_op(mcs.mc,
1122   - GNTTABOP_unmap_and_replace, unmap_op, 1);
  881 + GNTTABOP_unmap_and_replace, unmap_op, 1);
1123 882  
1124 883 mcs = __xen_mc_entry(0);
1125 884 MULTI_update_va_mapping(mcs.mc, scratch_page_address,
1126 885  
1127 886  
1128 887  
1129 888  
1130 889  
1131 890  
1132 891  
1133 892  
1134 893  
1135 894  
... ... @@ -1145,35 +904,56 @@
1145 904 * pfn again. */
1146 905 mfn &= ~FOREIGN_FRAME_BIT;
1147 906 pfn = mfn_to_pfn_no_overrides(mfn);
1148   - if (get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
  907 + if (__pfn_to_mfn(pfn) == FOREIGN_FRAME(mfn) &&
1149 908 m2p_find_override(mfn) == NULL)
1150 909 set_phys_to_machine(pfn, mfn);
1151 910  
1152 911 return 0;
1153 912 }
1154   -EXPORT_SYMBOL_GPL(m2p_remove_override);
1155 913  
1156   -struct page *m2p_find_override(unsigned long mfn)
  914 +int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
  915 + struct gnttab_map_grant_ref *kmap_ops,
  916 + struct page **pages, unsigned int count)
1157 917 {
1158   - unsigned long flags;
1159   - struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)];
1160   - struct page *p, *ret;
  918 + int i, ret = 0;
  919 + bool lazy = false;
1161 920  
1162   - ret = NULL;
  921 + if (xen_feature(XENFEAT_auto_translated_physmap))
  922 + return 0;
1163 923  
1164   - spin_lock_irqsave(&m2p_override_lock, flags);
  924 + if (kmap_ops &&
  925 + !in_interrupt() &&
  926 + paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
  927 + arch_enter_lazy_mmu_mode();
  928 + lazy = true;
  929 + }
1165 930  
1166   - list_for_each_entry(p, bucket, lru) {
1167   - if (page_private(p) == mfn) {
1168   - ret = p;
1169   - break;
  931 + for (i = 0; i < count; i++) {
  932 + unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
  933 + unsigned long pfn = page_to_pfn(pages[i]);
  934 +
  935 + if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) {
  936 + ret = -EINVAL;
  937 + goto out;
1170 938 }
1171   - }
1172 939  
1173   - spin_unlock_irqrestore(&m2p_override_lock, flags);
  940 + set_page_private(pages[i], INVALID_P2M_ENTRY);
  941 + WARN_ON(!PagePrivate(pages[i]));
  942 + ClearPagePrivate(pages[i]);
  943 + set_phys_to_machine(pfn, pages[i]->index);
1174 944  
  945 + if (kmap_ops)
  946 + ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn);
  947 + if (ret)
  948 + goto out;
  949 + }
  950 +
  951 +out:
  952 + if (lazy)
  953 + arch_leave_lazy_mmu_mode();
1175 954 return ret;
1176 955 }
  956 +EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
1177 957  
1178 958 unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
1179 959 {
1180 960  
1181 961  
1182 962  
1183 963  
1184 964  
1185 965  
1186 966  
... ... @@ -1192,79 +972,29 @@
1192 972 #include "debugfs.h"
1193 973 static int p2m_dump_show(struct seq_file *m, void *v)
1194 974 {
1195   - static const char * const level_name[] = { "top", "middle",
1196   - "entry", "abnormal", "error"};
1197   -#define TYPE_IDENTITY 0
1198   -#define TYPE_MISSING 1
1199   -#define TYPE_PFN 2
1200   -#define TYPE_UNKNOWN 3
1201 975 static const char * const type_name[] = {
1202   - [TYPE_IDENTITY] = "identity",
1203   - [TYPE_MISSING] = "missing",
1204   - [TYPE_PFN] = "pfn",
1205   - [TYPE_UNKNOWN] = "abnormal"};
1206   - unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
1207   - unsigned int uninitialized_var(prev_level);
1208   - unsigned int uninitialized_var(prev_type);
  976 + [P2M_TYPE_IDENTITY] = "identity",
  977 + [P2M_TYPE_MISSING] = "missing",
  978 + [P2M_TYPE_PFN] = "pfn",
  979 + [P2M_TYPE_UNKNOWN] = "abnormal"};
  980 + unsigned long pfn, first_pfn;
  981 + int type, prev_type;
1209 982  
1210   - if (!p2m_top)
1211   - return 0;
  983 + prev_type = xen_p2m_elem_type(0);
  984 + first_pfn = 0;
1212 985  
1213   - for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) {
1214   - unsigned topidx = p2m_top_index(pfn);
1215   - unsigned mididx = p2m_mid_index(pfn);
1216   - unsigned idx = p2m_index(pfn);
1217   - unsigned lvl, type;
1218   -
1219   - lvl = 4;
1220   - type = TYPE_UNKNOWN;
1221   - if (p2m_top[topidx] == p2m_mid_missing) {
1222   - lvl = 0; type = TYPE_MISSING;
1223   - } else if (p2m_top[topidx] == NULL) {
1224   - lvl = 0; type = TYPE_UNKNOWN;
1225   - } else if (p2m_top[topidx][mididx] == NULL) {
1226   - lvl = 1; type = TYPE_UNKNOWN;
1227   - } else if (p2m_top[topidx][mididx] == p2m_identity) {
1228   - lvl = 1; type = TYPE_IDENTITY;
1229   - } else if (p2m_top[topidx][mididx] == p2m_missing) {
1230   - lvl = 1; type = TYPE_MISSING;
1231   - } else if (p2m_top[topidx][mididx][idx] == 0) {
1232   - lvl = 2; type = TYPE_UNKNOWN;
1233   - } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
1234   - lvl = 2; type = TYPE_IDENTITY;
1235   - } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
1236   - lvl = 2; type = TYPE_MISSING;
1237   - } else if (p2m_top[topidx][mididx][idx] == pfn) {
1238   - lvl = 2; type = TYPE_PFN;
1239   - } else if (p2m_top[topidx][mididx][idx] != pfn) {
1240   - lvl = 2; type = TYPE_PFN;
1241   - }
1242   - if (pfn == 0) {
1243   - prev_level = lvl;
  986 + for (pfn = 0; pfn < xen_p2m_size; pfn++) {
  987 + type = xen_p2m_elem_type(pfn);
  988 + if (type != prev_type) {
  989 + seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn,
  990 + type_name[prev_type]);
1244 991 prev_type = type;
  992 + first_pfn = pfn;
1245 993 }
1246   - if (pfn == MAX_DOMAIN_PAGES-1) {
1247   - lvl = 3;
1248   - type = TYPE_UNKNOWN;
1249   - }
1250   - if (prev_type != type) {
1251   - seq_printf(m, " [0x%lx->0x%lx] %s\n",
1252   - prev_pfn_type, pfn, type_name[prev_type]);
1253   - prev_pfn_type = pfn;
1254   - prev_type = type;
1255   - }
1256   - if (prev_level != lvl) {
1257   - seq_printf(m, " [0x%lx->0x%lx] level %s\n",
1258   - prev_pfn_level, pfn, level_name[prev_level]);
1259   - prev_pfn_level = pfn;
1260   - prev_level = lvl;
1261   - }
1262 994 }
  995 + seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn,
  996 + type_name[prev_type]);
1263 997 return 0;
1264   -#undef TYPE_IDENTITY
1265   -#undef TYPE_MISSING
1266   -#undef TYPE_PFN
1267   -#undef TYPE_UNKNOWN
1268 998 }
1269 999  
1270 1000 static int p2m_dump_open(struct inode *inode, struct file *filp)
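The rewritten p2m_dump_show() above reduces the dump to a run-length walk over the linear p2m list: one output line per run of equal entry types, plus a final seq_printf() after the loop to flush the last run. A minimal user-space sketch of that coalescing logic, with a fake type array standing in for xen_p2m_elem_type() (which is not part of this hunk):

#include <stdio.h>

enum { P2M_TYPE_IDENTITY, P2M_TYPE_MISSING, P2M_TYPE_PFN, P2M_TYPE_UNKNOWN };

static const char * const type_name[] = {
        [P2M_TYPE_IDENTITY] = "identity",
        [P2M_TYPE_MISSING]  = "missing",
        [P2M_TYPE_PFN]      = "pfn",
        [P2M_TYPE_UNKNOWN]  = "abnormal",
};

/* Fake per-pfn types standing in for xen_p2m_elem_type(pfn). */
static const int fake_types[] = { 2, 2, 2, 0, 0, 1, 1, 1, 2 };
#define FAKE_SIZE (sizeof(fake_types) / sizeof(fake_types[0]))

int main(void)
{
        unsigned long pfn, first_pfn = 0;
        int type, prev_type = fake_types[0];

        for (pfn = 0; pfn < FAKE_SIZE; pfn++) {
                type = fake_types[pfn];
                if (type != prev_type) {
                        /* Close the previous run [first_pfn, pfn). */
                        printf(" [0x%lx->0x%lx] %s\n", first_pfn, pfn,
                               type_name[prev_type]);
                        prev_type = type;
                        first_pfn = pfn;
                }
        }
        /* Flush the last run, mirroring the seq_printf() after the loop. */
        printf(" [0x%lx->0x%lx] %s\n", first_pfn, pfn, type_name[prev_type]);
        return 0;
}
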
arch/x86/xen/setup.c
... ... @@ -30,6 +30,7 @@
30 30 #include "xen-ops.h"
31 31 #include "vdso.h"
32 32 #include "p2m.h"
  33 +#include "mmu.h"
33 34  
34 35 /* These are code, but not functions. Defined in entry.S */
35 36 extern const char xen_hypervisor_callback[];
... ... @@ -47,8 +48,19 @@
47 48 /* Number of pages released from the initial allocation. */
48 49 unsigned long xen_released_pages;
49 50  
50   -/* Buffer used to remap identity mapped pages */
51   -unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata;
  51 +/*
  52 + * Buffer used to remap identity mapped pages. We only need the virtual space.
  53 + * The physical page behind this address is remapped as needed to different
  54 + * buffer pages.
  55 + */
  56 +#define REMAP_SIZE (P2M_PER_PAGE - 3)
  57 +static struct {
  58 + unsigned long next_area_mfn;
  59 + unsigned long target_pfn;
  60 + unsigned long size;
  61 + unsigned long mfns[REMAP_SIZE];
  62 +} xen_remap_buf __initdata __aligned(PAGE_SIZE);
  63 +static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
52 64  
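
The xen_remap_buf structure above is laid out so that its three bookkeeping fields plus REMAP_SIZE MFN slots fill exactly one page of unsigned longs, which is why REMAP_SIZE is defined as P2M_PER_PAGE - 3. A stand-alone sketch of that arithmetic, assuming a 4 KiB page and 8-byte longs (values not taken from the kernel headers):

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE    4096UL                               /* assumed x86 page size */
#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))  /* 512 on 64-bit */
#define REMAP_SIZE   (P2M_PER_PAGE - 3)                   /* 509 mfn slots */

struct remap_buf {
        unsigned long next_area_mfn;   /* link to the next remap info page */
        unsigned long target_pfn;      /* where this chunk gets remapped to */
        unsigned long size;            /* number of valid entries in mfns[] */
        unsigned long mfns[REMAP_SIZE];
};

int main(void)
{
        /* Three header longs + REMAP_SIZE mfns == P2M_PER_PAGE longs == one page. */
        assert(sizeof(struct remap_buf) == PAGE_SIZE);
        printf("remap buffer: %zu bytes, %lu mfn slots per page\n",
               sizeof(struct remap_buf), (unsigned long)REMAP_SIZE);
        return 0;
}
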
53 65 /*
54 66 * The maximum amount of extra memory compared to the base size. The
... ... @@ -64,7 +76,6 @@
64 76  
65 77 static void __init xen_add_extra_mem(u64 start, u64 size)
66 78 {
67   - unsigned long pfn;
68 79 int i;
69 80  
70 81 for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
71 82  
72 83  
73 84  
74 85  
75 86  
76 87  
77 88  
78 89  
79 90  
80 91  
81 92  
... ... @@ -84,75 +95,76 @@
84 95 printk(KERN_WARNING "Warning: not enough extra memory regions\n");
85 96  
86 97 memblock_reserve(start, size);
  98 +}
87 99  
88   - xen_max_p2m_pfn = PFN_DOWN(start + size);
89   - for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
90   - unsigned long mfn = pfn_to_mfn(pfn);
  100 +static void __init xen_del_extra_mem(u64 start, u64 size)
  101 +{
  102 + int i;
  103 + u64 start_r, size_r;
91 104  
92   - if (WARN_ONCE(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn))
93   - continue;
94   - WARN_ONCE(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n",
95   - pfn, mfn);
  105 + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
  106 + start_r = xen_extra_mem[i].start;
  107 + size_r = xen_extra_mem[i].size;
96 108  
97   - __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
  109 + /* Start of region. */
  110 + if (start_r == start) {
  111 + BUG_ON(size > size_r);
  112 + xen_extra_mem[i].start += size;
  113 + xen_extra_mem[i].size -= size;
  114 + break;
  115 + }
  116 + /* End of region. */
  117 + if (start_r + size_r == start + size) {
  118 + BUG_ON(size > size_r);
  119 + xen_extra_mem[i].size -= size;
  120 + break;
  121 + }
  122 + /* Mid of region. */
  123 + if (start > start_r && start < start_r + size_r) {
  124 + BUG_ON(start + size > start_r + size_r);
  125 + xen_extra_mem[i].size = start - start_r;
  126 + /* Calling memblock_reserve() again is okay. */
  127 + xen_add_extra_mem(start + size, start_r + size_r -
  128 + (start + size));
  129 + break;
  130 + }
98 131 }
  132 + memblock_free(start, size);
99 133 }
100 134  
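
xen_del_extra_mem() above distinguishes three overlap cases against a registered extra-mem region: the deleted range starts at the region start (trim the head), ends at the region end (trim the tail), or lies strictly inside (shrink the region and re-register the tail behind the hole). A small user-space model of that case split; the names are invented for illustration and readd stands in for the xen_add_extra_mem() call in the middle case:

#include <stdio.h>

struct region { unsigned long long start, size; };

/* Model of the three cases in xen_del_extra_mem(). */
static void del_range(struct region *r, unsigned long long start,
                      unsigned long long size, struct region *readd)
{
        readd->size = 0;

        if (start == r->start) {                          /* start of region */
                r->start += size;
                r->size -= size;
        } else if (start + size == r->start + r->size) {  /* end of region */
                r->size -= size;
        } else if (start > r->start && start < r->start + r->size) {
                /* middle: keep the head, hand back the tail */
                readd->start = start + size;
                readd->size = r->start + r->size - (start + size);
                r->size = start - r->start;
        }
}

int main(void)
{
        struct region r = { 0x1000, 0x5000 }, tail;

        del_range(&r, 0x2000, 0x1000, &tail);             /* middle case */
        printf("head: %llx+%llx tail: %llx+%llx\n",
               r.start, r.size, tail.start, tail.size);
        return 0;
}
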
101   -static unsigned long __init xen_do_chunk(unsigned long start,
102   - unsigned long end, bool release)
  135 +/*
  136 + * Called during boot before the p2m list can take entries beyond the
  137 + * hypervisor supplied p2m list. Entries in extra mem are to be regarded as
  138 + * invalid.
  139 + */
  140 +unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
103 141 {
104   - struct xen_memory_reservation reservation = {
105   - .address_bits = 0,
106   - .extent_order = 0,
107   - .domid = DOMID_SELF
108   - };
109   - unsigned long len = 0;
110   - unsigned long pfn;
111   - int ret;
  142 + int i;
  143 + unsigned long addr = PFN_PHYS(pfn);
112 144  
113   - for (pfn = start; pfn < end; pfn++) {
114   - unsigned long frame;
115   - unsigned long mfn = pfn_to_mfn(pfn);
  145 + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
  146 + if (addr >= xen_extra_mem[i].start &&
  147 + addr < xen_extra_mem[i].start + xen_extra_mem[i].size)
  148 + return INVALID_P2M_ENTRY;
  149 + }
116 150  
117   - if (release) {
118   - /* Make sure pfn exists to start with */
119   - if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
120   - continue;
121   - frame = mfn;
122   - } else {
123   - if (mfn != INVALID_P2M_ENTRY)
124   - continue;
125   - frame = pfn;
126   - }
127   - set_xen_guest_handle(reservation.extent_start, &frame);
128   - reservation.nr_extents = 1;
  151 + return IDENTITY_FRAME(pfn);
  152 +}
129 153  
130   - ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation : XENMEM_populate_physmap,
131   - &reservation);
132   - WARN(ret != 1, "Failed to %s pfn %lx err=%d\n",
133   - release ? "release" : "populate", pfn, ret);
  154 +/*
  155 + * Mark all pfns of extra mem as invalid in p2m list.
  156 + */
  157 +void __init xen_inv_extra_mem(void)
  158 +{
  159 + unsigned long pfn, pfn_s, pfn_e;
  160 + int i;
134 161  
135   - if (ret == 1) {
136   - if (!early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) {
137   - if (release)
138   - break;
139   - set_xen_guest_handle(reservation.extent_start, &frame);
140   - reservation.nr_extents = 1;
141   - ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
142   - &reservation);
143   - break;
144   - }
145   - len++;
146   - } else
147   - break;
  162 + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
  163 + pfn_s = PFN_DOWN(xen_extra_mem[i].start);
  164 + pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size);
  165 + for (pfn = pfn_s; pfn < pfn_e; pfn++)
  166 + set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
148 167 }
149   - if (len)
150   - printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n",
151   - release ? "Freeing" : "Populating",
152   - start, end, len,
153   - release ? "freed" : "added");
154   -
155   - return len;
156 168 }
157 169  
158 170 /*
159 171  
160 172  
161 173  
162 174  
163 175  
164 176  
... ... @@ -198,26 +210,62 @@
198 210 return done;
199 211 }
200 212  
  213 +static int __init xen_free_mfn(unsigned long mfn)
  214 +{
  215 + struct xen_memory_reservation reservation = {
  216 + .address_bits = 0,
  217 + .extent_order = 0,
  218 + .domid = DOMID_SELF
  219 + };
  220 +
  221 + set_xen_guest_handle(reservation.extent_start, &mfn);
  222 + reservation.nr_extents = 1;
  223 +
  224 + return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
  225 +}
  226 +
201 227 /*
202   - * This releases a chunk of memory and then does the identity map. It's used as
  228 + * This releases a chunk of memory and then does the identity map. It's used
203 229 * as a fallback if the remapping fails.
204 230 */
205 231 static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
206 232 unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity,
207 233 unsigned long *released)
208 234 {
  235 + unsigned long len = 0;
  236 + unsigned long pfn, end;
  237 + int ret;
  238 +
209 239 WARN_ON(start_pfn > end_pfn);
210 240  
  241 + end = min(end_pfn, nr_pages);
  242 + for (pfn = start_pfn; pfn < end; pfn++) {
  243 + unsigned long mfn = pfn_to_mfn(pfn);
  244 +
  245 + /* Make sure pfn exists to start with */
  246 + if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
  247 + continue;
  248 +
  249 + ret = xen_free_mfn(mfn);
  250 + WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
  251 +
  252 + if (ret == 1) {
  253 + if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
  254 + break;
  255 + len++;
  256 + } else
  257 + break;
  258 + }
  259 +
211 260 /* Need to release pages first */
212   - *released += xen_do_chunk(start_pfn, min(end_pfn, nr_pages), true);
  261 + *released += len;
213 262 *identity += set_phys_range_identity(start_pfn, end_pfn);
214 263 }
215 264  
216 265 /*
217   - * Helper function to update both the p2m and m2p tables.
  266 + * Helper function to update the p2m and m2p tables and kernel mapping.
218 267 */
219   -static unsigned long __init xen_update_mem_tables(unsigned long pfn,
220   - unsigned long mfn)
  268 +static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
221 269 {
222 270 struct mmu_update update = {
223 271 .ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
224 272  
225 273  
226 274  
227 275  
228 276  
229 277  
230 278  
231 279  
232 280  
233 281  
234 282  
235 283  
236 284  
237 285  
238 286  
239 287  
... ... @@ -225,161 +273,88 @@
225 273 };
226 274  
227 275 /* Update p2m */
228   - if (!early_set_phys_to_machine(pfn, mfn)) {
  276 + if (!set_phys_to_machine(pfn, mfn)) {
229 277 WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
230 278 pfn, mfn);
231   - return false;
  279 + BUG();
232 280 }
233 281  
234 282 /* Update m2p */
235 283 if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
236 284 WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
237 285 mfn, pfn);
238   - return false;
  286 + BUG();
239 287 }
240 288  
241   - return true;
  289 + /* Update kernel mapping, but not for highmem. */
  290 + if ((pfn << PAGE_SHIFT) >= __pa(high_memory))
  291 + return;
  292 +
  293 + if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
  294 + mfn_pte(mfn, PAGE_KERNEL), 0)) {
  295 + WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
  296 + mfn, pfn);
  297 + BUG();
  298 + }
242 299 }
243 300  
244 301 /*
245 302 * This function updates the p2m and m2p tables with an identity map from
246   - * start_pfn to start_pfn+size and remaps the underlying RAM of the original
247   - * allocation at remap_pfn. It must do so carefully in P2M_PER_PAGE sized blocks
248   - * to not exhaust the reserved brk space. Doing it in properly aligned blocks
249   - * ensures we only allocate the minimum required leaf pages in the p2m table. It
250   - * copies the existing mfns from the p2m table under the 1:1 map, overwrites
251   - * them with the identity map and then updates the p2m and m2p tables with the
252   - * remapped memory.
  303 + * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the
  304 + * original allocation at remap_pfn. The information needed for remapping is
  305 + * saved in the memory itself to avoid the need for allocating buffers. The
  306 + * complete remap information is contained in a list of MFNs each containing
  307 + * up to REMAP_SIZE MFNs and the start target PFN for doing the remap.
  308 + * This enables us to preserve the original mfn sequence while doing the
  309 + * remapping at a time when the memory management is capable of allocating
  310 + * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and
  311 + * its callers.
253 312 */
254   -static unsigned long __init xen_do_set_identity_and_remap_chunk(
  313 +static void __init xen_do_set_identity_and_remap_chunk(
255 314 unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
256 315 {
  316 + unsigned long buf = (unsigned long)&xen_remap_buf;
  317 + unsigned long mfn_save, mfn;
257 318 unsigned long ident_pfn_iter, remap_pfn_iter;
258   - unsigned long ident_start_pfn_align, remap_start_pfn_align;
259   - unsigned long ident_end_pfn_align, remap_end_pfn_align;
260   - unsigned long ident_boundary_pfn, remap_boundary_pfn;
261   - unsigned long ident_cnt = 0;
262   - unsigned long remap_cnt = 0;
  319 + unsigned long ident_end_pfn = start_pfn + size;
263 320 unsigned long left = size;
264   - unsigned long mod;
265   - int i;
  321 + unsigned long ident_cnt = 0;
  322 + unsigned int i, chunk;
266 323  
267 324 WARN_ON(size == 0);
268 325  
269 326 BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
270 327  
271   - /*
272   - * Determine the proper alignment to remap memory in P2M_PER_PAGE sized
273   - * blocks. We need to keep track of both the existing pfn mapping and
274   - * the new pfn remapping.
275   - */
276   - mod = start_pfn % P2M_PER_PAGE;
277   - ident_start_pfn_align =
278   - mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn;
279   - mod = remap_pfn % P2M_PER_PAGE;
280   - remap_start_pfn_align =
281   - mod ? (remap_pfn - mod + P2M_PER_PAGE) : remap_pfn;
282   - mod = (start_pfn + size) % P2M_PER_PAGE;
283   - ident_end_pfn_align = start_pfn + size - mod;
284   - mod = (remap_pfn + size) % P2M_PER_PAGE;
285   - remap_end_pfn_align = remap_pfn + size - mod;
  328 + mfn_save = virt_to_mfn(buf);
286 329  
287   - /* Iterate over each p2m leaf node in each range */
288   - for (ident_pfn_iter = ident_start_pfn_align, remap_pfn_iter = remap_start_pfn_align;
289   - ident_pfn_iter < ident_end_pfn_align && remap_pfn_iter < remap_end_pfn_align;
290   - ident_pfn_iter += P2M_PER_PAGE, remap_pfn_iter += P2M_PER_PAGE) {
291   - /* Check we aren't past the end */
292   - BUG_ON(ident_pfn_iter + P2M_PER_PAGE > start_pfn + size);
293   - BUG_ON(remap_pfn_iter + P2M_PER_PAGE > remap_pfn + size);
  330 + for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
  331 + ident_pfn_iter < ident_end_pfn;
  332 + ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) {
  333 + chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE;
294 334  
295   - /* Save p2m mappings */
296   - for (i = 0; i < P2M_PER_PAGE; i++)
297   - xen_remap_buf[i] = pfn_to_mfn(ident_pfn_iter + i);
  335 + /* Map first pfn to xen_remap_buf */
  336 + mfn = pfn_to_mfn(ident_pfn_iter);
  337 + set_pte_mfn(buf, mfn, PAGE_KERNEL);
298 338  
299   - /* Set identity map which will free a p2m leaf */
300   - ident_cnt += set_phys_range_identity(ident_pfn_iter,
301   - ident_pfn_iter + P2M_PER_PAGE);
  339 + /* Save mapping information in page */
  340 + xen_remap_buf.next_area_mfn = xen_remap_mfn;
  341 + xen_remap_buf.target_pfn = remap_pfn_iter;
  342 + xen_remap_buf.size = chunk;
  343 + for (i = 0; i < chunk; i++)
  344 + xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i);
302 345  
303   -#ifdef DEBUG
304   - /* Helps verify a p2m leaf has been freed */
305   - for (i = 0; i < P2M_PER_PAGE; i++) {
306   - unsigned int pfn = ident_pfn_iter + i;
307   - BUG_ON(pfn_to_mfn(pfn) != pfn);
308   - }
309   -#endif
310   - /* Now remap memory */
311   - for (i = 0; i < P2M_PER_PAGE; i++) {
312   - unsigned long mfn = xen_remap_buf[i];
  346 + /* Put remap buf into list. */
  347 + xen_remap_mfn = mfn;
313 348  
314   - /* This will use the p2m leaf freed above */
315   - if (!xen_update_mem_tables(remap_pfn_iter + i, mfn)) {
316   - WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
317   - remap_pfn_iter + i, mfn);
318   - return 0;
319   - }
  349 + /* Set identity map */
  350 + ident_cnt += set_phys_range_identity(ident_pfn_iter,
  351 + ident_pfn_iter + chunk);
320 352  
321   - remap_cnt++;
322   - }
323   -
324   - left -= P2M_PER_PAGE;
  353 + left -= chunk;
325 354 }
326 355  
327   - /* Max boundary space possible */
328   - BUG_ON(left > (P2M_PER_PAGE - 1) * 2);
329   -
330   - /* Now handle the boundary conditions */
331   - ident_boundary_pfn = start_pfn;
332   - remap_boundary_pfn = remap_pfn;
333   - for (i = 0; i < left; i++) {
334   - unsigned long mfn;
335   -
336   - /* These two checks move from the start to end boundaries */
337   - if (ident_boundary_pfn == ident_start_pfn_align)
338   - ident_boundary_pfn = ident_pfn_iter;
339   - if (remap_boundary_pfn == remap_start_pfn_align)
340   - remap_boundary_pfn = remap_pfn_iter;
341   -
342   - /* Check we aren't past the end */
343   - BUG_ON(ident_boundary_pfn >= start_pfn + size);
344   - BUG_ON(remap_boundary_pfn >= remap_pfn + size);
345   -
346   - mfn = pfn_to_mfn(ident_boundary_pfn);
347   -
348   - if (!xen_update_mem_tables(remap_boundary_pfn, mfn)) {
349   - WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
350   - remap_pfn_iter + i, mfn);
351   - return 0;
352   - }
353   - remap_cnt++;
354   -
355   - ident_boundary_pfn++;
356   - remap_boundary_pfn++;
357   - }
358   -
359   - /* Finish up the identity map */
360   - if (ident_start_pfn_align >= ident_end_pfn_align) {
361   - /*
362   - * In this case we have an identity range which does not span an
363   - * aligned block so everything needs to be identity mapped here.
364   - * If we didn't check this we might remap too many pages since
365   - * the align boundaries are not meaningful in this case.
366   - */
367   - ident_cnt += set_phys_range_identity(start_pfn,
368   - start_pfn + size);
369   - } else {
370   - /* Remapped above so check each end of the chunk */
371   - if (start_pfn < ident_start_pfn_align)
372   - ident_cnt += set_phys_range_identity(start_pfn,
373   - ident_start_pfn_align);
374   - if (start_pfn + size > ident_pfn_iter)
375   - ident_cnt += set_phys_range_identity(ident_pfn_iter,
376   - start_pfn + size);
377   - }
378   -
379   - BUG_ON(ident_cnt != size);
380   - BUG_ON(remap_cnt != size);
381   -
382   - return size;
  356 + /* Restore old xen_remap_buf mapping */
  357 + set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
383 358 }
384 359  
385 360 /*
... ... @@ -396,8 +371,7 @@
396 371 static unsigned long __init xen_set_identity_and_remap_chunk(
397 372 const struct e820entry *list, size_t map_size, unsigned long start_pfn,
398 373 unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
399   - unsigned long *identity, unsigned long *remapped,
400   - unsigned long *released)
  374 + unsigned long *identity, unsigned long *released)
401 375 {
402 376 unsigned long pfn;
403 377 unsigned long i = 0;
404 378  
... ... @@ -431,19 +405,12 @@
431 405 if (size > remap_range_size)
432 406 size = remap_range_size;
433 407  
434   - if (!xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn)) {
435   - WARN(1, "Failed to remap 1:1 memory cur_pfn=%ld size=%ld remap_pfn=%ld\n",
436   - cur_pfn, size, remap_pfn);
437   - xen_set_identity_and_release_chunk(cur_pfn,
438   - cur_pfn + left, nr_pages, identity, released);
439   - break;
440   - }
  408 + xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn);
441 409  
442 410 /* Update variables to reflect new mappings. */
443 411 i += size;
444 412 remap_pfn += size;
445 413 *identity += size;
446   - *remapped += size;
447 414 }
448 415  
449 416 /*
450 417  
... ... @@ -458,13 +425,12 @@
458 425 return remap_pfn;
459 426 }
460 427  
461   -static unsigned long __init xen_set_identity_and_remap(
  428 +static void __init xen_set_identity_and_remap(
462 429 const struct e820entry *list, size_t map_size, unsigned long nr_pages,
463 430 unsigned long *released)
464 431 {
465 432 phys_addr_t start = 0;
466 433 unsigned long identity = 0;
467   - unsigned long remapped = 0;
468 434 unsigned long last_pfn = nr_pages;
469 435 const struct e820entry *entry;
470 436 unsigned long num_released = 0;
... ... @@ -494,8 +460,7 @@
494 460 last_pfn = xen_set_identity_and_remap_chunk(
495 461 list, map_size, start_pfn,
496 462 end_pfn, nr_pages, last_pfn,
497   - &identity, &remapped,
498   - &num_released);
  463 + &identity, &num_released);
499 464 start = end;
500 465 }
501 466 }
502 467  
503 468  
504 469  
... ... @@ -503,12 +468,63 @@
503 468 *released = num_released;
504 469  
505 470 pr_info("Set %ld page(s) to 1-1 mapping\n", identity);
506   - pr_info("Remapped %ld page(s), last_pfn=%ld\n", remapped,
507   - last_pfn);
508 471 pr_info("Released %ld page(s)\n", num_released);
  472 +}
509 473  
510   - return last_pfn;
  474 +/*
  475 + * Remap the memory prepared in xen_do_set_identity_and_remap_chunk().
  476 + * The remap information (which mfn remap to which pfn) is contained in the
  477 + * to be remapped memory itself in a linked list anchored at xen_remap_mfn.
  478 + * This scheme allows remapping the different chunks in arbitrary order while
  479 + * the resulting mapping will be independent of the order.
  480 + */
  481 +void __init xen_remap_memory(void)
  482 +{
  483 + unsigned long buf = (unsigned long)&xen_remap_buf;
  484 + unsigned long mfn_save, mfn, pfn;
  485 + unsigned long remapped = 0;
  486 + unsigned int i;
  487 + unsigned long pfn_s = ~0UL;
  488 + unsigned long len = 0;
  489 +
  490 + mfn_save = virt_to_mfn(buf);
  491 +
  492 + while (xen_remap_mfn != INVALID_P2M_ENTRY) {
  493 + /* Map the remap information */
  494 + set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL);
  495 +
  496 + BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]);
  497 +
  498 + pfn = xen_remap_buf.target_pfn;
  499 + for (i = 0; i < xen_remap_buf.size; i++) {
  500 + mfn = xen_remap_buf.mfns[i];
  501 + xen_update_mem_tables(pfn, mfn);
  502 + remapped++;
  503 + pfn++;
  504 + }
  505 + if (pfn_s == ~0UL || pfn == pfn_s) {
  506 + pfn_s = xen_remap_buf.target_pfn;
  507 + len += xen_remap_buf.size;
  508 + } else if (pfn_s + len == xen_remap_buf.target_pfn) {
  509 + len += xen_remap_buf.size;
  510 + } else {
  511 + xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));
  512 + pfn_s = xen_remap_buf.target_pfn;
  513 + len = xen_remap_buf.size;
  514 + }
  515 +
  516 + mfn = xen_remap_mfn;
  517 + xen_remap_mfn = xen_remap_buf.next_area_mfn;
  518 + }
  519 +
  520 + if (pfn_s != ~0UL && len)
  521 + xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));
  522 +
  523 + set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
  524 +
  525 + pr_info("Remapped %ld page(s)\n", remapped);
511 526 }
  527 +
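
xen_do_set_identity_and_remap_chunk() and xen_remap_memory() above thread the remap metadata through the pages that are themselves waiting to be remapped: each chunk page records its target PFN, its length, its MFNs and a link to the previously built chunk, forming a singly linked list anchored at xen_remap_mfn. A minimal user-space model of that build-then-walk pattern, with heap allocations standing in for MFN-addressed pages (all names illustrative, none of this is kernel API):

#include <stdio.h>
#include <stdlib.h>

#define REMAP_SIZE 4   /* tiny chunks for the demo; 509 in the kernel */

/* Stand-in for one page holding remap information. */
struct chunk {
        struct chunk *next_area;            /* kernel: next_area_mfn */
        unsigned long target_pfn;
        unsigned long size;
        unsigned long mfns[REMAP_SIZE];
};

static struct chunk *anchor;                /* kernel: xen_remap_mfn */

/* Build phase: record one chunk and push it onto the list. */
static void save_chunk(unsigned long start_pfn, unsigned long size,
                       unsigned long target_pfn)
{
        struct chunk *c = malloc(sizeof(*c));
        unsigned long i;

        if (!c)
                exit(1);
        c->next_area = anchor;
        c->target_pfn = target_pfn;
        c->size = size;
        for (i = 0; i < size; i++)
                c->mfns[i] = 0x1000 + start_pfn + i;   /* fake pfn_to_mfn() */
        anchor = c;
}

int main(void)
{
        struct chunk *c, *next;
        unsigned long i;

        /* Build phase, as in xen_do_set_identity_and_remap_chunk(). */
        save_chunk(0x100, 4, 0x800);
        save_chunk(0x104, 2, 0x804);

        /* Walk phase, as in xen_remap_memory(): replay each chunk. */
        for (c = anchor; c; c = next) {
                for (i = 0; i < c->size; i++)
                        printf("remap mfn %lx -> pfn %lx\n",
                               c->mfns[i], c->target_pfn + i);
                next = c->next_area;
                free(c);
        }
        return 0;
}
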
512 528 static unsigned long __init xen_get_max_pages(void)
513 529 {
514 530 unsigned long max_pages = MAX_DOMAIN_PAGES;
... ... @@ -569,7 +585,6 @@
569 585 int rc;
570 586 struct xen_memory_map memmap;
571 587 unsigned long max_pages;
572   - unsigned long last_pfn = 0;
573 588 unsigned long extra_pages = 0;
574 589 int i;
575 590 int op;
576 591  
577 592  
... ... @@ -616,17 +631,14 @@
616 631 extra_pages += max_pages - max_pfn;
617 632  
618 633 /*
619   - * Set identity map on non-RAM pages and remap the underlying RAM.
  634 + * Set identity map on non-RAM pages and prepare remapping the
  635 + * underlying RAM.
620 636 */
621   - last_pfn = xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
622   - &xen_released_pages);
  637 + xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
  638 + &xen_released_pages);
623 639  
624 640 extra_pages += xen_released_pages;
625 641  
626   - if (last_pfn > max_pfn) {
627   - max_pfn = min(MAX_DOMAIN_PAGES, last_pfn);
628   - mem_end = PFN_PHYS(max_pfn);
629   - }
630 642 /*
631 643 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
632 644 * factor the base size. On non-highmem systems, the base
... ... @@ -653,6 +665,7 @@
653 665 size = min(size, (u64)extra_pages * PAGE_SIZE);
654 666 extra_pages -= size / PAGE_SIZE;
655 667 xen_add_extra_mem(addr, size);
  668 + xen_max_p2m_pfn = PFN_DOWN(addr + size);
656 669 } else
657 670 type = E820_UNUSABLE;
658 671 }
arch/x86/xen/xen-ops.h
... ... @@ -29,11 +29,13 @@
29 29 void xen_setup_machphys_mapping(void);
30 30 void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
31 31 void xen_reserve_top(void);
32   -extern unsigned long xen_max_p2m_pfn;
33 32  
34 33 void xen_mm_pin_all(void);
35 34 void xen_mm_unpin_all(void);
36 35  
  36 +unsigned long __ref xen_chk_extra_mem(unsigned long pfn);
  37 +void __init xen_inv_extra_mem(void);
  38 +void __init xen_remap_memory(void);
37 39 char * __init xen_memory_setup(void);
38 40 char * xen_auto_xlated_memory_setup(void);
39 41 void __init xen_arch_setup(void);
... ... @@ -46,7 +48,7 @@
46 48 void xen_unplug_emulated_devices(void);
47 49  
48 50 void __init xen_build_dynamic_phys_to_machine(void);
49   -unsigned long __init xen_revector_p2m_tree(void);
  51 +void __init xen_vmalloc_p2m_tree(void);
50 52  
51 53 void xen_init_irq_ops(void);
52 54 void xen_setup_timer(int cpu);