Commit eb64c3c6cdb8fa8a4d324eb71a9033b62e150918
Exists in ti-lsk-linux-4.1.y and in 10 other branches
Merge tag 'stable/for-linus-3.19-rc0b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull additional xen update from David Vrabel:
 "Xen: additional features for 3.19-rc0

  - Linear p2m for x86 PV guests, which simplifies the p2m code,
    improves performance, and will allow for > 512 GB PV guests in the
    future.

    A last-minute, configuration-specific issue was discovered with this
    change, which is why it was not included in my previous pull
    request. This has now been fixed and tested."

* tag 'stable/for-linus-3.19-rc0b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen: switch to post-init routines in xen mmu.c earlier
  Revert "swiotlb-xen: pass dev_addr to swiotlb_tbl_unmap_single"
  xen: annotate xen_set_identity_and_remap_chunk() with __init
  xen: introduce helper functions to do safe read and write accesses
  xen: Speed up set_phys_to_machine() by using read-only mappings
  xen: switch to linear virtual mapped sparse p2m list
  xen: Hide get_phys_to_machine() to be able to tune common path
  x86: Introduce function to get pmd entry pointer
  xen: Delay invalidating extra memory
  xen: Delay m2p_override initialization
  xen: Delay remapping memory of pv-domain
  xen: use common page allocation function in p2m.c
  xen: Make functions static
  xen: fix some style issues in p2m.c
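The heart of the linear p2m change is visible in the __pfn_to_mfn() helper added below: the common lookup becomes a single read from a virtually mapped flat array, with the old tree walk surviving only behind get_phys_to_machine() for rare boundary cases. A simplified sketch of that fast path (a hypothetical condensation, not a verbatim excerpt; the real helper also falls back to get_phys_to_machine() when the entry reads as INVALID_P2M_ENTRY):

    /* Sketch of the linear p2m fast path (condensed, simplified). */
    static inline unsigned long lookup_mfn(unsigned long pfn)
    {
            if (pfn < xen_p2m_size)            /* linear, virtually mapped */
                    return xen_p2m_addr[pfn];
            if (pfn < xen_max_p2m_pfn)         /* tail: delayed extra memory */
                    return get_phys_to_machine(pfn);
            return IDENTITY_FRAME(pfn);        /* beyond the map: 1:1 */
    }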
7 changed files (side-by-side diff):
arch/x86/include/asm/pgtable_types.h
... | ... | @@ -452,6 +452,7 @@ |
452 | 452 | extern pte_t *lookup_address(unsigned long address, unsigned int *level); |
453 | 453 | extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, |
454 | 454 | unsigned int *level); |
455 | +extern pmd_t *lookup_pmd_address(unsigned long address); | |
455 | 456 | extern phys_addr_t slow_virt_to_phys(void *__address); |
456 | 457 | extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, |
457 | 458 | unsigned numpages, unsigned long page_flags); |
arch/x86/include/asm/xen/page.h
... | ... | @@ -41,10 +41,12 @@ |
41 | 41 | |
42 | 42 | extern unsigned long *machine_to_phys_mapping; |
43 | 43 | extern unsigned long machine_to_phys_nr; |
44 | +extern unsigned long *xen_p2m_addr; | |
45 | +extern unsigned long xen_p2m_size; | |
46 | +extern unsigned long xen_max_p2m_pfn; | |
44 | 47 | |
45 | 48 | extern unsigned long get_phys_to_machine(unsigned long pfn); |
46 | 49 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
47 | -extern bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn); | |
48 | 50 | extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
49 | 51 | extern unsigned long set_phys_range_identity(unsigned long pfn_s, |
50 | 52 | unsigned long pfn_e); |
... | ... | @@ -52,17 +54,52 @@ |
52 | 54 | extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, |
53 | 55 | struct gnttab_map_grant_ref *kmap_ops, |
54 | 56 | struct page **pages, unsigned int count); |
55 | -extern int m2p_add_override(unsigned long mfn, struct page *page, | |
56 | - struct gnttab_map_grant_ref *kmap_op); | |
57 | 57 | extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, |
58 | 58 | struct gnttab_map_grant_ref *kmap_ops, |
59 | 59 | struct page **pages, unsigned int count); |
60 | -extern int m2p_remove_override(struct page *page, | |
61 | - struct gnttab_map_grant_ref *kmap_op, | |
62 | - unsigned long mfn); | |
63 | -extern struct page *m2p_find_override(unsigned long mfn); | |
64 | 60 | extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); |
65 | 61 | |
62 | +/* | |
63 | + * Helper functions to write or read unsigned long values to/from | |
64 | + * memory, when the access may fault. | |
65 | + */ | |
66 | +static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val) | |
67 | +{ | |
68 | + return __put_user(val, (unsigned long __user *)addr); | |
69 | +} | |
70 | + | |
71 | +static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val) | |
72 | +{ | |
73 | + return __get_user(*val, (unsigned long __user *)addr); | |
74 | +} | |
75 | + | |
76 | +/* | |
77 | + * When to use pfn_to_mfn(), __pfn_to_mfn() or get_phys_to_machine(): | |
78 | + * - pfn_to_mfn() returns either INVALID_P2M_ENTRY or the mfn. No indicator | |
79 | + * bits (identity or foreign) are set. | |
80 | + * - __pfn_to_mfn() returns the found entry of the p2m table. A possibly set | |
81 | + * identity or foreign indicator will be still set. __pfn_to_mfn() is | |
82 | + * encapsulating get_phys_to_machine() which is called in special cases only. | |
83 | + * - get_phys_to_machine() is to be called by __pfn_to_mfn() only in special | |
84 | + * cases needing an extended handling. | |
85 | + */ | |
86 | +static inline unsigned long __pfn_to_mfn(unsigned long pfn) | |
87 | +{ | |
88 | + unsigned long mfn; | |
89 | + | |
90 | + if (pfn < xen_p2m_size) | |
91 | + mfn = xen_p2m_addr[pfn]; | |
92 | + else if (unlikely(pfn < xen_max_p2m_pfn)) | |
93 | + return get_phys_to_machine(pfn); | |
94 | + else | |
95 | + return IDENTITY_FRAME(pfn); | |
96 | + | |
97 | + if (unlikely(mfn == INVALID_P2M_ENTRY)) | |
98 | + return get_phys_to_machine(pfn); | |
99 | + | |
100 | + return mfn; | |
101 | +} | |
102 | + | |
66 | 103 | static inline unsigned long pfn_to_mfn(unsigned long pfn) |
67 | 104 | { |
68 | 105 | unsigned long mfn; |
... | ... | @@ -70,7 +107,7 @@ |
70 | 107 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
71 | 108 | return pfn; |
72 | 109 | |
73 | - mfn = get_phys_to_machine(pfn); | |
110 | + mfn = __pfn_to_mfn(pfn); | |
74 | 111 | |
75 | 112 | if (mfn != INVALID_P2M_ENTRY) |
76 | 113 | mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT); |
... | ... | @@ -83,7 +120,7 @@ |
83 | 120 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
84 | 121 | return 1; |
85 | 122 | |
86 | - return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY; | |
123 | + return __pfn_to_mfn(pfn) != INVALID_P2M_ENTRY; | |
87 | 124 | } |
88 | 125 | |
89 | 126 | static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn) |
... | ... | @@ -102,7 +139,7 @@ |
102 | 139 | * In such cases it doesn't matter what we return (we return garbage), |
103 | 140 | * but we must handle the fault without crashing! |
104 | 141 | */ |
105 | - ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); | |
142 | + ret = xen_safe_read_ulong(&machine_to_phys_mapping[mfn], &pfn); | |
106 | 143 | if (ret < 0) |
107 | 144 | return ~0; |
108 | 145 | |
... | ... | @@ -117,7 +154,7 @@ |
117 | 154 | return mfn; |
118 | 155 | |
119 | 156 | pfn = mfn_to_pfn_no_overrides(mfn); |
120 | - if (get_phys_to_machine(pfn) != mfn) { | |
157 | + if (__pfn_to_mfn(pfn) != mfn) { | |
121 | 158 | /* |
122 | 159 | * If this appears to be a foreign mfn (because the pfn |
123 | 160 | * doesn't map back to the mfn), then check the local override |
... | ... | @@ -133,8 +170,7 @@ |
133 | 170 | * entry doesn't map back to the mfn and m2p_override doesn't have a |
134 | 171 | * valid entry for it. |
135 | 172 | */ |
136 | - if (pfn == ~0 && | |
137 | - get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn)) | |
173 | + if (pfn == ~0 && __pfn_to_mfn(mfn) == IDENTITY_FRAME(mfn)) | |
138 | 174 | pfn = mfn; |
139 | 175 | |
140 | 176 | return pfn; |
... | ... | @@ -180,7 +216,7 @@ |
180 | 216 | return mfn; |
181 | 217 | |
182 | 218 | pfn = mfn_to_pfn(mfn); |
183 | - if (get_phys_to_machine(pfn) != mfn) | |
219 | + if (__pfn_to_mfn(pfn) != mfn) | |
184 | 220 | return -1; /* force !pfn_valid() */ |
185 | 221 | return pfn; |
186 | 222 | } |
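The xen_safe_*_ulong() wrappers above lean on the fact that __get_user()/__put_user() accesses are covered by the kernel's exception tables, so a fault on an unmapped or read-only p2m slot comes back as -EFAULT instead of an oops. A hypothetical caller, for illustration only (probe_p2m_slot() is not part of the patch):

    /* Probe a p2m slot that may not be mapped, without risking a crash. */
    static unsigned long probe_p2m_slot(unsigned long pfn)
    {
            unsigned long mfn;

            if (xen_safe_read_ulong(xen_p2m_addr + pfn, &mfn))
                    return INVALID_P2M_ENTRY;  /* faulted: treat as missing */
            return mfn;
    }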
arch/x86/mm/pageattr.c
... | ... | @@ -384,6 +384,26 @@ |
384 | 384 | } |
385 | 385 | |
386 | 386 | /* |
387 | + * Lookup the PMD entry for a virtual address. Return a pointer to the entry | |
388 | + * or NULL if not present. | |
389 | + */ | |
390 | +pmd_t *lookup_pmd_address(unsigned long address) | |
391 | +{ | |
392 | + pgd_t *pgd; | |
393 | + pud_t *pud; | |
394 | + | |
395 | + pgd = pgd_offset_k(address); | |
396 | + if (pgd_none(*pgd)) | |
397 | + return NULL; | |
398 | + | |
399 | + pud = pud_offset(pgd, address); | |
400 | + if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud)) | |
401 | + return NULL; | |
402 | + | |
403 | + return pmd_offset(pud, address); | |
404 | +} | |
405 | + | |
406 | +/* | |
387 | 407 | * This is necessary because __pa() does not work on some |
388 | 408 | * kinds of memory, like vmalloc() or the alloc_remap() |
389 | 409 | * areas on 32-bit NUMA systems. The percpu areas can |
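lookup_pmd_address() deliberately stops one level above lookup_address(): it hands back the PMD entry itself, or NULL when an upper level is absent or huge, so a caller can safely swap the whole PMD. A sketch of the intended use, mirroring what alloc_p2m_pmd() in the p2m rework below does (hypothetical caller; new_pte_page is assumed):

    pmd_t *pmdp = lookup_pmd_address(vaddr);
    if (pmdp && !pmd_large(*pmdp))     /* 4K-mapped PMD: safe to replace */
            set_pmd(pmdp, __pmd(__pa(new_pte_page) | _KERNPG_TABLE));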
arch/x86/xen/mmu.c
... | ... | @@ -387,7 +387,7 @@ |
387 | 387 | unsigned long mfn; |
388 | 388 | |
389 | 389 | if (!xen_feature(XENFEAT_auto_translated_physmap)) |
390 | - mfn = get_phys_to_machine(pfn); | |
390 | + mfn = __pfn_to_mfn(pfn); | |
391 | 391 | else |
392 | 392 | mfn = pfn; |
393 | 393 | /* |
... | ... | @@ -1113,20 +1113,16 @@ |
1113 | 1113 | * instead of somewhere later and be confusing. */ |
1114 | 1114 | xen_mc_flush(); |
1115 | 1115 | } |
1116 | -static void __init xen_pagetable_p2m_copy(void) | |
1116 | + | |
1117 | +static void __init xen_pagetable_p2m_free(void) | |
1117 | 1118 | { |
1118 | 1119 | unsigned long size; |
1119 | 1120 | unsigned long addr; |
1120 | - unsigned long new_mfn_list; | |
1121 | 1121 | |
1122 | - if (xen_feature(XENFEAT_auto_translated_physmap)) | |
1123 | - return; | |
1124 | - | |
1125 | 1122 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); |
1126 | 1123 | |
1127 | - new_mfn_list = xen_revector_p2m_tree(); | |
1128 | 1124 | /* No memory or already called. */ |
1129 | - if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list) | |
1125 | + if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list) | |
1130 | 1126 | return; |
1131 | 1127 | |
1132 | 1128 | /* using __ka address and sticking INVALID_P2M_ENTRY! */ |
... | ... | @@ -1144,8 +1140,6 @@ |
1144 | 1140 | |
1145 | 1141 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); |
1146 | 1142 | memblock_free(__pa(xen_start_info->mfn_list), size); |
1147 | - /* And revector! Bye bye old array */ | |
1148 | - xen_start_info->mfn_list = new_mfn_list; | |
1149 | 1143 | |
1150 | 1144 | /* At this stage, cleanup_highmap has already cleaned __ka space |
1151 | 1145 | * from _brk_limit way up to the max_pfn_mapped (which is the end of |
... | ... | @@ -1169,17 +1163,35 @@ |
1169 | 1163 | } |
1170 | 1164 | #endif |
1171 | 1165 | |
1172 | -static void __init xen_pagetable_init(void) | |
1166 | +static void __init xen_pagetable_p2m_setup(void) | |
1173 | 1167 | { |
1174 | - paging_init(); | |
1168 | + if (xen_feature(XENFEAT_auto_translated_physmap)) | |
1169 | + return; | |
1170 | + | |
1171 | + xen_vmalloc_p2m_tree(); | |
1172 | + | |
1175 | 1173 | #ifdef CONFIG_X86_64 |
1176 | - xen_pagetable_p2m_copy(); | |
1174 | + xen_pagetable_p2m_free(); | |
1177 | 1175 | #endif |
1176 | + /* And revector! Bye bye old array */ | |
1177 | + xen_start_info->mfn_list = (unsigned long)xen_p2m_addr; | |
1178 | +} | |
1179 | + | |
1180 | +static void __init xen_pagetable_init(void) | |
1181 | +{ | |
1182 | + paging_init(); | |
1183 | + xen_post_allocator_init(); | |
1184 | + | |
1185 | + xen_pagetable_p2m_setup(); | |
1186 | + | |
1178 | 1187 | /* Allocate and initialize top and mid mfn levels for p2m structure */ |
1179 | 1188 | xen_build_mfn_list_list(); |
1180 | 1189 | |
1190 | + /* Remap memory freed due to conflicts with E820 map */ | |
1191 | + if (!xen_feature(XENFEAT_auto_translated_physmap)) | |
1192 | + xen_remap_memory(); | |
1193 | + | |
1181 | 1194 | xen_setup_shared_info(); |
1182 | - xen_post_allocator_init(); | |
1183 | 1195 | } |
1184 | 1196 | static void xen_write_cr2(unsigned long cr2) |
1185 | 1197 | { |
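The reshuffle above (the "switch to post-init routines in xen mmu.c earlier" patch) moves xen_post_allocator_init() ahead of the p2m setup, presumably because remapping the p2m list installs new PTE pages via set_pte()/paravirt_alloc_pte() and needs the post-init mmu ops in place. The resulting boot order, condensed from the hunks above:

    static void __init xen_pagetable_init(void)
    {
            paging_init();
            xen_post_allocator_init();  /* post-init mmu ops, now earlier */
            xen_pagetable_p2m_setup();  /* vmalloc-map linear p2m, free old */
            xen_build_mfn_list_list();  /* rebuild Xen-visible mfn tree */
            if (!xen_feature(XENFEAT_auto_translated_physmap))
                    xen_remap_memory(); /* remap E820-conflict memory */
            xen_setup_shared_info();
    }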
arch/x86/xen/p2m.c
... | ... | @@ -3,21 +3,22 @@ |
3 | 3 | * guests themselves, but it must also access and update the p2m array |
4 | 4 | * during suspend/resume when all the pages are reallocated. |
5 | 5 | * |
6 | - * The p2m table is logically a flat array, but we implement it as a | |
7 | - * three-level tree to allow the address space to be sparse. | |
6 | + * The logical flat p2m table is mapped to a linear kernel memory area. | |
7 | + * For accesses by Xen a three-level tree linked via mfns only is set up to | |
8 | + * allow the address space to be sparse. | |
8 | 9 | * |
9 | - * Xen | |
10 | - * | | |
11 | - * p2m_top p2m_top_mfn | |
12 | - * / \ / \ | |
13 | - * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn | |
14 | - * / \ / \ / / | |
15 | - * p2m p2m p2m p2m p2m p2m p2m ... | |
10 | + * Xen | |
11 | + * | | |
12 | + * p2m_top_mfn | |
13 | + * / \ | |
14 | + * p2m_mid_mfn p2m_mid_mfn | |
15 | + * / / | |
16 | + * p2m p2m p2m ... | |
16 | 17 | * |
17 | 18 | * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. |
18 | 19 | * |
19 | - * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the | |
20 | - * maximum representable pseudo-physical address space is: | |
20 | + * The p2m_top_mfn level is limited to 1 page, so the maximum representable | |
21 | + * pseudo-physical address space is: | |
21 | 22 | * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages |
22 | 23 | * |
23 | 24 | * P2M_PER_PAGE depends on the architecture, as a mfn is always |
... | ... | @@ -30,6 +31,9 @@ |
30 | 31 | * leaf entries, or for the top root, or middle one, for which there is a void |
31 | 32 | * entry, we assume it is "missing". So (for example) |
32 | 33 | * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. |
34 | + * We have a dedicated page p2m_missing with all entries being | |
35 | + * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m | |
36 | + * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns. | |
33 | 37 | * |
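A worked example of the index arithmetic helps when reading the tree code below. With 4 KiB pages on x86-64, P2M_PER_PAGE and P2M_MID_PER_PAGE are both 512; taking pfn 0x40500 (the 1029 MB boundary from the old comment's E820 example):

    /* pfn = 0x40500:
     *   p2m_top_index(pfn) = pfn / (512 * 512) = 1
     *   p2m_mid_index(pfn) = (pfn / 512) % 512 = 2
     *   p2m_index(pfn)     = pfn % 512         = 256
     * Xen walks p2m_top_mfn_p[1] -> mid slot 2 -> leaf entry 256, while
     * the kernel itself now just reads xen_p2m_addr[0x40500]. */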
34 | 38 | * We also have the possibility of setting 1-1 mappings on certain regions, so |
35 | 39 | * that: |
... | ... | @@ -39,122 +43,20 @@ |
39 | 43 | * PCI BARs, or ACPI spaces), we can create mappings easily because we |
40 | 44 | * get the PFN value to match the MFN. |
41 | 45 | * |
42 | - * For this to work efficiently we have one new page p2m_identity and | |
43 | - * allocate (via reserved_brk) any other pages we need to cover the sides | |
44 | - * (1GB or 4MB boundary violations). All entries in p2m_identity are set to | |
45 | - * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs, | |
46 | - * no other fancy value). | |
46 | + * For this to work efficiently we have one new page p2m_identity. All entries | |
47 | + * in p2m_identity are set to INVALID_P2M_ENTRY type (Xen toolstack only | |
48 | + * recognizes that and MFNs, no other fancy value). | |
47 | 49 | * |
48 | 50 | * On lookup we spot that the entry points to p2m_identity and return the |
49 | 51 | * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. |
50 | 52 | * If the entry points to an allocated page, we just proceed as before and |
51 | - * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in | |
53 | + * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in | |
52 | 54 | * appropriate functions (pfn_to_mfn). |
53 | 55 | * |
54 | 56 | * The reason for having the IDENTITY_FRAME_BIT instead of just returning the |
55 | 57 | * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a |
56 | 58 | * non-identity pfn. To protect ourselves against we elect to set (and get) the |
57 | 59 | * IDENTITY_FRAME_BIT on all identity mapped PFNs. |
58 | - * | |
59 | - * This simplistic diagram is used to explain the more subtle piece of code. | |
60 | - * There is also a digram of the P2M at the end that can help. | |
61 | - * Imagine your E820 looking as so: | |
62 | - * | |
63 | - * 1GB 2GB 4GB | |
64 | - * /-------------------+---------\/----\ /----------\ /---+-----\ | |
65 | - * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM | | |
66 | - * \-------------------+---------/\----/ \----------/ \---+-----/ | |
67 | - * ^- 1029MB ^- 2001MB | |
68 | - * | |
69 | - * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100), | |
70 | - * 2048MB = 524288 (0x80000)] | |
71 | - * | |
72 | - * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB | |
73 | - * is actually not present (would have to kick the balloon driver to put it in). | |
74 | - * | |
75 | - * When we are told to set the PFNs for identity mapping (see patch: "xen/setup: | |
76 | - * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start | |
77 | - * of the PFN and the end PFN (263424 and 512256 respectively). The first step | |
78 | - * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page | |
79 | - * covers 512^2 of page estate (1GB) and in case the start or end PFN is not | |
80 | - * aligned on 512^2*PAGE_SIZE (1GB) we reserve_brk new middle and leaf pages as | |
81 | - * required to split any existing p2m_mid_missing middle pages. | |
82 | - * | |
83 | - * With the E820 example above, 263424 is not 1GB aligned so we allocate a | |
84 | - * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000. | |
85 | - * Each entry in the allocate page is "missing" (points to p2m_missing). | |
86 | - * | |
87 | - * Next stage is to determine if we need to do a more granular boundary check | |
88 | - * on the 4MB (or 2MB depending on architecture) off the start and end pfn's. | |
89 | - * We check if the start pfn and end pfn violate that boundary check, and if | |
90 | - * so reserve_brk a (p2m[x][y]) leaf page. This way we have a much finer | |
91 | - * granularity of setting which PFNs are missing and which ones are identity. | |
92 | - * In our example 263424 and 512256 both fail the check so we reserve_brk two | |
93 | - * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" | |
94 | - * values) and assign them to p2m[1][2] and p2m[1][488] respectively. | |
95 | - * | |
96 | - * At this point we would at minimum reserve_brk one page, but could be up to | |
97 | - * three. Each call to set_phys_range_identity has at maximum a three page | |
98 | - * cost. If we were to query the P2M at this stage, all those entries from | |
99 | - * start PFN through end PFN (so 1029MB -> 2001MB) would return | |
100 | - * INVALID_P2M_ENTRY ("missing"). | |
101 | - * | |
102 | - * The next step is to walk from the start pfn to the end pfn setting | |
103 | - * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity. | |
104 | - * If we find that the middle entry is pointing to p2m_missing we can swap it | |
105 | - * over to p2m_identity - this way covering 4MB (or 2MB) PFN space (and | |
106 | - * similarly swapping p2m_mid_missing for p2m_mid_identity for larger regions). | |
107 | - * At this point we do not need to worry about boundary aligment (so no need to | |
108 | - * reserve_brk a middle page, figure out which PFNs are "missing" and which | |
109 | - * ones are identity), as that has been done earlier. If we find that the | |
110 | - * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference | |
111 | - * that page (which covers 512 PFNs) and set the appropriate PFN with | |
112 | - * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we | |
113 | - * set from p2m[1][2][256->511] and p2m[1][488][0->256] with | |
114 | - * IDENTITY_FRAME_BIT set. | |
115 | - * | |
116 | - * All other regions that are void (or not filled) either point to p2m_missing | |
117 | - * (considered missing) or have the default value of INVALID_P2M_ENTRY (also | |
118 | - * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511] | |
119 | - * contain the INVALID_P2M_ENTRY value and are considered "missing." | |
120 | - * | |
121 | - * Finally, the region beyond the end of of the E820 (4 GB in this example) | |
122 | - * is set to be identity (in case there are MMIO regions placed here). | |
123 | - * | |
124 | - * This is what the p2m ends up looking (for the E820 above) with this | |
125 | - * fabulous drawing: | |
126 | - * | |
127 | - * p2m /--------------\ | |
128 | - * /-----\ | &mfn_list[0],| /-----------------\ | |
129 | - * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. | | |
130 | - * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] | | |
131 | - * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] | | |
132 | - * |-----| \ | [p2m_identity]+\\ | .... | | |
133 | - * | 2 |--\ \-------------------->| ... | \\ \----------------/ | |
134 | - * |-----| \ \---------------/ \\ | |
135 | - * | 3 |-\ \ \\ p2m_identity [1] | |
136 | - * |-----| \ \-------------------->/---------------\ /-----------------\ | |
137 | - * | .. |\ | | [p2m_identity]+-->| ~0, ~0, ~0, ... | | |
138 | - * \-----/ | | | [p2m_identity]+-->| ..., ~0 | | |
139 | - * | | | .... | \-----------------/ | |
140 | - * | | +-[x], ~0, ~0.. +\ | |
141 | - * | | \---------------/ \ | |
142 | - * | | \-> /---------------\ | |
143 | - * | V p2m_mid_missing p2m_missing | IDENTITY[@0] | | |
144 | - * | /-----------------\ /------------\ | IDENTITY[@256]| | |
145 | - * | | [p2m_missing] +---->| ~0, ~0, ...| | ~0, ~0, .... | | |
146 | - * | | [p2m_missing] +---->| ..., ~0 | \---------------/ | |
147 | - * | | ... | \------------/ | |
148 | - * | \-----------------/ | |
149 | - * | | |
150 | - * | p2m_mid_identity | |
151 | - * | /-----------------\ | |
152 | - * \-->| [p2m_identity] +---->[1] | |
153 | - * | [p2m_identity] +---->[1] | |
154 | - * | ... | | |
155 | - * \-----------------/ | |
156 | - * | |
157 | - * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) | |
158 | 60 | */ |
159 | 61 | |
160 | 62 | #include <linux/init.h> |
161 | 63 | |
... | ... | @@ -164,9 +66,11 @@ |
164 | 66 | #include <linux/sched.h> |
165 | 67 | #include <linux/seq_file.h> |
166 | 68 | #include <linux/bootmem.h> |
69 | +#include <linux/slab.h> | |
167 | 70 | |
168 | 71 | #include <asm/cache.h> |
169 | 72 | #include <asm/setup.h> |
73 | +#include <asm/uaccess.h> | |
170 | 74 | |
171 | 75 | #include <asm/xen/page.h> |
172 | 76 | #include <asm/xen/hypercall.h> |
... | ... | @@ -178,32 +82,27 @@ |
178 | 82 | #include "multicalls.h" |
179 | 83 | #include "xen-ops.h" |
180 | 84 | |
85 | +#define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE) | |
86 | + | |
181 | 87 | static void __init m2p_override_init(void); |
182 | 88 | |
89 | +unsigned long *xen_p2m_addr __read_mostly; | |
90 | +EXPORT_SYMBOL_GPL(xen_p2m_addr); | |
91 | +unsigned long xen_p2m_size __read_mostly; | |
92 | +EXPORT_SYMBOL_GPL(xen_p2m_size); | |
183 | 93 | unsigned long xen_max_p2m_pfn __read_mostly; |
94 | +EXPORT_SYMBOL_GPL(xen_max_p2m_pfn); | |
184 | 95 | |
96 | +static DEFINE_SPINLOCK(p2m_update_lock); | |
97 | + | |
185 | 98 | static unsigned long *p2m_mid_missing_mfn; |
186 | 99 | static unsigned long *p2m_top_mfn; |
187 | 100 | static unsigned long **p2m_top_mfn_p; |
101 | +static unsigned long *p2m_missing; | |
102 | +static unsigned long *p2m_identity; | |
103 | +static pte_t *p2m_missing_pte; | |
104 | +static pte_t *p2m_identity_pte; | |
188 | 105 | |
189 | -/* Placeholders for holes in the address space */ | |
190 | -static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); | |
191 | -static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); | |
192 | - | |
193 | -static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); | |
194 | - | |
195 | -static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); | |
196 | -static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE); | |
197 | - | |
198 | -RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | |
199 | - | |
200 | -/* For each I/O range remapped we may lose up to two leaf pages for the boundary | |
201 | - * violations and three mid pages to cover up to 3GB. With | |
202 | - * early_can_reuse_p2m_middle() most of the leaf pages will be reused by the | |
203 | - * remapped region. | |
204 | - */ | |
205 | -RESERVE_BRK(p2m_identity_remap, PAGE_SIZE * 2 * 3 * MAX_REMAP_RANGES); | |
206 | - | |
207 | 106 | static inline unsigned p2m_top_index(unsigned long pfn) |
208 | 107 | { |
209 | 108 | BUG_ON(pfn >= MAX_P2M_PFN); |
... | ... | @@ -220,14 +119,6 @@ |
220 | 119 | return pfn % P2M_PER_PAGE; |
221 | 120 | } |
222 | 121 | |
223 | -static void p2m_top_init(unsigned long ***top) | |
224 | -{ | |
225 | - unsigned i; | |
226 | - | |
227 | - for (i = 0; i < P2M_TOP_PER_PAGE; i++) | |
228 | - top[i] = p2m_mid_missing; | |
229 | -} | |
230 | - | |
231 | 122 | static void p2m_top_mfn_init(unsigned long *top) |
232 | 123 | { |
233 | 124 | unsigned i; |
... | ... | @@ -244,30 +135,45 @@ |
244 | 135 | top[i] = p2m_mid_missing_mfn; |
245 | 136 | } |
246 | 137 | |
247 | -static void p2m_mid_init(unsigned long **mid, unsigned long *leaf) | |
138 | +static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) | |
248 | 139 | { |
249 | 140 | unsigned i; |
250 | 141 | |
251 | 142 | for (i = 0; i < P2M_MID_PER_PAGE; i++) |
252 | - mid[i] = leaf; | |
143 | + mid[i] = virt_to_mfn(leaf); | |
253 | 144 | } |
254 | 145 | |
255 | -static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) | |
146 | +static void p2m_init(unsigned long *p2m) | |
256 | 147 | { |
257 | 148 | unsigned i; |
258 | 149 | |
259 | - for (i = 0; i < P2M_MID_PER_PAGE; i++) | |
260 | - mid[i] = virt_to_mfn(leaf); | |
150 | + for (i = 0; i < P2M_PER_PAGE; i++) | |
151 | + p2m[i] = INVALID_P2M_ENTRY; | |
261 | 152 | } |
262 | 153 | |
263 | -static void p2m_init(unsigned long *p2m) | |
154 | +static void p2m_init_identity(unsigned long *p2m, unsigned long pfn) | |
264 | 155 | { |
265 | 156 | unsigned i; |
266 | 157 | |
267 | - for (i = 0; i < P2M_MID_PER_PAGE; i++) | |
268 | - p2m[i] = INVALID_P2M_ENTRY; | |
158 | + for (i = 0; i < P2M_PER_PAGE; i++) | |
159 | + p2m[i] = IDENTITY_FRAME(pfn + i); | |
269 | 160 | } |
270 | 161 | |
162 | +static void * __ref alloc_p2m_page(void) | |
163 | +{ | |
164 | + if (unlikely(!slab_is_available())) | |
165 | + return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); | |
166 | + | |
167 | + return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); | |
168 | +} | |
169 | + | |
170 | +/* Only to be called in case of a race for a page just allocated! */ | |
171 | +static void free_p2m_page(void *p) | |
172 | +{ | |
173 | + BUG_ON(!slab_is_available()); | |
174 | + free_page((unsigned long)p); | |
175 | +} | |
176 | + | |
271 | 177 | /* |
272 | 178 | * Build the parallel p2m_top_mfn and p2m_mid_mfn structures |
... | ... | @@ -280,40 +186,46 @@ |
280 | 186 | */ |
281 | 187 | void __ref xen_build_mfn_list_list(void) |
282 | 188 | { |
283 | - unsigned long pfn; | |
189 | + unsigned long pfn, mfn; | |
190 | + pte_t *ptep; | |
191 | + unsigned int level, topidx, mididx; | |
192 | + unsigned long *mid_mfn_p; | |
284 | 193 | |
285 | 194 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
286 | 195 | return; |
287 | 196 | |
288 | 197 | /* Pre-initialize p2m_top_mfn to be completely missing */ |
289 | 198 | if (p2m_top_mfn == NULL) { |
290 | - p2m_mid_missing_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); | |
199 | + p2m_mid_missing_mfn = alloc_p2m_page(); | |
291 | 200 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); |
292 | 201 | |
293 | - p2m_top_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); | |
202 | + p2m_top_mfn_p = alloc_p2m_page(); | |
294 | 203 | p2m_top_mfn_p_init(p2m_top_mfn_p); |
295 | 204 | |
296 | - p2m_top_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); | |
205 | + p2m_top_mfn = alloc_p2m_page(); | |
297 | 206 | p2m_top_mfn_init(p2m_top_mfn); |
298 | 207 | } else { |
299 | 208 | /* Reinitialise, mfn's all change after migration */ |
300 | 209 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); |
301 | 210 | } |
302 | 211 | |
303 | - for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { | |
304 | - unsigned topidx = p2m_top_index(pfn); | |
305 | - unsigned mididx = p2m_mid_index(pfn); | |
306 | - unsigned long **mid; | |
307 | - unsigned long *mid_mfn_p; | |
212 | + for (pfn = 0; pfn < xen_max_p2m_pfn && pfn < MAX_P2M_PFN; | |
213 | + pfn += P2M_PER_PAGE) { | |
214 | + topidx = p2m_top_index(pfn); | |
215 | + mididx = p2m_mid_index(pfn); | |
308 | 216 | |
309 | - mid = p2m_top[topidx]; | |
310 | 217 | mid_mfn_p = p2m_top_mfn_p[topidx]; |
218 | + ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), | |
219 | + &level); | |
220 | + BUG_ON(!ptep || level != PG_LEVEL_4K); | |
221 | + mfn = pte_mfn(*ptep); | |
222 | + ptep = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); | |
311 | 223 | |
312 | 224 | /* Don't bother allocating any mfn mid levels if |
313 | 225 | * they're just missing, just update the stored mfn, |
314 | 226 | * since all could have changed over a migrate. |
315 | 227 | */ |
316 | - if (mid == p2m_mid_missing) { | |
228 | + if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) { | |
317 | 229 | BUG_ON(mididx); |
318 | 230 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); |
319 | 231 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); |
320 | 232 | |
... | ... | @@ -322,19 +234,14 @@ |
322 | 234 | } |
323 | 235 | |
324 | 236 | if (mid_mfn_p == p2m_mid_missing_mfn) { |
325 | - /* | |
326 | - * XXX boot-time only! We should never find | |
327 | - * missing parts of the mfn tree after | |
328 | - * runtime. | |
329 | - */ | |
330 | - mid_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); | |
237 | + mid_mfn_p = alloc_p2m_page(); | |
331 | 238 | p2m_mid_mfn_init(mid_mfn_p, p2m_missing); |
332 | 239 | |
333 | 240 | p2m_top_mfn_p[topidx] = mid_mfn_p; |
334 | 241 | } |
335 | 242 | |
336 | 243 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); |
337 | - mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]); | |
244 | + mid_mfn_p[mididx] = mfn; | |
338 | 245 | } |
339 | 246 | } |
340 | 247 | |
341 | 248 | |
342 | 249 | |
343 | 250 | |
344 | 251 | |
345 | 252 | |
346 | 253 | |
347 | 254 | |
348 | 255 | |
349 | 256 | |
350 | 257 | |
351 | 258 | |
352 | 259 | |
353 | 260 | |
354 | 261 | |
355 | 262 | |
356 | 263 | |
357 | 264 | |
358 | 265 | |
359 | 266 | |
360 | 267 | |
361 | 268 | |
362 | 269 | |
363 | 270 | |
364 | 271 | |
365 | 272 | |
366 | 273 | |
367 | 274 | |
368 | 275 | |
369 | 276 | |
370 | 277 | |
371 | 278 | |
372 | 279 | |
373 | 280 | |
374 | 281 | |
375 | 282 | |
376 | 283 | |
377 | 284 | |
378 | 285 | |
379 | 286 | |
... | ... | @@ -353,171 +260,235 @@ |
353 | 260 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ |
354 | 261 | void __init xen_build_dynamic_phys_to_machine(void) |
355 | 262 | { |
356 | - unsigned long *mfn_list; | |
357 | - unsigned long max_pfn; | |
358 | 263 | unsigned long pfn; |
359 | 264 | |
360 | 265 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
361 | 266 | return; |
362 | 267 | |
363 | - mfn_list = (unsigned long *)xen_start_info->mfn_list; | |
364 | - max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | |
365 | - xen_max_p2m_pfn = max_pfn; | |
268 | + xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list; | |
269 | + xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE); | |
366 | 270 | |
367 | - p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | |
368 | - p2m_init(p2m_missing); | |
369 | - p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); | |
370 | - p2m_init(p2m_identity); | |
271 | + for (pfn = xen_start_info->nr_pages; pfn < xen_p2m_size; pfn++) | |
272 | + xen_p2m_addr[pfn] = INVALID_P2M_ENTRY; | |
371 | 273 | |
372 | - p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | |
373 | - p2m_mid_init(p2m_mid_missing, p2m_missing); | |
374 | - p2m_mid_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); | |
375 | - p2m_mid_init(p2m_mid_identity, p2m_identity); | |
274 | + xen_max_p2m_pfn = xen_p2m_size; | |
275 | +} | |
376 | 276 | |
377 | - p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); | |
378 | - p2m_top_init(p2m_top); | |
277 | +#define P2M_TYPE_IDENTITY 0 | |
278 | +#define P2M_TYPE_MISSING 1 | |
279 | +#define P2M_TYPE_PFN 2 | |
280 | +#define P2M_TYPE_UNKNOWN 3 | |
379 | 281 | |
380 | - /* | |
381 | - * The domain builder gives us a pre-constructed p2m array in | |
382 | - * mfn_list for all the pages initially given to us, so we just | |
383 | - * need to graft that into our tree structure. | |
384 | - */ | |
385 | - for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) { | |
386 | - unsigned topidx = p2m_top_index(pfn); | |
387 | - unsigned mididx = p2m_mid_index(pfn); | |
282 | +static int xen_p2m_elem_type(unsigned long pfn) | |
283 | +{ | |
284 | + unsigned long mfn; | |
388 | 285 | |
389 | - if (p2m_top[topidx] == p2m_mid_missing) { | |
390 | - unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | |
391 | - p2m_mid_init(mid, p2m_missing); | |
286 | + if (pfn >= xen_p2m_size) | |
287 | + return P2M_TYPE_IDENTITY; | |
392 | 288 | |
393 | - p2m_top[topidx] = mid; | |
394 | - } | |
289 | + mfn = xen_p2m_addr[pfn]; | |
395 | 290 | |
396 | - /* | |
397 | - * As long as the mfn_list has enough entries to completely | |
398 | - * fill a p2m page, pointing into the array is ok. But if | |
399 | - * not the entries beyond the last pfn will be undefined. | |
400 | - */ | |
401 | - if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) { | |
402 | - unsigned long p2midx; | |
291 | + if (mfn == INVALID_P2M_ENTRY) | |
292 | + return P2M_TYPE_MISSING; | |
403 | 293 | |
404 | - p2midx = max_pfn % P2M_PER_PAGE; | |
405 | - for ( ; p2midx < P2M_PER_PAGE; p2midx++) | |
406 | - mfn_list[pfn + p2midx] = INVALID_P2M_ENTRY; | |
407 | - } | |
408 | - p2m_top[topidx][mididx] = &mfn_list[pfn]; | |
409 | - } | |
294 | + if (mfn & IDENTITY_FRAME_BIT) | |
295 | + return P2M_TYPE_IDENTITY; | |
410 | 296 | |
411 | - m2p_override_init(); | |
297 | + return P2M_TYPE_PFN; | |
412 | 298 | } |
413 | -#ifdef CONFIG_X86_64 | |
414 | -unsigned long __init xen_revector_p2m_tree(void) | |
299 | + | |
300 | +static void __init xen_rebuild_p2m_list(unsigned long *p2m) | |
415 | 301 | { |
416 | - unsigned long va_start; | |
417 | - unsigned long va_end; | |
302 | + unsigned int i, chunk; | |
418 | 303 | unsigned long pfn; |
419 | - unsigned long pfn_free = 0; | |
420 | - unsigned long *mfn_list = NULL; | |
421 | - unsigned long size; | |
304 | + unsigned long *mfns; | |
305 | + pte_t *ptep; | |
306 | + pmd_t *pmdp; | |
307 | + int type; | |
422 | 308 | |
423 | - va_start = xen_start_info->mfn_list; | |
424 | - /*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long), | |
425 | - * so make sure it is rounded up to that */ | |
426 | - size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); | |
427 | - va_end = va_start + size; | |
309 | + p2m_missing = alloc_p2m_page(); | |
310 | + p2m_init(p2m_missing); | |
311 | + p2m_identity = alloc_p2m_page(); | |
312 | + p2m_init(p2m_identity); | |
428 | 313 | |
429 | - /* If we were revectored already, don't do it again. */ | |
430 | - if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET) | |
431 | - return 0; | |
432 | - | |
433 | - mfn_list = alloc_bootmem_align(size, PAGE_SIZE); | |
434 | - if (!mfn_list) { | |
435 | - pr_warn("Could not allocate space for a new P2M tree!\n"); | |
436 | - return xen_start_info->mfn_list; | |
314 | + p2m_missing_pte = alloc_p2m_page(); | |
315 | + paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT); | |
316 | + p2m_identity_pte = alloc_p2m_page(); | |
317 | + paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT); | |
318 | + for (i = 0; i < PTRS_PER_PTE; i++) { | |
319 | + set_pte(p2m_missing_pte + i, | |
320 | + pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL_RO)); | |
321 | + set_pte(p2m_identity_pte + i, | |
322 | + pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL_RO)); | |
437 | 323 | } |
438 | - /* Fill it out with INVALID_P2M_ENTRY value */ | |
439 | - memset(mfn_list, 0xFF, size); | |
440 | 324 | |
441 | - for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) { | |
442 | - unsigned topidx = p2m_top_index(pfn); | |
443 | - unsigned mididx; | |
444 | - unsigned long *mid_p; | |
325 | + for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) { | |
326 | + /* | |
327 | + * Try to map missing/identity PMDs or p2m-pages if possible. | |
328 | + * We have to respect the structure of the mfn_list_list | |
329 | + * which will be built just afterwards. | |
330 | + * Chunk size to test is one p2m page if we are in the middle | |
331 | + * of a mfn_list_list mid page and the complete mid page area | |
332 | + * if we are at index 0 of the mid page. Please note that a | |
333 | + * mid page might cover more than one PMD, e.g. on 32 bit PAE | |
334 | + * kernels. | |
335 | + */ | |
336 | + chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ? | |
337 | + P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE; | |
445 | 338 | |
446 | - if (!p2m_top[topidx]) | |
447 | - continue; | |
339 | + type = xen_p2m_elem_type(pfn); | |
340 | + i = 0; | |
341 | + if (type != P2M_TYPE_PFN) | |
342 | + for (i = 1; i < chunk; i++) | |
343 | + if (xen_p2m_elem_type(pfn + i) != type) | |
344 | + break; | |
345 | + if (i < chunk) | |
346 | + /* Reset to minimal chunk size. */ | |
347 | + chunk = P2M_PER_PAGE; | |
448 | 348 | |
449 | - if (p2m_top[topidx] == p2m_mid_missing) | |
349 | + if (type == P2M_TYPE_PFN || i < chunk) { | |
350 | + /* Use initial p2m page contents. */ | |
351 | +#ifdef CONFIG_X86_64 | |
352 | + mfns = alloc_p2m_page(); | |
353 | + copy_page(mfns, xen_p2m_addr + pfn); | |
354 | +#else | |
355 | + mfns = xen_p2m_addr + pfn; | |
356 | +#endif | |
357 | + ptep = populate_extra_pte((unsigned long)(p2m + pfn)); | |
358 | + set_pte(ptep, | |
359 | + pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL)); | |
450 | 360 | continue; |
361 | + } | |
451 | 362 | |
452 | - mididx = p2m_mid_index(pfn); | |
453 | - mid_p = p2m_top[topidx][mididx]; | |
454 | - if (!mid_p) | |
363 | + if (chunk == P2M_PER_PAGE) { | |
364 | + /* Map complete missing or identity p2m-page. */ | |
365 | + mfns = (type == P2M_TYPE_MISSING) ? | |
366 | + p2m_missing : p2m_identity; | |
367 | + ptep = populate_extra_pte((unsigned long)(p2m + pfn)); | |
368 | + set_pte(ptep, | |
369 | + pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL_RO)); | |
455 | 370 | continue; |
456 | - if ((mid_p == p2m_missing) || (mid_p == p2m_identity)) | |
457 | - continue; | |
371 | + } | |
458 | 372 | |
459 | - if ((unsigned long)mid_p == INVALID_P2M_ENTRY) | |
460 | - continue; | |
373 | + /* Complete missing or identity PMD(s) can be mapped. */ | |
374 | + ptep = (type == P2M_TYPE_MISSING) ? | |
375 | + p2m_missing_pte : p2m_identity_pte; | |
376 | + for (i = 0; i < PMDS_PER_MID_PAGE; i++) { | |
377 | + pmdp = populate_extra_pmd( | |
378 | + (unsigned long)(p2m + pfn + i * PTRS_PER_PTE)); | |
379 | + set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE)); | |
380 | + } | |
381 | + } | |
382 | +} | |
461 | 383 | |
462 | - /* The old va. Rebase it on mfn_list */ | |
463 | - if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) { | |
464 | - unsigned long *new; | |
384 | +void __init xen_vmalloc_p2m_tree(void) | |
385 | +{ | |
386 | + static struct vm_struct vm; | |
465 | 387 | |
466 | - if (pfn_free > (size / sizeof(unsigned long))) { | |
467 | - WARN(1, "Only allocated for %ld pages, but we want %ld!\n", | |
468 | - size / sizeof(unsigned long), pfn_free); | |
469 | - return 0; | |
470 | - } | |
471 | - new = &mfn_list[pfn_free]; | |
388 | + vm.flags = VM_ALLOC; | |
389 | + vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn, | |
390 | + PMD_SIZE * PMDS_PER_MID_PAGE); | |
391 | + vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE); | |
392 | + pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size); | |
472 | 393 | |
473 | - copy_page(new, mid_p); | |
474 | - p2m_top[topidx][mididx] = &mfn_list[pfn_free]; | |
394 | + xen_max_p2m_pfn = vm.size / sizeof(unsigned long); | |
475 | 395 | |
476 | - pfn_free += P2M_PER_PAGE; | |
396 | + xen_rebuild_p2m_list(vm.addr); | |
477 | 397 | |
478 | - } | |
479 | - /* This should be the leafs allocated for identity from _brk. */ | |
480 | - } | |
481 | - return (unsigned long)mfn_list; | |
398 | + xen_p2m_addr = vm.addr; | |
399 | + xen_p2m_size = xen_max_p2m_pfn; | |
482 | 400 | |
401 | + xen_inv_extra_mem(); | |
402 | + | |
403 | + m2p_override_init(); | |
483 | 404 | } |
484 | -#else | |
485 | -unsigned long __init xen_revector_p2m_tree(void) | |
486 | -{ | |
487 | - return 0; | |
488 | -} | |
489 | -#endif | |
405 | + | |
490 | 406 | unsigned long get_phys_to_machine(unsigned long pfn) |
491 | 407 | { |
492 | - unsigned topidx, mididx, idx; | |
408 | + pte_t *ptep; | |
409 | + unsigned int level; | |
493 | 410 | |
494 | - if (unlikely(pfn >= MAX_P2M_PFN)) | |
411 | + if (unlikely(pfn >= xen_p2m_size)) { | |
412 | + if (pfn < xen_max_p2m_pfn) | |
413 | + return xen_chk_extra_mem(pfn); | |
414 | + | |
495 | 415 | return IDENTITY_FRAME(pfn); |
416 | + } | |
496 | 417 | |
497 | - topidx = p2m_top_index(pfn); | |
498 | - mididx = p2m_mid_index(pfn); | |
499 | - idx = p2m_index(pfn); | |
418 | + ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level); | |
419 | + BUG_ON(!ptep || level != PG_LEVEL_4K); | |
500 | 420 | |
501 | 421 | /* |
502 | 422 | * The INVALID_P2M_ENTRY is filled in both p2m_*identity |
503 | 423 | * and in p2m_*missing, so returning the INVALID_P2M_ENTRY |
504 | 424 | * would be wrong. |
505 | 425 | */ |
506 | - if (p2m_top[topidx][mididx] == p2m_identity) | |
426 | + if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity))) | |
507 | 427 | return IDENTITY_FRAME(pfn); |
508 | 428 | |
509 | - return p2m_top[topidx][mididx][idx]; | |
429 | + return xen_p2m_addr[pfn]; | |
510 | 430 | } |
511 | 431 | EXPORT_SYMBOL_GPL(get_phys_to_machine); |
512 | 432 | |
513 | -static void *alloc_p2m_page(void) | |
433 | +/* | |
434 | + * Allocate new pmd(s). It is checked whether the old pmd is still in place. | |
435 | + * If not, nothing is changed. This is okay as the only reason for allocating | |
436 | + * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by an individual |
437 | + * pmd. In case of PAE/x86-32 there are multiple pmds to allocate! | |
438 | + */ | |
439 | +static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg) | |
514 | 440 | { |
515 | - return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); | |
516 | -} | |
441 | + pte_t *ptechk; | |
442 | + pte_t *pteret = ptep; | |
443 | + pte_t *pte_newpg[PMDS_PER_MID_PAGE]; | |
444 | + pmd_t *pmdp; | |
445 | + unsigned int level; | |
446 | + unsigned long flags; | |
447 | + unsigned long vaddr; | |
448 | + int i; | |
517 | 449 | |
518 | -static void free_p2m_page(void *p) | |
519 | -{ | |
520 | - free_page((unsigned long)p); | |
450 | + /* Do all allocations first to bail out in error case. */ | |
451 | + for (i = 0; i < PMDS_PER_MID_PAGE; i++) { | |
452 | + pte_newpg[i] = alloc_p2m_page(); | |
453 | + if (!pte_newpg[i]) { | |
454 | + for (i--; i >= 0; i--) | |
455 | + free_p2m_page(pte_newpg[i]); | |
456 | + | |
457 | + return NULL; | |
458 | + } | |
459 | + } | |
460 | + | |
461 | + vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1); | |
462 | + | |
463 | + for (i = 0; i < PMDS_PER_MID_PAGE; i++) { | |
464 | + copy_page(pte_newpg[i], pte_pg); | |
465 | + paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT); | |
466 | + | |
467 | + pmdp = lookup_pmd_address(vaddr); | |
468 | + BUG_ON(!pmdp); | |
469 | + | |
470 | + spin_lock_irqsave(&p2m_update_lock, flags); | |
471 | + | |
472 | + ptechk = lookup_address(vaddr, &level); | |
473 | + if (ptechk == pte_pg) { | |
474 | + set_pmd(pmdp, | |
475 | + __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE)); | |
476 | + if (vaddr == (addr & ~(PMD_SIZE - 1))) | |
477 | + pteret = pte_offset_kernel(pmdp, addr); | |
478 | + pte_newpg[i] = NULL; | |
479 | + } | |
480 | + | |
481 | + spin_unlock_irqrestore(&p2m_update_lock, flags); | |
482 | + | |
483 | + if (pte_newpg[i]) { | |
484 | + paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT); | |
485 | + free_p2m_page(pte_newpg[i]); | |
486 | + } | |
487 | + | |
488 | + vaddr += PMD_SIZE; | |
489 | + } | |
490 | + | |
491 | + return pteret; | |
521 | 492 | } |
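alloc_p2m_pmd() above follows a classic allocate-outside-lock, recheck-under-lock shape; stripped to its skeleton (names from the patch, per-PMD loop and error handling elided):

    new = alloc_p2m_page();                 /* may sleep, so done unlocked */
    spin_lock_irqsave(&p2m_update_lock, flags);
    if (lookup_address(vaddr, &level) == pte_pg) {  /* still the shared page? */
            set_pmd(pmdp, __pmd(__pa(new) | _KERNPG_TABLE));
            new = NULL;                     /* ownership moved to page table */
    }
    spin_unlock_irqrestore(&p2m_update_lock, flags);
    if (new)
            free_p2m_page(new);             /* lost the race: drop our copy */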
... | ... | @@ -530,58 +501,62 @@ |
530 | 501 | static bool alloc_p2m(unsigned long pfn) |
531 | 502 | { |
532 | 503 | unsigned topidx, mididx; |
533 | - unsigned long ***top_p, **mid; | |
534 | 504 | unsigned long *top_mfn_p, *mid_mfn; |
535 | - unsigned long *p2m_orig; | |
505 | + pte_t *ptep, *pte_pg; | |
506 | + unsigned int level; | |
507 | + unsigned long flags; | |
508 | + unsigned long addr = (unsigned long)(xen_p2m_addr + pfn); | |
509 | + unsigned long p2m_pfn; | |
536 | 510 | |
537 | 511 | topidx = p2m_top_index(pfn); |
538 | 512 | mididx = p2m_mid_index(pfn); |
539 | 513 | |
540 | - top_p = &p2m_top[topidx]; | |
541 | - mid = ACCESS_ONCE(*top_p); | |
514 | + ptep = lookup_address(addr, &level); | |
515 | + BUG_ON(!ptep || level != PG_LEVEL_4K); | |
516 | + pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); | |
542 | 517 | |
543 | - if (mid == p2m_mid_missing) { | |
544 | - /* Mid level is missing, allocate a new one */ | |
545 | - mid = alloc_p2m_page(); | |
546 | - if (!mid) | |
518 | + if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) { | |
519 | + /* PMD level is missing, allocate a new one */ | |
520 | + ptep = alloc_p2m_pmd(addr, ptep, pte_pg); | |
521 | + if (!ptep) | |
547 | 522 | return false; |
548 | - | |
549 | - p2m_mid_init(mid, p2m_missing); | |
550 | - | |
551 | - if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) | |
552 | - free_p2m_page(mid); | |
553 | 523 | } |
554 | 524 | |
555 | - top_mfn_p = &p2m_top_mfn[topidx]; | |
556 | - mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); | |
525 | + if (p2m_top_mfn) { | |
526 | + top_mfn_p = &p2m_top_mfn[topidx]; | |
527 | + mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); | |
557 | 528 | |
558 | - BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); | |
529 | + BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); | |
559 | 530 | |
560 | - if (mid_mfn == p2m_mid_missing_mfn) { | |
561 | - /* Separately check the mid mfn level */ | |
562 | - unsigned long missing_mfn; | |
563 | - unsigned long mid_mfn_mfn; | |
564 | - unsigned long old_mfn; | |
531 | + if (mid_mfn == p2m_mid_missing_mfn) { | |
532 | + /* Separately check the mid mfn level */ | |
533 | + unsigned long missing_mfn; | |
534 | + unsigned long mid_mfn_mfn; | |
535 | + unsigned long old_mfn; | |
565 | 536 | |
566 | - mid_mfn = alloc_p2m_page(); | |
567 | - if (!mid_mfn) | |
568 | - return false; | |
537 | + mid_mfn = alloc_p2m_page(); | |
538 | + if (!mid_mfn) | |
539 | + return false; | |
569 | 540 | |
570 | - p2m_mid_mfn_init(mid_mfn, p2m_missing); | |
541 | + p2m_mid_mfn_init(mid_mfn, p2m_missing); | |
571 | 542 | |
572 | - missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); | |
573 | - mid_mfn_mfn = virt_to_mfn(mid_mfn); | |
574 | - old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); | |
575 | - if (old_mfn != missing_mfn) { | |
576 | - free_p2m_page(mid_mfn); | |
577 | - mid_mfn = mfn_to_virt(old_mfn); | |
578 | - } else { | |
579 | - p2m_top_mfn_p[topidx] = mid_mfn; | |
543 | + missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); | |
544 | + mid_mfn_mfn = virt_to_mfn(mid_mfn); | |
545 | + old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); | |
546 | + if (old_mfn != missing_mfn) { | |
547 | + free_p2m_page(mid_mfn); | |
548 | + mid_mfn = mfn_to_virt(old_mfn); | |
549 | + } else { | |
550 | + p2m_top_mfn_p[topidx] = mid_mfn; | |
551 | + } | |
580 | 552 | } |
553 | + } else { | |
554 | + mid_mfn = NULL; | |
581 | 555 | } |
582 | 556 | |
583 | - p2m_orig = ACCESS_ONCE(p2m_top[topidx][mididx]); | |
584 | - if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) { | |
557 | + p2m_pfn = pte_pfn(ACCESS_ONCE(*ptep)); | |
558 | + if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) || | |
559 | + p2m_pfn == PFN_DOWN(__pa(p2m_missing))) { | |
585 | 560 | /* p2m leaf page is missing */ |
586 | 561 | unsigned long *p2m; |
587 | 562 | |
... | ... | @@ -589,183 +564,36 @@ |
589 | 564 | if (!p2m) |
590 | 565 | return false; |
591 | 566 | |
592 | - p2m_init(p2m); | |
593 | - | |
594 | - if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig) | |
595 | - free_p2m_page(p2m); | |
567 | + if (p2m_pfn == PFN_DOWN(__pa(p2m_missing))) | |
568 | + p2m_init(p2m); | |
596 | 569 | else |
597 | - mid_mfn[mididx] = virt_to_mfn(p2m); | |
598 | - } | |
570 | + p2m_init_identity(p2m, pfn); | |
599 | 571 | |
600 | - return true; | |
601 | -} | |
572 | + spin_lock_irqsave(&p2m_update_lock, flags); | |
602 | 573 | |
603 | -static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary) | |
604 | -{ | |
605 | - unsigned topidx, mididx, idx; | |
606 | - unsigned long *p2m; | |
607 | - | |
608 | - topidx = p2m_top_index(pfn); | |
609 | - mididx = p2m_mid_index(pfn); | |
610 | - idx = p2m_index(pfn); | |
611 | - | |
612 | - /* Pfff.. No boundary cross-over, lets get out. */ | |
613 | - if (!idx && check_boundary) | |
614 | - return false; | |
615 | - | |
616 | - WARN(p2m_top[topidx][mididx] == p2m_identity, | |
617 | - "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n", | |
618 | - topidx, mididx); | |
619 | - | |
620 | - /* | |
621 | - * Could be done by xen_build_dynamic_phys_to_machine.. | |
622 | - */ | |
623 | - if (p2m_top[topidx][mididx] != p2m_missing) | |
624 | - return false; | |
625 | - | |
626 | - /* Boundary cross-over for the edges: */ | |
627 | - p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); | |
628 | - | |
629 | - p2m_init(p2m); | |
630 | - | |
631 | - p2m_top[topidx][mididx] = p2m; | |
632 | - | |
633 | - return true; | |
634 | -} | |
635 | - | |
636 | -static bool __init early_alloc_p2m_middle(unsigned long pfn) | |
637 | -{ | |
638 | - unsigned topidx = p2m_top_index(pfn); | |
639 | - unsigned long **mid; | |
640 | - | |
641 | - mid = p2m_top[topidx]; | |
642 | - if (mid == p2m_mid_missing) { | |
643 | - mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | |
644 | - | |
645 | - p2m_mid_init(mid, p2m_missing); | |
646 | - | |
647 | - p2m_top[topidx] = mid; | |
648 | - } | |
649 | - return true; | |
650 | -} | |
651 | - | |
652 | -/* | |
653 | - * Skim over the P2M tree looking at pages that are either filled with | |
654 | - * INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and | |
655 | - * replace the P2M leaf with a p2m_missing or p2m_identity. | |
656 | - * Stick the old page in the new P2M tree location. | |
657 | - */ | |
658 | -static bool __init early_can_reuse_p2m_middle(unsigned long set_pfn) | |
659 | -{ | |
660 | - unsigned topidx; | |
661 | - unsigned mididx; | |
662 | - unsigned ident_pfns; | |
663 | - unsigned inv_pfns; | |
664 | - unsigned long *p2m; | |
665 | - unsigned idx; | |
666 | - unsigned long pfn; | |
667 | - | |
668 | - /* We only look when this entails a P2M middle layer */ | |
669 | - if (p2m_index(set_pfn)) | |
670 | - return false; | |
671 | - | |
672 | - for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) { | |
673 | - topidx = p2m_top_index(pfn); | |
674 | - | |
675 | - if (!p2m_top[topidx]) | |
676 | - continue; | |
677 | - | |
678 | - if (p2m_top[topidx] == p2m_mid_missing) | |
679 | - continue; | |
680 | - | |
681 | - mididx = p2m_mid_index(pfn); | |
682 | - p2m = p2m_top[topidx][mididx]; | |
683 | - if (!p2m) | |
684 | - continue; | |
685 | - | |
686 | - if ((p2m == p2m_missing) || (p2m == p2m_identity)) | |
687 | - continue; | |
688 | - | |
689 | - if ((unsigned long)p2m == INVALID_P2M_ENTRY) | |
690 | - continue; | |
691 | - | |
692 | - ident_pfns = 0; | |
693 | - inv_pfns = 0; | |
694 | - for (idx = 0; idx < P2M_PER_PAGE; idx++) { | |
695 | - /* IDENTITY_PFNs are 1:1 */ | |
696 | - if (p2m[idx] == IDENTITY_FRAME(pfn + idx)) | |
697 | - ident_pfns++; | |
698 | - else if (p2m[idx] == INVALID_P2M_ENTRY) | |
699 | - inv_pfns++; | |
700 | - else | |
701 | - break; | |
574 | + if (pte_pfn(*ptep) == p2m_pfn) { | |
575 | + set_pte(ptep, | |
576 | + pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL)); | |
577 | + if (mid_mfn) | |
578 | + mid_mfn[mididx] = virt_to_mfn(p2m); | |
579 | + p2m = NULL; | |
702 | 580 | } |
703 | - if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE)) | |
704 | - goto found; | |
705 | - } | |
706 | - return false; | |
707 | -found: | |
708 | - /* Found one, replace old with p2m_identity or p2m_missing */ | |
709 | - p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing); | |
710 | 581 | |
711 | - /* Reset where we want to stick the old page in. */ | |
712 | - topidx = p2m_top_index(set_pfn); | |
713 | - mididx = p2m_mid_index(set_pfn); | |
582 | + spin_unlock_irqrestore(&p2m_update_lock, flags); | |
714 | 583 | |
715 | - /* This shouldn't happen */ | |
716 | - if (WARN_ON(p2m_top[topidx] == p2m_mid_missing)) | |
717 | - early_alloc_p2m_middle(set_pfn); | |
718 | - | |
719 | - if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing)) | |
720 | - return false; | |
721 | - | |
722 | - p2m_init(p2m); | |
723 | - p2m_top[topidx][mididx] = p2m; | |
724 | - | |
725 | - return true; | |
726 | -} | |
727 | -bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) | |
728 | -{ | |
729 | - if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | |
730 | - if (!early_alloc_p2m_middle(pfn)) | |
731 | - return false; | |
732 | - | |
733 | - if (early_can_reuse_p2m_middle(pfn)) | |
734 | - return __set_phys_to_machine(pfn, mfn); | |
735 | - | |
736 | - if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/)) | |
737 | - return false; | |
738 | - | |
739 | - if (!__set_phys_to_machine(pfn, mfn)) | |
740 | - return false; | |
584 | + if (p2m) | |
585 | + free_p2m_page(p2m); | |
741 | 586 | } |
742 | 587 | |
743 | 588 | return true; |
744 | 589 | } |
745 | 590 | |
746 | -static void __init early_split_p2m(unsigned long pfn) | |
747 | -{ | |
748 | - unsigned long mididx, idx; | |
749 | - | |
750 | - mididx = p2m_mid_index(pfn); | |
751 | - idx = p2m_index(pfn); | |
752 | - | |
753 | - /* | |
754 | - * Allocate new middle and leaf pages if this pfn lies in the | |
755 | - * middle of one. | |
756 | - */ | |
757 | - if (mididx || idx) | |
758 | - early_alloc_p2m_middle(pfn); | |
759 | - if (idx) | |
760 | - early_alloc_p2m(pfn, false); | |
761 | -} | |
762 | - | |
763 | 591 | unsigned long __init set_phys_range_identity(unsigned long pfn_s, |
764 | 592 | unsigned long pfn_e) |
765 | 593 | { |
766 | 594 | unsigned long pfn; |
767 | 595 | |
768 | - if (unlikely(pfn_s >= MAX_P2M_PFN)) | |
596 | + if (unlikely(pfn_s >= xen_p2m_size)) | |
769 | 597 | return 0; |
770 | 598 | |
771 | 599 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) |
... | ... | @@ -774,101 +602,51 @@ |
774 | 602 | if (pfn_s > pfn_e) |
775 | 603 | return 0; |
776 | 604 | |
777 | - if (pfn_e > MAX_P2M_PFN) | |
778 | - pfn_e = MAX_P2M_PFN; | |
605 | + if (pfn_e > xen_p2m_size) | |
606 | + pfn_e = xen_p2m_size; | |
779 | 607 | |
780 | - early_split_p2m(pfn_s); | |
781 | - early_split_p2m(pfn_e); | |
608 | + for (pfn = pfn_s; pfn < pfn_e; pfn++) | |
609 | + xen_p2m_addr[pfn] = IDENTITY_FRAME(pfn); | |
782 | 610 | |
783 | - for (pfn = pfn_s; pfn < pfn_e;) { | |
784 | - unsigned topidx = p2m_top_index(pfn); | |
785 | - unsigned mididx = p2m_mid_index(pfn); | |
786 | - | |
787 | - if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) | |
788 | - break; | |
789 | - pfn++; | |
790 | - | |
791 | - /* | |
792 | - * If the PFN was set to a middle or leaf identity | |
793 | - * page the remainder must also be identity, so skip | |
794 | - * ahead to the next middle or leaf entry. | |
795 | - */ | |
796 | - if (p2m_top[topidx] == p2m_mid_identity) | |
797 | - pfn = ALIGN(pfn, P2M_MID_PER_PAGE * P2M_PER_PAGE); | |
798 | - else if (p2m_top[topidx][mididx] == p2m_identity) | |
799 | - pfn = ALIGN(pfn, P2M_PER_PAGE); | |
800 | - } | |
801 | - | |
802 | - WARN((pfn - pfn_s) != (pfn_e - pfn_s), | |
803 | - "Identity mapping failed. We are %ld short of 1-1 mappings!\n", | |
804 | - (pfn_e - pfn_s) - (pfn - pfn_s)); | |
805 | - | |
806 | 611 | return pfn - pfn_s; |
807 | 612 | } |
808 | 613 | |
809 | -/* Try to install p2m mapping; fail if intermediate bits missing */ | |
810 | 614 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) |
811 | 615 | { |
812 | - unsigned topidx, mididx, idx; | |
616 | + pte_t *ptep; | |
617 | + unsigned int level; | |
813 | 618 | |
814 | 619 | /* don't track P2M changes in autotranslate guests */ |
815 | 620 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) |
816 | 621 | return true; |
817 | 622 | |
818 | - if (unlikely(pfn >= MAX_P2M_PFN)) { | |
623 | + if (unlikely(pfn >= xen_p2m_size)) { | |
819 | 624 | BUG_ON(mfn != INVALID_P2M_ENTRY); |
820 | 625 | return true; |
821 | 626 | } |
822 | 627 | |
823 | - topidx = p2m_top_index(pfn); | |
824 | - mididx = p2m_mid_index(pfn); | |
825 | - idx = p2m_index(pfn); | |
628 | + if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn))) | |
629 | + return true; | |
826 | 630 | |
827 | - /* For sparse holes were the p2m leaf has real PFN along with | |
828 | - * PCI holes, stick in the PFN as the MFN value. | |
829 | - * | |
830 | - * set_phys_range_identity() will have allocated new middle | |
831 | - * and leaf pages as required so an existing p2m_mid_missing | |
832 | - * or p2m_missing mean that whole range will be identity so | |
833 | - * these can be switched to p2m_mid_identity or p2m_identity. | |
834 | - */ | |
835 | - if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) { | |
836 | - if (p2m_top[topidx] == p2m_mid_identity) | |
837 | - return true; | |
631 | + ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level); | |
632 | + BUG_ON(!ptep || level != PG_LEVEL_4K); | |
838 | 633 | |
839 | - if (p2m_top[topidx] == p2m_mid_missing) { | |
840 | - WARN_ON(cmpxchg(&p2m_top[topidx], p2m_mid_missing, | |
841 | - p2m_mid_identity) != p2m_mid_missing); | |
842 | - return true; | |
843 | - } | |
844 | - | |
845 | - if (p2m_top[topidx][mididx] == p2m_identity) | |
846 | - return true; | |
847 | - | |
848 | - /* Swap over from MISSING to IDENTITY if needed. */ | |
849 | - if (p2m_top[topidx][mididx] == p2m_missing) { | |
850 | - WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing, | |
851 | - p2m_identity) != p2m_missing); | |
852 | - return true; | |
853 | - } | |
854 | - } | |
855 | - | |
856 | - if (p2m_top[topidx][mididx] == p2m_missing) | |
634 | + if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_missing))) | |
857 | 635 | return mfn == INVALID_P2M_ENTRY; |
858 | 636 | |
859 | - p2m_top[topidx][mididx][idx] = mfn; | |
637 | + if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity))) | |
638 | + return mfn == IDENTITY_FRAME(pfn); | |
860 | 639 | |
861 | - return true; | |
640 | + return false; | |
862 | 641 | } |
863 | 642 | |
864 | 643 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) |
865 | 644 | { |
866 | - if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | |
645 | + if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | |
867 | 646 | if (!alloc_p2m(pfn)) |
868 | 647 | return false; |
869 | 648 | |
870 | - if (!__set_phys_to_machine(pfn, mfn)) | |
871 | - return false; | |
649 | + return __set_phys_to_machine(pfn, mfn); | |
872 | 650 | } |
873 | 651 | |
874 | 652 | return true; |
875 | 653 | |
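
The rewritten __set_phys_to_machine() above is the core of the linear p2m change: the common case is now a single store into the virtually mapped array, and the shared read-only p2m_missing/p2m_identity pages are only consulted when that store faults. A minimal user-space sketch of the same decision logic follows; the shared pages and the page-granular view of the list are stand-ins for the kernel's real read-only mappings, and where the kernel attempts the write first and uses the fault to detect the read-only case, this model checks first:

    #include <stdbool.h>
    #include <stdio.h>

    #define P2M_PER_PAGE      512UL
    #define INVALID_P2M_ENTRY (~0UL)
    #define IDENTITY_BIT      (1UL << 62)          /* stand-in for IDENTITY_FRAME_BIT */
    #define IDENTITY_FRAME(p) ((p) | IDENTITY_BIT)

    static unsigned long p2m_missing[P2M_PER_PAGE];   /* shared "missing" page  */
    static unsigned long p2m_identity[P2M_PER_PAGE];  /* shared "identity" page */
    static unsigned long *p2m_pages[3];               /* page-granular view of the linear list */

    static bool model_set_p2m(unsigned long pfn, unsigned long mfn)
    {
        unsigned long *page = p2m_pages[pfn / P2M_PER_PAGE];

        /* Slots backed by a shared page are mapped read-only in the kernel;
         * a write there can only "succeed" if it would be a no-op. */
        if (page == p2m_missing)
            return mfn == INVALID_P2M_ENTRY;
        if (page == p2m_identity)
            return mfn == IDENTITY_FRAME(pfn);

        page[pfn % P2M_PER_PAGE] = mfn;   /* the common single-store path */
        return true;
    }

    int main(void)
    {
        static unsigned long leaf[P2M_PER_PAGE];

        p2m_pages[0] = leaf;           /* populated leaf page */
        p2m_pages[1] = p2m_missing;    /* hole in the p2m     */
        p2m_pages[2] = p2m_identity;   /* 1:1 mapped range    */

        printf("%d %d %d\n",
               model_set_p2m(5, 0x1234),                  /* 1: plain store */
               model_set_p2m(P2M_PER_PAGE + 1, 0x1234),   /* 0: caller must alloc_p2m() */
               model_set_p2m(2 * P2M_PER_PAGE + 1,
                             IDENTITY_FRAME(2 * P2M_PER_PAGE + 1))); /* 1: already identity */
        return 0;
    }

A false return is exactly the signal set_phys_to_machine() uses to call alloc_p2m() and retry, as in the hunk above.
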
... | ... | @@ -877,15 +655,16 @@ |
877 | 655 | #define M2P_OVERRIDE_HASH_SHIFT 10 |
878 | 656 | #define M2P_OVERRIDE_HASH (1 << M2P_OVERRIDE_HASH_SHIFT) |
879 | 657 | |
880 | -static RESERVE_BRK_ARRAY(struct list_head, m2p_overrides, M2P_OVERRIDE_HASH); | |
658 | +static struct list_head *m2p_overrides; | |
881 | 659 | static DEFINE_SPINLOCK(m2p_override_lock); |
882 | 660 | |
883 | 661 | static void __init m2p_override_init(void) |
884 | 662 | { |
885 | 663 | unsigned i; |
886 | 664 | |
887 | - m2p_overrides = extend_brk(sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH, | |
888 | - sizeof(unsigned long)); | |
665 | + m2p_overrides = alloc_bootmem_align( | |
666 | + sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH, | |
667 | + sizeof(unsigned long)); | |
889 | 668 | |
890 | 669 | for (i = 0; i < M2P_OVERRIDE_HASH; i++) |
891 | 670 | INIT_LIST_HEAD(&m2p_overrides[i]); |
892 | 671 | |
... | ... | @@ -896,68 +675,9 @@ |
896 | 675 | return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT); |
897 | 676 | } |
898 | 677 | |
899 | -int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, | |
900 | - struct gnttab_map_grant_ref *kmap_ops, | |
901 | - struct page **pages, unsigned int count) | |
902 | -{ | |
903 | - int i, ret = 0; | |
904 | - bool lazy = false; | |
905 | - pte_t *pte; | |
906 | - | |
907 | - if (xen_feature(XENFEAT_auto_translated_physmap)) | |
908 | - return 0; | |
909 | - | |
910 | - if (kmap_ops && | |
911 | - !in_interrupt() && | |
912 | - paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { | |
913 | - arch_enter_lazy_mmu_mode(); | |
914 | - lazy = true; | |
915 | - } | |
916 | - | |
917 | - for (i = 0; i < count; i++) { | |
918 | - unsigned long mfn, pfn; | |
919 | - | |
920 | - /* Do not add to override if the map failed. */ | |
921 | - if (map_ops[i].status) | |
922 | - continue; | |
923 | - | |
924 | - if (map_ops[i].flags & GNTMAP_contains_pte) { | |
925 | - pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + | |
926 | - (map_ops[i].host_addr & ~PAGE_MASK)); | |
927 | - mfn = pte_mfn(*pte); | |
928 | - } else { | |
929 | - mfn = PFN_DOWN(map_ops[i].dev_bus_addr); | |
930 | - } | |
931 | - pfn = page_to_pfn(pages[i]); | |
932 | - | |
933 | - WARN_ON(PagePrivate(pages[i])); | |
934 | - SetPagePrivate(pages[i]); | |
935 | - set_page_private(pages[i], mfn); | |
936 | - pages[i]->index = pfn_to_mfn(pfn); | |
937 | - | |
938 | - if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { | |
939 | - ret = -ENOMEM; | |
940 | - goto out; | |
941 | - } | |
942 | - | |
943 | - if (kmap_ops) { | |
944 | - ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]); | |
945 | - if (ret) | |
946 | - goto out; | |
947 | - } | |
948 | - } | |
949 | - | |
950 | -out: | |
951 | - if (lazy) | |
952 | - arch_leave_lazy_mmu_mode(); | |
953 | - | |
954 | - return ret; | |
955 | -} | |
956 | -EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); | |
957 | - | |
958 | 678 | /* Add an MFN override for a particular page */ |
959 | -int m2p_add_override(unsigned long mfn, struct page *page, | |
960 | - struct gnttab_map_grant_ref *kmap_op) | |
679 | +static int m2p_add_override(unsigned long mfn, struct page *page, | |
680 | + struct gnttab_map_grant_ref *kmap_op) | |
961 | 681 | { |
962 | 682 | unsigned long flags; |
963 | 683 | unsigned long pfn; |
... | ... | @@ -970,7 +690,7 @@ |
970 | 690 | address = (unsigned long)__va(pfn << PAGE_SHIFT); |
971 | 691 | ptep = lookup_address(address, &level); |
972 | 692 | if (WARN(ptep == NULL || level != PG_LEVEL_4K, |
973 | - "m2p_add_override: pfn %lx not mapped", pfn)) | |
693 | + "m2p_add_override: pfn %lx not mapped", pfn)) | |
974 | 694 | return -EINVAL; |
975 | 695 | } |
... | ... | @@ -1004,19 +724,19 @@ |
1004 | 724 | * because mfn_to_pfn (that ends up being called by GUPF) will |
1005 | 725 | * return the backend pfn rather than the frontend pfn. */ |
1006 | 726 | pfn = mfn_to_pfn_no_overrides(mfn); |
1007 | - if (get_phys_to_machine(pfn) == mfn) | |
727 | + if (__pfn_to_mfn(pfn) == mfn) | |
1008 | 728 | set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); |
1009 | 729 | |
1010 | 730 | return 0; |
1011 | 731 | } |
1012 | -EXPORT_SYMBOL_GPL(m2p_add_override); | |
1013 | 732 | |
1014 | -int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, | |
1015 | - struct gnttab_map_grant_ref *kmap_ops, | |
1016 | - struct page **pages, unsigned int count) | |
733 | +int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, | |
734 | + struct gnttab_map_grant_ref *kmap_ops, | |
735 | + struct page **pages, unsigned int count) | |
1017 | 736 | { |
1018 | 737 | int i, ret = 0; |
1019 | 738 | bool lazy = false; |
739 | + pte_t *pte; | |
1020 | 740 | |
1021 | 741 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
1022 | 742 | return 0; |
... | ... | @@ -1029,37 +749,77 @@ |
1029 | 749 | } |
1030 | 750 | |
1031 | 751 | for (i = 0; i < count; i++) { |
1032 | - unsigned long mfn = get_phys_to_machine(page_to_pfn(pages[i])); | |
1033 | - unsigned long pfn = page_to_pfn(pages[i]); | |
752 | + unsigned long mfn, pfn; | |
1034 | 753 | |
1035 | - if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { | |
1036 | - ret = -EINVAL; | |
1037 | - goto out; | |
754 | + /* Do not add to override if the map failed. */ | |
755 | + if (map_ops[i].status) | |
756 | + continue; | |
757 | + | |
758 | + if (map_ops[i].flags & GNTMAP_contains_pte) { | |
759 | + pte = (pte_t *)(mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + | |
760 | + (map_ops[i].host_addr & ~PAGE_MASK)); | |
761 | + mfn = pte_mfn(*pte); | |
762 | + } else { | |
763 | + mfn = PFN_DOWN(map_ops[i].dev_bus_addr); | |
1038 | 764 | } |
765 | + pfn = page_to_pfn(pages[i]); | |
1039 | 766 | |
1040 | - set_page_private(pages[i], INVALID_P2M_ENTRY); | |
1041 | - WARN_ON(!PagePrivate(pages[i])); | |
1042 | - ClearPagePrivate(pages[i]); | |
1043 | - set_phys_to_machine(pfn, pages[i]->index); | |
767 | + WARN_ON(PagePrivate(pages[i])); | |
768 | + SetPagePrivate(pages[i]); | |
769 | + set_page_private(pages[i], mfn); | |
770 | + pages[i]->index = pfn_to_mfn(pfn); | |
1044 | 771 | |
1045 | - if (kmap_ops) | |
1046 | - ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn); | |
1047 | - if (ret) | |
772 | + if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { | |
773 | + ret = -ENOMEM; | |
1048 | 774 | goto out; |
775 | + } | |
776 | + | |
777 | + if (kmap_ops) { | |
778 | + ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]); | |
779 | + if (ret) | |
780 | + goto out; | |
781 | + } | |
1049 | 782 | } |
1050 | 783 | |
1051 | 784 | out: |
1052 | 785 | if (lazy) |
1053 | 786 | arch_leave_lazy_mmu_mode(); |
787 | + | |
1054 | 788 | return ret; |
1055 | 789 | } |
1056 | -EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); | |
790 | +EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); | |
1057 | 791 | |
1058 | -int m2p_remove_override(struct page *page, | |
1059 | - struct gnttab_map_grant_ref *kmap_op, | |
1060 | - unsigned long mfn) | |
792 | +static struct page *m2p_find_override(unsigned long mfn) | |
1061 | 793 | { |
1062 | 794 | unsigned long flags; |
795 | + struct list_head *bucket; | |
796 | + struct page *p, *ret; | |
797 | + | |
798 | + if (unlikely(!m2p_overrides)) | |
799 | + return NULL; | |
800 | + | |
801 | + ret = NULL; | |
802 | + bucket = &m2p_overrides[mfn_hash(mfn)]; | |
803 | + | |
804 | + spin_lock_irqsave(&m2p_override_lock, flags); | |
805 | + | |
806 | + list_for_each_entry(p, bucket, lru) { | |
807 | + if (page_private(p) == mfn) { | |
808 | + ret = p; | |
809 | + break; | |
810 | + } | |
811 | + } | |
812 | + | |
813 | + spin_unlock_irqrestore(&m2p_override_lock, flags); | |
814 | + | |
815 | + return ret; | |
816 | +} | |
817 | + | |
818 | +static int m2p_remove_override(struct page *page, | |
819 | + struct gnttab_map_grant_ref *kmap_op, | |
820 | + unsigned long mfn) | |
821 | +{ | |
822 | + unsigned long flags; | |
1063 | 823 | unsigned long pfn; |
1064 | 824 | unsigned long uninitialized_var(address); |
1065 | 825 | unsigned level; |
... | ... | @@ -1072,7 +832,7 @@ |
1072 | 832 | ptep = lookup_address(address, &level); |
1073 | 833 | |
1074 | 834 | if (WARN(ptep == NULL || level != PG_LEVEL_4K, |
1075 | - "m2p_remove_override: pfn %lx not mapped", pfn)) | |
835 | + "m2p_remove_override: pfn %lx not mapped", pfn)) | |
1076 | 836 | return -EINVAL; |
1077 | 837 | } |
1078 | 838 | |
... | ... | @@ -1102,9 +862,8 @@ |
1102 | 862 | * hypercall actually returned an error. |
1103 | 863 | */ |
1104 | 864 | if (kmap_op->handle == GNTST_general_error) { |
1105 | - printk(KERN_WARNING "m2p_remove_override: " | |
1106 | - "pfn %lx mfn %lx, failed to modify kernel mappings", | |
1107 | - pfn, mfn); | |
865 | + pr_warn("m2p_remove_override: pfn %lx mfn %lx, failed to modify kernel mappings", | |
866 | + pfn, mfn); | |
1108 | 867 | put_balloon_scratch_page(); |
1109 | 868 | return -1; |
1110 | 869 | } |
1111 | 870 | |
... | ... | @@ -1112,14 +871,14 @@ |
1112 | 871 | xen_mc_batch(); |
1113 | 872 | |
1114 | 873 | mcs = __xen_mc_entry( |
1115 | - sizeof(struct gnttab_unmap_and_replace)); | |
874 | + sizeof(struct gnttab_unmap_and_replace)); | |
1116 | 875 | unmap_op = mcs.args; |
1117 | 876 | unmap_op->host_addr = kmap_op->host_addr; |
1118 | 877 | unmap_op->new_addr = scratch_page_address; |
1119 | 878 | unmap_op->handle = kmap_op->handle; |
1120 | 879 | |
1121 | 880 | MULTI_grant_table_op(mcs.mc, |
1122 | - GNTTABOP_unmap_and_replace, unmap_op, 1); | |
881 | + GNTTABOP_unmap_and_replace, unmap_op, 1); | |
1123 | 882 | |
1124 | 883 | mcs = __xen_mc_entry(0); |
1125 | 884 | MULTI_update_va_mapping(mcs.mc, scratch_page_address, |
... | ... | @@ -1145,35 +904,56 @@ |
1145 | 904 | * pfn again. */ |
1146 | 905 | mfn &= ~FOREIGN_FRAME_BIT; |
1147 | 906 | pfn = mfn_to_pfn_no_overrides(mfn); |
1148 | - if (get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && | |
907 | + if (__pfn_to_mfn(pfn) == FOREIGN_FRAME(mfn) && | |
1149 | 908 | m2p_find_override(mfn) == NULL) |
1150 | 909 | set_phys_to_machine(pfn, mfn); |
1151 | 910 | |
1152 | 911 | return 0; |
1153 | 912 | } |
1154 | -EXPORT_SYMBOL_GPL(m2p_remove_override); | |
1155 | 913 | |
1156 | -struct page *m2p_find_override(unsigned long mfn) | |
914 | +int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, | |
915 | + struct gnttab_map_grant_ref *kmap_ops, | |
916 | + struct page **pages, unsigned int count) | |
1157 | 917 | { |
1158 | - unsigned long flags; | |
1159 | - struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)]; | |
1160 | - struct page *p, *ret; | |
918 | + int i, ret = 0; | |
919 | + bool lazy = false; | |
1161 | 920 | |
1162 | - ret = NULL; | |
921 | + if (xen_feature(XENFEAT_auto_translated_physmap)) | |
922 | + return 0; | |
1163 | 923 | |
1164 | - spin_lock_irqsave(&m2p_override_lock, flags); | |
924 | + if (kmap_ops && | |
925 | + !in_interrupt() && | |
926 | + paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { | |
927 | + arch_enter_lazy_mmu_mode(); | |
928 | + lazy = true; | |
929 | + } | |
1165 | 930 | |
1166 | - list_for_each_entry(p, bucket, lru) { | |
1167 | - if (page_private(p) == mfn) { | |
1168 | - ret = p; | |
1169 | - break; | |
931 | + for (i = 0; i < count; i++) { | |
932 | + unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i])); | |
933 | + unsigned long pfn = page_to_pfn(pages[i]); | |
934 | + | |
935 | + if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { | |
936 | + ret = -EINVAL; | |
937 | + goto out; | |
1170 | 938 | } |
1171 | - } | |
1172 | 939 | |
1173 | - spin_unlock_irqrestore(&m2p_override_lock, flags); | |
940 | + set_page_private(pages[i], INVALID_P2M_ENTRY); | |
941 | + WARN_ON(!PagePrivate(pages[i])); | |
942 | + ClearPagePrivate(pages[i]); | |
943 | + set_phys_to_machine(pfn, pages[i]->index); | |
1174 | 944 | |
945 | + if (kmap_ops) | |
946 | + ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn); | |
947 | + if (ret) | |
948 | + goto out; | |
949 | + } | |
950 | + | |
951 | +out: | |
952 | + if (lazy) | |
953 | + arch_leave_lazy_mmu_mode(); | |
1175 | 954 | return ret; |
1176 | 955 | } |
956 | +EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); | |
1177 | 957 | |
1178 | 958 | unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) |
1179 | 959 | { |
... | ... | @@ -1192,79 +972,29 @@ |
1192 | 972 | #include "debugfs.h" |
1193 | 973 | static int p2m_dump_show(struct seq_file *m, void *v) |
1194 | 974 | { |
1195 | - static const char * const level_name[] = { "top", "middle", | |
1196 | - "entry", "abnormal", "error"}; | |
1197 | -#define TYPE_IDENTITY 0 | |
1198 | -#define TYPE_MISSING 1 | |
1199 | -#define TYPE_PFN 2 | |
1200 | -#define TYPE_UNKNOWN 3 | |
1201 | 975 | static const char * const type_name[] = { |
1202 | - [TYPE_IDENTITY] = "identity", | |
1203 | - [TYPE_MISSING] = "missing", | |
1204 | - [TYPE_PFN] = "pfn", | |
1205 | - [TYPE_UNKNOWN] = "abnormal"}; | |
1206 | - unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0; | |
1207 | - unsigned int uninitialized_var(prev_level); | |
1208 | - unsigned int uninitialized_var(prev_type); | |
976 | + [P2M_TYPE_IDENTITY] = "identity", | |
977 | + [P2M_TYPE_MISSING] = "missing", | |
978 | + [P2M_TYPE_PFN] = "pfn", | |
979 | + [P2M_TYPE_UNKNOWN] = "abnormal"}; | |
980 | + unsigned long pfn, first_pfn; | |
981 | + int type, prev_type; | |
1209 | 982 | |
1210 | - if (!p2m_top) | |
1211 | - return 0; | |
983 | + prev_type = xen_p2m_elem_type(0); | |
984 | + first_pfn = 0; | |
1212 | 985 | |
1213 | - for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) { | |
1214 | - unsigned topidx = p2m_top_index(pfn); | |
1215 | - unsigned mididx = p2m_mid_index(pfn); | |
1216 | - unsigned idx = p2m_index(pfn); | |
1217 | - unsigned lvl, type; | |
1218 | - | |
1219 | - lvl = 4; | |
1220 | - type = TYPE_UNKNOWN; | |
1221 | - if (p2m_top[topidx] == p2m_mid_missing) { | |
1222 | - lvl = 0; type = TYPE_MISSING; | |
1223 | - } else if (p2m_top[topidx] == NULL) { | |
1224 | - lvl = 0; type = TYPE_UNKNOWN; | |
1225 | - } else if (p2m_top[topidx][mididx] == NULL) { | |
1226 | - lvl = 1; type = TYPE_UNKNOWN; | |
1227 | - } else if (p2m_top[topidx][mididx] == p2m_identity) { | |
1228 | - lvl = 1; type = TYPE_IDENTITY; | |
1229 | - } else if (p2m_top[topidx][mididx] == p2m_missing) { | |
1230 | - lvl = 1; type = TYPE_MISSING; | |
1231 | - } else if (p2m_top[topidx][mididx][idx] == 0) { | |
1232 | - lvl = 2; type = TYPE_UNKNOWN; | |
1233 | - } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) { | |
1234 | - lvl = 2; type = TYPE_IDENTITY; | |
1235 | - } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) { | |
1236 | - lvl = 2; type = TYPE_MISSING; | |
1237 | - } else if (p2m_top[topidx][mididx][idx] == pfn) { | |
1238 | - lvl = 2; type = TYPE_PFN; | |
1239 | - } else if (p2m_top[topidx][mididx][idx] != pfn) { | |
1240 | - lvl = 2; type = TYPE_PFN; | |
1241 | - } | |
1242 | - if (pfn == 0) { | |
1243 | - prev_level = lvl; | |
986 | + for (pfn = 0; pfn < xen_p2m_size; pfn++) { | |
987 | + type = xen_p2m_elem_type(pfn); | |
988 | + if (type != prev_type) { | |
989 | + seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn, | |
990 | + type_name[prev_type]); | |
1244 | 991 | prev_type = type; |
992 | + first_pfn = pfn; | |
1245 | 993 | } |
1246 | - if (pfn == MAX_DOMAIN_PAGES-1) { | |
1247 | - lvl = 3; | |
1248 | - type = TYPE_UNKNOWN; | |
1249 | - } | |
1250 | - if (prev_type != type) { | |
1251 | - seq_printf(m, " [0x%lx->0x%lx] %s\n", | |
1252 | - prev_pfn_type, pfn, type_name[prev_type]); | |
1253 | - prev_pfn_type = pfn; | |
1254 | - prev_type = type; | |
1255 | - } | |
1256 | - if (prev_level != lvl) { | |
1257 | - seq_printf(m, " [0x%lx->0x%lx] level %s\n", | |
1258 | - prev_pfn_level, pfn, level_name[prev_level]); | |
1259 | - prev_pfn_level = pfn; | |
1260 | - prev_level = lvl; | |
1261 | - } | |
1262 | 994 | } |
995 | + seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn, | |
996 | + type_name[prev_type]); | |
1263 | 997 | return 0; |
1264 | -#undef TYPE_IDENTITY | |
1265 | -#undef TYPE_MISSING | |
1266 | -#undef TYPE_PFN | |
1267 | -#undef TYPE_UNKNOWN | |
1268 | 998 | } |
1269 | 999 | |
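
With the three-level structure gone, p2m_dump_show() above reduces to a single run-length pass over the linear list. A small stand-alone model of that loop, where an array stands in for xen_p2m_elem_type() (which the kernel derives from the entry value and its backing page):

    #include <stdio.h>

    enum { P2M_TYPE_IDENTITY, P2M_TYPE_MISSING, P2M_TYPE_PFN, P2M_TYPE_UNKNOWN };

    static const char * const type_name[] = {
        [P2M_TYPE_IDENTITY] = "identity",
        [P2M_TYPE_MISSING]  = "missing",
        [P2M_TYPE_PFN]      = "pfn",
        [P2M_TYPE_UNKNOWN]  = "abnormal",
    };

    int main(void)
    {
        /* Toy p2m: three "pfn" entries, two identity, three missing, one pfn. */
        int types[] = { P2M_TYPE_PFN, P2M_TYPE_PFN, P2M_TYPE_PFN,
                        P2M_TYPE_IDENTITY, P2M_TYPE_IDENTITY,
                        P2M_TYPE_MISSING, P2M_TYPE_MISSING, P2M_TYPE_MISSING,
                        P2M_TYPE_PFN };
        unsigned long size = sizeof(types) / sizeof(types[0]);
        unsigned long pfn, first_pfn = 0;
        int prev_type = types[0];

        /* Emit one line per run of equal types, exactly like p2m_dump_show(). */
        for (pfn = 0; pfn < size; pfn++) {
            if (types[pfn] != prev_type) {
                printf(" [0x%lx->0x%lx] %s\n", first_pfn, pfn, type_name[prev_type]);
                prev_type = types[pfn];
                first_pfn = pfn;
            }
        }
        printf(" [0x%lx->0x%lx] %s\n", first_pfn, pfn, type_name[prev_type]);
        return 0;
    }
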
1270 | 1000 | static int p2m_dump_open(struct inode *inode, struct file *filp) |
arch/x86/xen/setup.c
... | ... | @@ -30,6 +30,7 @@ |
30 | 30 | #include "xen-ops.h" |
31 | 31 | #include "vdso.h" |
32 | 32 | #include "p2m.h" |
33 | +#include "mmu.h" | |
33 | 34 | |
34 | 35 | /* These are code, but not functions. Defined in entry.S */ |
35 | 36 | extern const char xen_hypervisor_callback[]; |
... | ... | @@ -47,8 +48,19 @@ |
47 | 48 | /* Number of pages released from the initial allocation. */ |
48 | 49 | unsigned long xen_released_pages; |
49 | 50 | |
50 | -/* Buffer used to remap identity mapped pages */ | |
51 | -unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata; | |
51 | +/* | |
52 | + * Buffer used to remap identity mapped pages. We only need the virtual space. | |
53 | + * The physical page behind this address is remapped as needed to different | |
54 | + * buffer pages. | |
55 | + */ | |
56 | +#define REMAP_SIZE (P2M_PER_PAGE - 3) | |
57 | +static struct { | |
58 | + unsigned long next_area_mfn; | |
59 | + unsigned long target_pfn; | |
60 | + unsigned long size; | |
61 | + unsigned long mfns[REMAP_SIZE]; | |
62 | +} xen_remap_buf __initdata __aligned(PAGE_SIZE); | |
63 | +static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY; | |
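
REMAP_SIZE is P2M_PER_PAGE - 3 so that the three bookkeeping fields plus the mfns[] payload fill exactly one page, which is what lets each chunk's header live in the chunk's own first frame. A quick compile-time check of that arithmetic (PAGE_SIZE hard-coded to 4 KiB here; the struct mirrors the anonymous one above):

    #include <stdio.h>

    #define PAGE_SIZE    4096UL                              /* assumed 4 KiB pages */
    #define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
    #define REMAP_SIZE   (P2M_PER_PAGE - 3)

    struct remap_buf {                  /* same layout as xen_remap_buf */
        unsigned long next_area_mfn;    /* link to the next chunk       */
        unsigned long target_pfn;       /* where this chunk ends up     */
        unsigned long size;             /* valid entries in mfns[]      */
        unsigned long mfns[REMAP_SIZE];
    };

    /* 3 header longs + (P2M_PER_PAGE - 3) payload longs == one full page. */
    _Static_assert(sizeof(struct remap_buf) == PAGE_SIZE,
                   "remap buffer must occupy exactly one page");

    int main(void)
    {
        printf("header+payload = %zu bytes\n", sizeof(struct remap_buf));
        return 0;
    }
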
52 | 64 | |
53 | 65 | /* |
54 | 66 | * The maximum amount of extra memory compared to the base size. The |
... | ... | @@ -64,7 +76,6 @@ |
64 | 76 | |
65 | 77 | static void __init xen_add_extra_mem(u64 start, u64 size) |
66 | 78 | { |
67 | - unsigned long pfn; | |
68 | 79 | int i; |
69 | 80 | |
70 | 81 | for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { |
... | ... | @@ -84,75 +95,76 @@ |
84 | 95 | printk(KERN_WARNING "Warning: not enough extra memory regions\n"); |
85 | 96 | |
86 | 97 | memblock_reserve(start, size); |
98 | +} | |
87 | 99 | |
88 | - xen_max_p2m_pfn = PFN_DOWN(start + size); | |
89 | - for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { | |
90 | - unsigned long mfn = pfn_to_mfn(pfn); | |
100 | +static void __init xen_del_extra_mem(u64 start, u64 size) | |
101 | +{ | |
102 | + int i; | |
103 | + u64 start_r, size_r; | |
91 | 104 | |
92 | - if (WARN_ONCE(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn)) | |
93 | - continue; | |
94 | - WARN_ONCE(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n", | |
95 | - pfn, mfn); | |
105 | + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { | |
106 | + start_r = xen_extra_mem[i].start; | |
107 | + size_r = xen_extra_mem[i].size; | |
96 | 108 | |
97 | - __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | |
109 | + /* Start of region. */ | |
110 | + if (start_r == start) { | |
111 | + BUG_ON(size > size_r); | |
112 | + xen_extra_mem[i].start += size; | |
113 | + xen_extra_mem[i].size -= size; | |
114 | + break; | |
115 | + } | |
116 | + /* End of region. */ | |
117 | + if (start_r + size_r == start + size) { | |
118 | + BUG_ON(size > size_r); | |
119 | + xen_extra_mem[i].size -= size; | |
120 | + break; | |
121 | + } | |
122 | + /* Mid of region. */ | |
123 | + if (start > start_r && start < start_r + size_r) { | |
124 | + BUG_ON(start + size > start_r + size_r); | |
125 | + xen_extra_mem[i].size = start - start_r; | |
126 | + /* Calling memblock_reserve() again is okay. */ | |
127 | + xen_add_extra_mem(start + size, start_r + size_r - | |
128 | + (start + size)); | |
129 | + break; | |
130 | + } | |
98 | 131 | } |
132 | + memblock_free(start, size); | |
99 | 133 | } |
100 | 134 | |
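
xen_del_extra_mem() above must handle the removed range touching the start, the end, or the interior of a registered region; only the interior case splits a region in two (via the recursive xen_add_extra_mem() call). A user-space sketch of the three cases over a bare start/size pair (del_range() and struct region are illustrative names, not kernel ones):

    #include <stdio.h>
    #include <assert.h>

    struct region { unsigned long start, size; };

    /* Returns the number of regions after removal: 1 (shrunk) or 2 (split).
     * 'out' must have room for two entries. */
    static int del_range(struct region r, unsigned long start, unsigned long size,
                         struct region *out)
    {
        unsigned long end = start + size, r_end = r.start + r.size;

        if (start == r.start) {                 /* start of region */
            assert(size <= r.size);
            out[0] = (struct region){ start + size, r.size - size };
            return 1;
        }
        if (end == r_end) {                     /* end of region */
            assert(size <= r.size);
            out[0] = (struct region){ r.start, r.size - size };
            return 1;
        }
        /* middle of region: shrink the front part, create a tail part */
        assert(start > r.start && end < r_end);
        out[0] = (struct region){ r.start, start - r.start };
        out[1] = (struct region){ end, r_end - end };
        return 2;
    }

    int main(void)
    {
        struct region out[2];
        int n = del_range((struct region){ 0x1000, 0x5000 }, 0x2000, 0x1000, out);

        for (int i = 0; i < n; i++)
            printf("region %d: start=%#lx size=%#lx\n", i, out[i].start, out[i].size);
        return 0;
    }
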
101 | -static unsigned long __init xen_do_chunk(unsigned long start, | |
102 | - unsigned long end, bool release) | |
135 | +/* | |
136 | + * Called during boot before the p2m list can take entries beyond the | |
137 | + * hypervisor supplied p2m list. Entries in extra mem are to be regarded as | |
138 | + * invalid. | |
139 | + */ | |
140 | +unsigned long __ref xen_chk_extra_mem(unsigned long pfn) | |
103 | 141 | { |
104 | - struct xen_memory_reservation reservation = { | |
105 | - .address_bits = 0, | |
106 | - .extent_order = 0, | |
107 | - .domid = DOMID_SELF | |
108 | - }; | |
109 | - unsigned long len = 0; | |
110 | - unsigned long pfn; | |
111 | - int ret; | |
142 | + int i; | |
143 | + unsigned long addr = PFN_PHYS(pfn); | |
112 | 144 | |
113 | - for (pfn = start; pfn < end; pfn++) { | |
114 | - unsigned long frame; | |
115 | - unsigned long mfn = pfn_to_mfn(pfn); | |
145 | + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { | |
146 | + if (addr >= xen_extra_mem[i].start && | |
147 | + addr < xen_extra_mem[i].start + xen_extra_mem[i].size) | |
148 | + return INVALID_P2M_ENTRY; | |
149 | + } | |
116 | 150 | |
117 | - if (release) { | |
118 | - /* Make sure pfn exists to start with */ | |
119 | - if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) | |
120 | - continue; | |
121 | - frame = mfn; | |
122 | - } else { | |
123 | - if (mfn != INVALID_P2M_ENTRY) | |
124 | - continue; | |
125 | - frame = pfn; | |
126 | - } | |
127 | - set_xen_guest_handle(reservation.extent_start, &frame); | |
128 | - reservation.nr_extents = 1; | |
151 | + return IDENTITY_FRAME(pfn); | |
152 | +} | |
129 | 153 | |
130 | - ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation : XENMEM_populate_physmap, | |
131 | - &reservation); | |
132 | - WARN(ret != 1, "Failed to %s pfn %lx err=%d\n", | |
133 | - release ? "release" : "populate", pfn, ret); | |
154 | +/* | |
155 | + * Mark all pfns of extra mem as invalid in p2m list. | |
156 | + */ | |
157 | +void __init xen_inv_extra_mem(void) | |
158 | +{ | |
159 | + unsigned long pfn, pfn_s, pfn_e; | |
160 | + int i; | |
134 | 161 | |
135 | - if (ret == 1) { | |
136 | - if (!early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) { | |
137 | - if (release) | |
138 | - break; | |
139 | - set_xen_guest_handle(reservation.extent_start, &frame); | |
140 | - reservation.nr_extents = 1; | |
141 | - ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | |
142 | - &reservation); | |
143 | - break; | |
144 | - } | |
145 | - len++; | |
146 | - } else | |
147 | - break; | |
162 | + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { | |
163 | + pfn_s = PFN_DOWN(xen_extra_mem[i].start); | |
164 | + pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size); | |
165 | + for (pfn = pfn_s; pfn < pfn_e; pfn++) | |
166 | + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | |
148 | 167 | } |
149 | - if (len) | |
150 | - printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n", | |
151 | - release ? "Freeing" : "Populating", | |
152 | - start, end, len, | |
153 | - release ? "freed" : "added"); | |
154 | - | |
155 | - return len; | |
156 | 168 | } |
157 | 169 | |
... | ... | @@ -198,26 +210,62 @@ |
198 | 210 | return done; |
199 | 211 | } |
200 | 212 | |
213 | +static int __init xen_free_mfn(unsigned long mfn) | |
214 | +{ | |
215 | + struct xen_memory_reservation reservation = { | |
216 | + .address_bits = 0, | |
217 | + .extent_order = 0, | |
218 | + .domid = DOMID_SELF | |
219 | + }; | |
220 | + | |
221 | + set_xen_guest_handle(reservation.extent_start, &mfn); | |
222 | + reservation.nr_extents = 1; | |
223 | + | |
224 | + return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); | |
225 | +} | |
226 | + | |
201 | 227 | /* |
202 | - * This releases a chunk of memory and then does the identity map. It's used as | |
228 | + * This releases a chunk of memory and then does the identity map. It's used | |
203 | 229 | * as a fallback if the remapping fails. |
204 | 230 | */ |
205 | 231 | static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, |
206 | 232 | unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity, |
207 | 233 | unsigned long *released) |
208 | 234 | { |
235 | + unsigned long len = 0; | |
236 | + unsigned long pfn, end; | |
237 | + int ret; | |
238 | + | |
209 | 239 | WARN_ON(start_pfn > end_pfn); |
210 | 240 | |
241 | + end = min(end_pfn, nr_pages); | |
242 | + for (pfn = start_pfn; pfn < end; pfn++) { | |
243 | + unsigned long mfn = pfn_to_mfn(pfn); | |
244 | + | |
245 | + /* Make sure pfn exists to start with */ | |
246 | + if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) | |
247 | + continue; | |
248 | + | |
249 | + ret = xen_free_mfn(mfn); | |
250 | + WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret); | |
251 | + | |
252 | + if (ret == 1) { | |
253 | + if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY)) | |
254 | + break; | |
255 | + len++; | |
256 | + } else | |
257 | + break; | |
258 | + } | |
259 | + | |
211 | 260 | /* Need to release pages first */ |
212 | - *released += xen_do_chunk(start_pfn, min(end_pfn, nr_pages), true); | |
261 | + *released += len; | |
213 | 262 | *identity += set_phys_range_identity(start_pfn, end_pfn); |
214 | 263 | } |
215 | 264 | |
216 | 265 | /* |
217 | - * Helper function to update both the p2m and m2p tables. | |
266 | + * Helper function to update the p2m and m2p tables and kernel mapping. | |
218 | 267 | */ |
219 | -static unsigned long __init xen_update_mem_tables(unsigned long pfn, | |
220 | - unsigned long mfn) | |
268 | +static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn) | |
221 | 269 | { |
222 | 270 | struct mmu_update update = { |
223 | 271 | .ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, |
... | ... | @@ -225,161 +273,88 @@ |
225 | 273 | }; |
226 | 274 | |
227 | 275 | /* Update p2m */ |
228 | - if (!early_set_phys_to_machine(pfn, mfn)) { | |
276 | + if (!set_phys_to_machine(pfn, mfn)) { | |
229 | 277 | WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n", |
230 | 278 | pfn, mfn); |
231 | - return false; | |
279 | + BUG(); | |
232 | 280 | } |
233 | 281 | |
234 | 282 | /* Update m2p */ |
235 | 283 | if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) { |
236 | 284 | WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n", |
237 | 285 | mfn, pfn); |
238 | - return false; | |
286 | + BUG(); | |
239 | 287 | } |
240 | 288 | |
241 | - return true; | |
289 | + /* Update kernel mapping, but not for highmem. */ | |
290 | + if ((pfn << PAGE_SHIFT) >= __pa(high_memory)) | |
291 | + return; | |
292 | + | |
293 | + if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT), | |
294 | + mfn_pte(mfn, PAGE_KERNEL), 0)) { | |
295 | + WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n", | |
296 | + mfn, pfn); | |
297 | + BUG(); | |
298 | + } | |
242 | 299 | } |
243 | 300 | |
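
xen_update_mem_tables() above keeps three views of the same frame consistent: the guest's p2m list, the hypervisor's m2p table, and the kernel's linear virtual mapping. A toy model of that triple update, with plain arrays standing in for the p2m, the M2P hypercall and the va-mapping hypercall:

    #define NPAGES 1024UL

    static unsigned long p2m[NPAGES];   /* pfn -> mfn: set_phys_to_machine()              */
    static unsigned long m2p[NPAGES];   /* mfn -> pfn: HYPERVISOR_mmu_update()            */
    static unsigned long kmap[NPAGES];  /* va slot -> mfn: HYPERVISOR_update_va_mapping() */

    static void model_update_mem_tables(unsigned long pfn, unsigned long mfn)
    {
        p2m[pfn]  = mfn;   /* forward map                  */
        m2p[mfn]  = pfn;   /* reverse map, hypervisor side */
        kmap[pfn] = mfn;   /* kernel's own page table      */
    }

    int main(void)
    {
        model_update_mem_tables(7, 42);
        return !(p2m[7] == 42 && m2p[42] == 7 && kmap[7] == 42);
    }

If any of the three real updates fails, the function BUG()s: a half-updated mapping would leave pfn_to_mfn() and mfn_to_pfn() disagreeing.
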
244 | 301 | /* |
245 | 302 | * This function updates the p2m and m2p tables with an identity map from |
246 | - * start_pfn to start_pfn+size and remaps the underlying RAM of the original | |
247 | - * allocation at remap_pfn. It must do so carefully in P2M_PER_PAGE sized blocks | |
248 | - * to not exhaust the reserved brk space. Doing it in properly aligned blocks | |
249 | - * ensures we only allocate the minimum required leaf pages in the p2m table. It | |
250 | - * copies the existing mfns from the p2m table under the 1:1 map, overwrites | |
251 | - * them with the identity map and then updates the p2m and m2p tables with the | |
252 | - * remapped memory. | |
303 | + * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the | |
304 | + * original allocation at remap_pfn. The information needed for remapping is | |
305 | + * saved in the memory itself to avoid the need for allocating buffers. The | |
306 | + * complete remap information is contained in a list of MFNs each containing | |
307 | + * up to REMAP_SIZE MFNs and the start target PFN for doing the remap. | |
308 | + * This enables us to preserve the original mfn sequence while doing the | |
309 | + * remapping at a time when the memory management is capable of allocating | |
310 | + * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and | |
311 | + * its callers. | |
253 | 312 | */ |
254 | -static unsigned long __init xen_do_set_identity_and_remap_chunk( | |
313 | +static void __init xen_do_set_identity_and_remap_chunk( | |
255 | 314 | unsigned long start_pfn, unsigned long size, unsigned long remap_pfn) |
256 | 315 | { |
316 | + unsigned long buf = (unsigned long)&xen_remap_buf; | |
317 | + unsigned long mfn_save, mfn; | |
257 | 318 | unsigned long ident_pfn_iter, remap_pfn_iter; |
258 | - unsigned long ident_start_pfn_align, remap_start_pfn_align; | |
259 | - unsigned long ident_end_pfn_align, remap_end_pfn_align; | |
260 | - unsigned long ident_boundary_pfn, remap_boundary_pfn; | |
261 | - unsigned long ident_cnt = 0; | |
262 | - unsigned long remap_cnt = 0; | |
319 | + unsigned long ident_end_pfn = start_pfn + size; | |
263 | 320 | unsigned long left = size; |
264 | - unsigned long mod; | |
265 | - int i; | |
321 | + unsigned long ident_cnt = 0; | |
322 | + unsigned int i, chunk; | |
266 | 323 | |
267 | 324 | WARN_ON(size == 0); |
268 | 325 | |
269 | 326 | BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); |
270 | 327 | |
271 | - /* | |
272 | - * Determine the proper alignment to remap memory in P2M_PER_PAGE sized | |
273 | - * blocks. We need to keep track of both the existing pfn mapping and | |
274 | - * the new pfn remapping. | |
275 | - */ | |
276 | - mod = start_pfn % P2M_PER_PAGE; | |
277 | - ident_start_pfn_align = | |
278 | - mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn; | |
279 | - mod = remap_pfn % P2M_PER_PAGE; | |
280 | - remap_start_pfn_align = | |
281 | - mod ? (remap_pfn - mod + P2M_PER_PAGE) : remap_pfn; | |
282 | - mod = (start_pfn + size) % P2M_PER_PAGE; | |
283 | - ident_end_pfn_align = start_pfn + size - mod; | |
284 | - mod = (remap_pfn + size) % P2M_PER_PAGE; | |
285 | - remap_end_pfn_align = remap_pfn + size - mod; | |
328 | + mfn_save = virt_to_mfn(buf); | |
286 | 329 | |
287 | - /* Iterate over each p2m leaf node in each range */ | |
288 | - for (ident_pfn_iter = ident_start_pfn_align, remap_pfn_iter = remap_start_pfn_align; | |
289 | - ident_pfn_iter < ident_end_pfn_align && remap_pfn_iter < remap_end_pfn_align; | |
290 | - ident_pfn_iter += P2M_PER_PAGE, remap_pfn_iter += P2M_PER_PAGE) { | |
291 | - /* Check we aren't past the end */ | |
292 | - BUG_ON(ident_pfn_iter + P2M_PER_PAGE > start_pfn + size); | |
293 | - BUG_ON(remap_pfn_iter + P2M_PER_PAGE > remap_pfn + size); | |
330 | + for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn; | |
331 | + ident_pfn_iter < ident_end_pfn; | |
332 | + ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) { | |
333 | + chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE; | |
294 | 334 | |
295 | - /* Save p2m mappings */ | |
296 | - for (i = 0; i < P2M_PER_PAGE; i++) | |
297 | - xen_remap_buf[i] = pfn_to_mfn(ident_pfn_iter + i); | |
335 | + /* Map first pfn to xen_remap_buf */ | |
336 | + mfn = pfn_to_mfn(ident_pfn_iter); | |
337 | + set_pte_mfn(buf, mfn, PAGE_KERNEL); | |
298 | 338 | |
299 | - /* Set identity map which will free a p2m leaf */ | |
300 | - ident_cnt += set_phys_range_identity(ident_pfn_iter, | |
301 | - ident_pfn_iter + P2M_PER_PAGE); | |
339 | + /* Save mapping information in page */ | |
340 | + xen_remap_buf.next_area_mfn = xen_remap_mfn; | |
341 | + xen_remap_buf.target_pfn = remap_pfn_iter; | |
342 | + xen_remap_buf.size = chunk; | |
343 | + for (i = 0; i < chunk; i++) | |
344 | + xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i); | |
302 | 345 | |
303 | -#ifdef DEBUG | |
304 | - /* Helps verify a p2m leaf has been freed */ | |
305 | - for (i = 0; i < P2M_PER_PAGE; i++) { | |
306 | - unsigned int pfn = ident_pfn_iter + i; | |
307 | - BUG_ON(pfn_to_mfn(pfn) != pfn); | |
308 | - } | |
309 | -#endif | |
310 | - /* Now remap memory */ | |
311 | - for (i = 0; i < P2M_PER_PAGE; i++) { | |
312 | - unsigned long mfn = xen_remap_buf[i]; | |
346 | + /* Put remap buf into list. */ | |
347 | + xen_remap_mfn = mfn; | |
313 | 348 | |
314 | - /* This will use the p2m leaf freed above */ | |
315 | - if (!xen_update_mem_tables(remap_pfn_iter + i, mfn)) { | |
316 | - WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n", | |
317 | - remap_pfn_iter + i, mfn); | |
318 | - return 0; | |
319 | - } | |
349 | + /* Set identity map */ | |
350 | + ident_cnt += set_phys_range_identity(ident_pfn_iter, | |
351 | + ident_pfn_iter + chunk); | |
320 | 352 | |
321 | - remap_cnt++; | |
322 | - } | |
323 | - | |
324 | - left -= P2M_PER_PAGE; | |
353 | + left -= chunk; | |
325 | 354 | } |
326 | 355 | |
327 | - /* Max boundary space possible */ | |
328 | - BUG_ON(left > (P2M_PER_PAGE - 1) * 2); | |
329 | - | |
330 | - /* Now handle the boundary conditions */ | |
331 | - ident_boundary_pfn = start_pfn; | |
332 | - remap_boundary_pfn = remap_pfn; | |
333 | - for (i = 0; i < left; i++) { | |
334 | - unsigned long mfn; | |
335 | - | |
336 | - /* These two checks move from the start to end boundaries */ | |
337 | - if (ident_boundary_pfn == ident_start_pfn_align) | |
338 | - ident_boundary_pfn = ident_pfn_iter; | |
339 | - if (remap_boundary_pfn == remap_start_pfn_align) | |
340 | - remap_boundary_pfn = remap_pfn_iter; | |
341 | - | |
342 | - /* Check we aren't past the end */ | |
343 | - BUG_ON(ident_boundary_pfn >= start_pfn + size); | |
344 | - BUG_ON(remap_boundary_pfn >= remap_pfn + size); | |
345 | - | |
346 | - mfn = pfn_to_mfn(ident_boundary_pfn); | |
347 | - | |
348 | - if (!xen_update_mem_tables(remap_boundary_pfn, mfn)) { | |
349 | - WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n", | |
350 | - remap_pfn_iter + i, mfn); | |
351 | - return 0; | |
352 | - } | |
353 | - remap_cnt++; | |
354 | - | |
355 | - ident_boundary_pfn++; | |
356 | - remap_boundary_pfn++; | |
357 | - } | |
358 | - | |
359 | - /* Finish up the identity map */ | |
360 | - if (ident_start_pfn_align >= ident_end_pfn_align) { | |
361 | - /* | |
362 | - * In this case we have an identity range which does not span an | |
363 | - * aligned block so everything needs to be identity mapped here. | |
364 | - * If we didn't check this we might remap too many pages since | |
365 | - * the align boundaries are not meaningful in this case. | |
366 | - */ | |
367 | - ident_cnt += set_phys_range_identity(start_pfn, | |
368 | - start_pfn + size); | |
369 | - } else { | |
370 | - /* Remapped above so check each end of the chunk */ | |
371 | - if (start_pfn < ident_start_pfn_align) | |
372 | - ident_cnt += set_phys_range_identity(start_pfn, | |
373 | - ident_start_pfn_align); | |
374 | - if (start_pfn + size > ident_pfn_iter) | |
375 | - ident_cnt += set_phys_range_identity(ident_pfn_iter, | |
376 | - start_pfn + size); | |
377 | - } | |
378 | - | |
379 | - BUG_ON(ident_cnt != size); | |
380 | - BUG_ON(remap_cnt != size); | |
381 | - | |
382 | - return size; | |
356 | + /* Restore old xen_remap_buf mapping */ | |
357 | + set_pte_mfn(buf, mfn_save, PAGE_KERNEL); | |
383 | 358 | } |
384 | 359 | |
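
Each iteration above turns the first frame of the chunk into the chunk's header: the frame is temporarily mapped at xen_remap_buf via set_pte_mfn(), filled with the link, target and saved mfns, and pushed onto the chain anchored at xen_remap_mfn. A user-space sketch of that list building, with malloc standing in for the remapping trick (save_chunk() and the node type are illustrative names; the replay half of the sketch follows xen_remap_memory() below):

    #include <stdlib.h>
    #include <string.h>

    #define REMAP_SIZE 509   /* matches P2M_PER_PAGE - 3 on 64-bit */

    struct remap_chunk {
        struct remap_chunk *next;    /* kernel: next_area_mfn (an mfn, not a pointer) */
        unsigned long target_pfn;    /* kernel: target_pfn */
        unsigned long size;          /* kernel: size       */
        unsigned long mfns[REMAP_SIZE];
    };

    static struct remap_chunk *remap_head;   /* kernel: xen_remap_mfn */

    /* Record that 'size' frames currently holding 'mfns' should later be
     * remapped so they reappear starting at remap_pfn. */
    static void save_chunk(unsigned long remap_pfn,
                           const unsigned long *mfns, unsigned long size)
    {
        /* The kernel writes this header into the chunk's own first frame;
         * the sketch simply allocates a node instead. */
        struct remap_chunk *c = malloc(sizeof(*c));

        c->next = remap_head;
        c->target_pfn = remap_pfn;
        c->size = size;
        memcpy(c->mfns, mfns, size * sizeof(*mfns));
        remap_head = c;   /* push: newest chunk becomes the chain head */
    }
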
385 | 360 | /* |
... | ... | @@ -396,8 +371,7 @@ |
396 | 371 | static unsigned long __init xen_set_identity_and_remap_chunk( |
397 | 372 | const struct e820entry *list, size_t map_size, unsigned long start_pfn, |
398 | 373 | unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn, |
399 | - unsigned long *identity, unsigned long *remapped, | |
400 | - unsigned long *released) | |
374 | + unsigned long *identity, unsigned long *released) | |
401 | 375 | { |
402 | 376 | unsigned long pfn; |
403 | 377 | unsigned long i = 0; |
404 | 378 | |
... | ... | @@ -431,19 +405,12 @@ |
431 | 405 | if (size > remap_range_size) |
432 | 406 | size = remap_range_size; |
433 | 407 | |
434 | - if (!xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn)) { | |
435 | - WARN(1, "Failed to remap 1:1 memory cur_pfn=%ld size=%ld remap_pfn=%ld\n", | |
436 | - cur_pfn, size, remap_pfn); | |
437 | - xen_set_identity_and_release_chunk(cur_pfn, | |
438 | - cur_pfn + left, nr_pages, identity, released); | |
439 | - break; | |
440 | - } | |
408 | + xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn); | |
441 | 409 | |
442 | 410 | /* Update variables to reflect new mappings. */ |
443 | 411 | i += size; |
444 | 412 | remap_pfn += size; |
445 | 413 | *identity += size; |
446 | - *remapped += size; | |
447 | 414 | } |
448 | 415 | |
... | ... | @@ -458,13 +425,12 @@ |
458 | 425 | return remap_pfn; |
459 | 426 | } |
460 | 427 | |
461 | -static unsigned long __init xen_set_identity_and_remap( | |
428 | +static void __init xen_set_identity_and_remap( | |
462 | 429 | const struct e820entry *list, size_t map_size, unsigned long nr_pages, |
463 | 430 | unsigned long *released) |
464 | 431 | { |
465 | 432 | phys_addr_t start = 0; |
466 | 433 | unsigned long identity = 0; |
467 | - unsigned long remapped = 0; | |
468 | 434 | unsigned long last_pfn = nr_pages; |
469 | 435 | const struct e820entry *entry; |
470 | 436 | unsigned long num_released = 0; |
... | ... | @@ -494,8 +460,7 @@ |
494 | 460 | last_pfn = xen_set_identity_and_remap_chunk( |
495 | 461 | list, map_size, start_pfn, |
496 | 462 | end_pfn, nr_pages, last_pfn, |
497 | - &identity, &remapped, | |
498 | - &num_released); | |
463 | + &identity, &num_released); | |
499 | 464 | start = end; |
500 | 465 | } |
501 | 466 | } |
502 | 467 | |
... | ... | @@ -503,12 +468,63 @@ |
503 | 468 | *released = num_released; |
504 | 469 | |
505 | 470 | pr_info("Set %ld page(s) to 1-1 mapping\n", identity); |
506 | - pr_info("Remapped %ld page(s), last_pfn=%ld\n", remapped, | |
507 | - last_pfn); | |
508 | 471 | pr_info("Released %ld page(s)\n", num_released); |
472 | +} | |
509 | 473 | |
510 | - return last_pfn; | |
474 | +/* | |
475 | + * Remap the memory prepared in xen_do_set_identity_and_remap_chunk(). | |
476 | + * The remap information (which mfn remaps to which pfn) is contained in the | |
477 | + * to-be-remapped memory itself, in a linked list anchored at xen_remap_mfn. | |
478 | + * This scheme allows the chunks to be remapped in arbitrary order, while | |
479 | + * the resulting mapping is independent of that order. | |
480 | + */ | |
481 | +void __init xen_remap_memory(void) | |
482 | +{ | |
483 | + unsigned long buf = (unsigned long)&xen_remap_buf; | |
484 | + unsigned long mfn_save, mfn, pfn; | |
485 | + unsigned long remapped = 0; | |
486 | + unsigned int i; | |
487 | + unsigned long pfn_s = ~0UL; | |
488 | + unsigned long len = 0; | |
489 | + | |
490 | + mfn_save = virt_to_mfn(buf); | |
491 | + | |
492 | + while (xen_remap_mfn != INVALID_P2M_ENTRY) { | |
493 | + /* Map the remap information */ | |
494 | + set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL); | |
495 | + | |
496 | + BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]); | |
497 | + | |
498 | + pfn = xen_remap_buf.target_pfn; | |
499 | + for (i = 0; i < xen_remap_buf.size; i++) { | |
500 | + mfn = xen_remap_buf.mfns[i]; | |
501 | + xen_update_mem_tables(pfn, mfn); | |
502 | + remapped++; | |
503 | + pfn++; | |
504 | + } | |
505 | + if (pfn_s == ~0UL || pfn == pfn_s) { | |
506 | + pfn_s = xen_remap_buf.target_pfn; | |
507 | + len += xen_remap_buf.size; | |
508 | + } else if (pfn_s + len == xen_remap_buf.target_pfn) { | |
509 | + len += xen_remap_buf.size; | |
510 | + } else { | |
511 | + xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len)); | |
512 | + pfn_s = xen_remap_buf.target_pfn; | |
513 | + len = xen_remap_buf.size; | |
514 | + } | |
515 | + | |
516 | + mfn = xen_remap_mfn; | |
517 | + xen_remap_mfn = xen_remap_buf.next_area_mfn; | |
518 | + } | |
519 | + | |
520 | + if (pfn_s != ~0UL && len) | |
521 | + xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len)); | |
522 | + | |
523 | + set_pte_mfn(buf, mfn_save, PAGE_KERNEL); | |
524 | + | |
525 | + pr_info("Remapped %ld page(s)\n", remapped); | |
511 | 526 | } |
527 | + | |
512 | 528 | static unsigned long __init xen_get_max_pages(void) |
513 | 529 | { |
514 | 530 | unsigned long max_pages = MAX_DOMAIN_PAGES; |
... | ... | @@ -569,7 +585,6 @@ |
569 | 585 | int rc; |
570 | 586 | struct xen_memory_map memmap; |
571 | 587 | unsigned long max_pages; |
572 | - unsigned long last_pfn = 0; | |
573 | 588 | unsigned long extra_pages = 0; |
574 | 589 | int i; |
575 | 590 | int op; |
... | ... | @@ -616,17 +631,14 @@ |
616 | 631 | extra_pages += max_pages - max_pfn; |
617 | 632 | |
618 | 633 | /* |
619 | - * Set identity map on non-RAM pages and remap the underlying RAM. | |
634 | + * Set identity map on non-RAM pages and prepare remapping the | |
635 | + * underlying RAM. | |
620 | 636 | */ |
621 | - last_pfn = xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn, | |
622 | - &xen_released_pages); | |
637 | + xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn, | |
638 | + &xen_released_pages); | |
623 | 639 | |
624 | 640 | extra_pages += xen_released_pages; |
625 | 641 | |
626 | - if (last_pfn > max_pfn) { | |
627 | - max_pfn = min(MAX_DOMAIN_PAGES, last_pfn); | |
628 | - mem_end = PFN_PHYS(max_pfn); | |
629 | - } | |
630 | 642 | /* |
631 | 643 | * Clamp the amount of extra memory to a EXTRA_MEM_RATIO |
632 | 644 | * factor the base size. On non-highmem systems, the base |
... | ... | @@ -653,6 +665,7 @@ |
653 | 665 | size = min(size, (u64)extra_pages * PAGE_SIZE); |
654 | 666 | extra_pages -= size / PAGE_SIZE; |
655 | 667 | xen_add_extra_mem(addr, size); |
668 | + xen_max_p2m_pfn = PFN_DOWN(addr + size); | |
656 | 669 | } else |
657 | 670 | type = E820_UNUSABLE; |
658 | 671 | } |
arch/x86/xen/xen-ops.h
... | ... | @@ -29,11 +29,13 @@ |
29 | 29 | void xen_setup_machphys_mapping(void); |
30 | 30 | void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); |
31 | 31 | void xen_reserve_top(void); |
32 | -extern unsigned long xen_max_p2m_pfn; | |
33 | 32 | |
34 | 33 | void xen_mm_pin_all(void); |
35 | 34 | void xen_mm_unpin_all(void); |
36 | 35 | |
36 | +unsigned long __ref xen_chk_extra_mem(unsigned long pfn); | |
37 | +void __init xen_inv_extra_mem(void); | |
38 | +void __init xen_remap_memory(void); | |
37 | 39 | char * __init xen_memory_setup(void); |
38 | 40 | char * xen_auto_xlated_memory_setup(void); |
39 | 41 | void __init xen_arch_setup(void); |
... | ... | @@ -46,7 +48,7 @@ |
46 | 48 | void xen_unplug_emulated_devices(void); |
47 | 49 | |
48 | 50 | void __init xen_build_dynamic_phys_to_machine(void); |
49 | -unsigned long __init xen_revector_p2m_tree(void); | |
51 | +void __init xen_vmalloc_p2m_tree(void); | |
50 | 52 | |
51 | 53 | void xen_init_irq_ops(void); |
52 | 54 | void xen_setup_timer(int cpu); |