Blame view
drivers/xen/balloon.c
12.4 KB
1775826ce
|
1 |
/****************************************************************************** |
1775826ce
|
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
* Xen balloon driver - enables returning/claiming memory to/from Xen. * * Copyright (c) 2003, B Dragovic * Copyright (c) 2003-2004, M Williamson, K Fraser * Copyright (c) 2005 Dan M. Smith, IBM Corporation * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 * as published by the Free Software Foundation; or, when distributed * separately from the Linux kernel or incorporated into other * software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include <linux/kernel.h> |
1775826ce
|
34 35 36 37 38 39 40 |
#include <linux/sched.h> #include <linux/errno.h> #include <linux/mm.h> #include <linux/bootmem.h> #include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/mutex.h> |
1775826ce
|
41 |
#include <linux/list.h> |
5a0e3ad6a
|
42 |
#include <linux/gfp.h> |
1775826ce
|
43 |
|
1775826ce
|
44 45 46 |
#include <asm/page.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> |
1775826ce
|
47 |
#include <asm/tlb.h> |
66946f676
|
48 |
#include <asm/e820.h> |
1775826ce
|
49 |
|
ecbf29cdb
|
50 51 |
#include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> |
1ccbf5344
|
52 53 |
#include <xen/xen.h> |
ecbf29cdb
|
54 |
#include <xen/interface/xen.h> |
1775826ce
|
55 |
#include <xen/interface/memory.h> |
803eb047a
|
56 |
#include <xen/balloon.h> |
1775826ce
|
57 58 |
#include <xen/features.h> #include <xen/page.h> |
95d2ac4a0
|
59 60 61 62 63 64 65 |
/*
 * balloon_process() state:
 *
 * BP_DONE: done or nothing to do,
 * BP_EAGAIN: error, go to sleep,
 * BP_ECANCELED: error, balloon operation canceled.
 */
1775826ce
|
66 |
|
95d2ac4a0
|
67 68 69 70 |
/* Outcome of one balloon step, as returned to balloon_process(). */
enum bp_state {
	BP_DONE,	/* done, or there was nothing to do */
	BP_EAGAIN,	/* error: go to sleep and try again later */
	BP_ECANCELED	/* error: the balloon operation was canceled */
};
1775826ce
|
72 |
|
1775826ce
|
73 |
static DEFINE_MUTEX(balloon_mutex); |
1775826ce
|
74 |
|
803eb047a
|
75 76 |
struct balloon_stats balloon_stats; EXPORT_SYMBOL_GPL(balloon_stats); |
1775826ce
|
77 78 79 |
/* We increase/decrease in batches which fit in a page */ static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; |
1775826ce
|
80 |
#ifdef CONFIG_HIGHMEM |
1775826ce
|
81 82 83 84 85 86 87 88 89 90 91 92 |
#define inc_totalhigh_pages() (totalhigh_pages++) #define dec_totalhigh_pages() (totalhigh_pages--) #else #define inc_totalhigh_pages() do {} while(0) #define dec_totalhigh_pages() do {} while(0) #endif /* List of ballooned pages, threaded through the mem_map array. */ static LIST_HEAD(ballooned_pages); /* Main work function, always executed in process context. */ static void balloon_process(struct work_struct *work); |
95170b2e2
|
93 |
static DECLARE_DELAYED_WORK(balloon_worker, balloon_process); |
1775826ce
|
94 95 96 97 98 99 100 101 102 |
/*
 * When ballooning out (allocating memory to return to Xen) we don't really
 * want the kernel to try too hard since that can trigger the oom killer.
 */
#define GFP_BALLOON \
	(GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)

/*
 * scrub_page: clear the page with clear_highpage() when the kernel is
 * built with CONFIG_XEN_SCRUB_PAGES; otherwise this is a no-op.
 */
static void scrub_page(struct page *page)
{
#ifdef CONFIG_XEN_SCRUB_PAGES
	clear_highpage(page);
#endif
}

/* balloon_append: add the given page to the balloon. */
9be4d4575
|
108 |
static void __balloon_append(struct page *page) |
1775826ce
|
109 110 111 112 113 |
{ /* Lowmem is re-populated first, so highmem pages go at list tail. */ if (PageHighMem(page)) { list_add_tail(&page->lru, &ballooned_pages); balloon_stats.balloon_high++; |
1775826ce
|
114 115 116 117 |
} else { list_add(&page->lru, &ballooned_pages); balloon_stats.balloon_low++; } |
9be4d4575
|
118 |
} |
3d65c9488
|
119 |
|
9be4d4575
|
120 121 122 |
static void balloon_append(struct page *page) { __balloon_append(page); |
09ca132a8
|
123 124 |
if (PageHighMem(page)) dec_totalhigh_pages(); |
3d65c9488
|
125 |
totalram_pages--; |
1775826ce
|
126 127 128 |
} /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ |
b6f306798
|
129 |
static struct page *balloon_retrieve(bool prefer_highmem)
{
	struct list_head *node;
	struct page *page;

	if (list_empty(&ballooned_pages))
		return NULL;

	/* Highmem pages are queued at the tail, lowmem pages at the head. */
	node = prefer_highmem ? ballooned_pages.prev : ballooned_pages.next;
	page = list_entry(node, struct page, lru);
	list_del(&page->lru);

	/* Undo the accounting done when the page entered the balloon. */
	if (!PageHighMem(page)) {
		balloon_stats.balloon_low--;
	} else {
		balloon_stats.balloon_high--;
		inc_totalhigh_pages();
	}
	totalram_pages++;

	return page;
}

/* Peek at the head of the balloon list without removing it; NULL if empty. */
static struct page *balloon_first_page(void)
{
	return list_empty(&ballooned_pages) ?
		NULL : list_entry(ballooned_pages.next, struct page, lru);
}

/* Peek at the list entry after @page; NULL once the list head is reached. */
static struct page *balloon_next_page(struct page *page)
{
	struct list_head *succ = page->lru.next;

	return succ != &ballooned_pages ?
		list_entry(succ, struct page, lru) : NULL;
}
95d2ac4a0
|
165 |
static enum bp_state update_schedule(enum bp_state state) |
1775826ce
|
166 |
{ |
95d2ac4a0
|
167 168 169 170 171 |
if (state == BP_DONE) { balloon_stats.schedule_delay = 1; balloon_stats.retry_count = 1; return BP_DONE; } |
95d2ac4a0
|
172 173 174 175 |
++balloon_stats.retry_count; if (balloon_stats.max_retry_count != RETRY_UNLIMITED && balloon_stats.retry_count > balloon_stats.max_retry_count) { |
95d2ac4a0
|
176 177 178 179 180 181 182 183 184 185 186 |
balloon_stats.schedule_delay = 1; balloon_stats.retry_count = 1; return BP_ECANCELED; } balloon_stats.schedule_delay <<= 1; if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay) balloon_stats.schedule_delay = balloon_stats.max_schedule_delay; return BP_EAGAIN; |
1775826ce
|
187 |
} |
83be7e52d
|
188 |
static long current_credit(void) |
1775826ce
|
189 |
{ |
bc2c03032
|
190 |
unsigned long target = balloon_stats.target_pages; |
1775826ce
|
191 192 193 194 195 |
target = min(target, balloon_stats.current_pages + balloon_stats.balloon_low + balloon_stats.balloon_high); |
83be7e52d
|
196 |
return target - balloon_stats.current_pages; |
1775826ce
|
197 |
} |
95d2ac4a0
|
198 |
static enum bp_state increase_reservation(unsigned long nr_pages) |
1775826ce
|
199 |
{ |
95d2ac4a0
|
200 |
int rc; |
2f70e0acd
|
201 |
unsigned long pfn, i; |
1775826ce
|
202 |
struct page *page; |
1775826ce
|
203 204 205 206 207 208 209 210 |
struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); |
1775826ce
|
211 212 |
page = balloon_first_page(); for (i = 0; i < nr_pages; i++) { |
95d2ac4a0
|
213 214 215 216 |
if (!page) { nr_pages = i; break; } |
a419aef8b
|
217 |
frame_list[i] = page_to_pfn(page); |
1775826ce
|
218 219 |
page = balloon_next_page(page); } |
a90971ebd
|
220 |
set_xen_guest_handle(reservation.extent_start, frame_list); |
fde28e8f4
|
221 222 |
reservation.nr_extents = nr_pages; rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); |
40095de1f
|
223 |
if (rc <= 0) |
95d2ac4a0
|
224 |
return BP_EAGAIN; |
1775826ce
|
225 |
|
bc2c03032
|
226 |
for (i = 0; i < rc; i++) { |
b6f306798
|
227 |
page = balloon_retrieve(false); |
1775826ce
|
228 229 230 231 232 233 234 235 236 |
BUG_ON(page == NULL); pfn = page_to_pfn(page); BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && phys_to_machine_mapping_valid(pfn)); set_phys_to_machine(pfn, frame_list[i]); /* Link back into the page tables if not highmem. */ |
4dfe22f5f
|
237 |
if (xen_pv_domain() && !PageHighMem(page)) { |
1775826ce
|
238 239 240 241 242 243 244 245 246 247 248 249 250 |
int ret; ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), mfn_pte(frame_list[i], PAGE_KERNEL), 0); BUG_ON(ret); } /* Relinquish the page back to the allocator. */ ClearPageReserved(page); init_page_count(page); __free_page(page); } |
bc2c03032
|
251 |
balloon_stats.current_pages += rc; |
1775826ce
|
252 |
|
95d2ac4a0
|
253 |
return BP_DONE; |
1775826ce
|
254 |
} |
b6f306798
|
255 |
static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) |
1775826ce
|
256 |
{ |
95d2ac4a0
|
257 |
enum bp_state state = BP_DONE; |
2f70e0acd
|
258 |
unsigned long pfn, i; |
1775826ce
|
259 |
struct page *page; |
1775826ce
|
260 261 262 263 264 265 266 267 268 269 270 |
int ret; struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); for (i = 0; i < nr_pages; i++) { |
b6f306798
|
271 |
if ((page = alloc_page(gfp)) == NULL) { |
1775826ce
|
272 |
nr_pages = i; |
95d2ac4a0
|
273 |
state = BP_EAGAIN; |
1775826ce
|
274 275 276 277 278 279 280 |
break; } pfn = page_to_pfn(page); frame_list[i] = pfn_to_mfn(pfn); scrub_page(page); |
1058a75f0
|
281 |
|
4dfe22f5f
|
282 |
if (xen_pv_domain() && !PageHighMem(page)) { |
ff4ce8c33
|
283 284 285 286 287 |
ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), __pte_ma(0), 0); BUG_ON(ret); } |
1775826ce
|
288 289 290 291 292 |
} /* Ensure that ballooned highmem pages don't have kmaps. */ kmap_flush_unused(); flush_tlb_all(); |
1775826ce
|
293 294 295 |
/* No more mappings: invalidate P2M and add to balloon. */ for (i = 0; i < nr_pages; i++) { pfn = mfn_to_pfn(frame_list[i]); |
6eaa412f2
|
296 |
__set_phys_to_machine(pfn, INVALID_P2M_ENTRY); |
1775826ce
|
297 298 |
balloon_append(pfn_to_page(pfn)); } |
a90971ebd
|
299 |
set_xen_guest_handle(reservation.extent_start, frame_list); |
1775826ce
|
300 301 302 303 304 |
reservation.nr_extents = nr_pages; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); BUG_ON(ret != nr_pages); balloon_stats.current_pages -= nr_pages; |
1775826ce
|
305 |
|
95d2ac4a0
|
306 |
return state; |
1775826ce
|
307 308 309 310 311 312 313 314 315 316 |
} /* * We avoid multiple worker processes conflicting via the balloon mutex. * We may of course race updates of the target counts (which are protected * by the balloon lock), or with changes to the Xen hard limit, but we will * recover from these in time. */ static void balloon_process(struct work_struct *work) { |
95d2ac4a0
|
317 |
enum bp_state state = BP_DONE; |
1775826ce
|
318 319 320 321 322 |
long credit; mutex_lock(&balloon_mutex); do { |
83be7e52d
|
323 |
credit = current_credit(); |
95d2ac4a0
|
324 |
|
1775826ce
|
325 |
if (credit > 0) |
95d2ac4a0
|
326 |
state = increase_reservation(credit); |
1775826ce
|
327 |
if (credit < 0) |
b6f306798
|
328 |
state = decrease_reservation(-credit, GFP_BALLOON); |
95d2ac4a0
|
329 330 |
state = update_schedule(state); |
1775826ce
|
331 332 333 334 335 |
#ifndef CONFIG_PREEMPT if (need_resched()) schedule(); #endif |
95d2ac4a0
|
336 |
} while (credit && state == BP_DONE); |
1775826ce
|
337 338 |
/* Schedule more work if there is some still to be done. */ |
95d2ac4a0
|
339 340 |
if (state == BP_EAGAIN) schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ); |
1775826ce
|
341 342 343 344 345 |
mutex_unlock(&balloon_mutex); } /* Resets the Xen limit, sets new target, and kicks off processing. */ |
803eb047a
|
346 |
void balloon_set_new_target(unsigned long target) |
1775826ce
|
347 348 |
{ /* No need for lock. Not read-modify-write updates. */ |
1775826ce
|
349 |
balloon_stats.target_pages = target; |
95170b2e2
|
350 |
schedule_delayed_work(&balloon_worker, 0); |
1775826ce
|
351 |
} |
803eb047a
|
352 |
EXPORT_SYMBOL_GPL(balloon_set_new_target); |
1775826ce
|
353 |
|
b6f306798
|
354 355 356 357 358 359 360 |
/** * alloc_xenballooned_pages - get pages that have been ballooned out * @nr_pages: Number of pages to get * @pages: pages returned * @return 0 on success, error otherwise */ int alloc_xenballooned_pages(int nr_pages, struct page** pages) |
1775826ce
|
361 |
{ |
b6f306798
|
362 363 364 365 366 367 368 369 370 371 372 373 374 |
int pgno = 0; struct page* page; mutex_lock(&balloon_mutex); while (pgno < nr_pages) { page = balloon_retrieve(true); if (page) { pages[pgno++] = page; } else { enum bp_state st; st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER); if (st != BP_DONE) goto out_undo; } |
1775826ce
|
375 |
} |
b6f306798
|
376 377 378 379 380 381 382 383 384 |
mutex_unlock(&balloon_mutex); return 0; out_undo: while (pgno) balloon_append(pages[--pgno]); /* Free the memory back to the kernel soon */ schedule_delayed_work(&balloon_worker, 0); mutex_unlock(&balloon_mutex); return -ENOMEM; |
1775826ce
|
385 |
} |
b6f306798
|
386 |
EXPORT_SYMBOL(alloc_xenballooned_pages); |
1775826ce
|
387 |
|
b6f306798
|
388 389 390 391 392 393 |
/** * free_xenballooned_pages - return pages retrieved with get_ballooned_pages * @nr_pages: Number of pages * @pages: pages to return */ void free_xenballooned_pages(int nr_pages, struct page** pages) |
1775826ce
|
394 |
{ |
b6f306798
|
395 |
int i; |
1775826ce
|
396 |
|
b6f306798
|
397 |
mutex_lock(&balloon_mutex); |
1775826ce
|
398 |
|
b6f306798
|
399 400 401 402 403 404 |
for (i = 0; i < nr_pages; i++) { if (pages[i]) balloon_append(pages[i]); } /* The balloon may be too large now. Shrink it if needed. */ |
83be7e52d
|
405 |
if (current_credit()) |
b6f306798
|
406 |
schedule_delayed_work(&balloon_worker, 0); |
1775826ce
|
407 |
|
b6f306798
|
408 409 410 |
mutex_unlock(&balloon_mutex); } EXPORT_SYMBOL(free_xenballooned_pages); |
1775826ce
|
411 412 413 |
/*
 * balloon_init: set up the balloon statistics and seed the balloon with
 * the extra memory region described by xen_extra_mem_start/size.
 *
 * Returns -ENODEV when not running as a Xen domain, 0 otherwise.
 */
static int __init balloon_init(void)
{
	unsigned long pfn, extra_pfn_end;
	struct page *page;

	if (!xen_domain())
		return -ENODEV;

	/* Log messages must be newline-terminated. */
	pr_info("xen/balloon: Initialising balloon driver.\n");

	balloon_stats.current_pages = xen_pv_domain() ?
		min(xen_start_info->nr_pages, max_pfn) : max_pfn;
	balloon_stats.target_pages  = balloon_stats.current_pages;
	balloon_stats.balloon_low   = 0;
	balloon_stats.balloon_high  = 0;

	/* Retry forever by default, backing off up to max_schedule_delay. */
	balloon_stats.schedule_delay = 1;
	balloon_stats.max_schedule_delay = 32;
	balloon_stats.retry_count = 1;
	balloon_stats.max_retry_count = RETRY_UNLIMITED;

	/*
	 * Initialise the balloon with excess memory space.  We need
	 * to make sure we don't add memory which doesn't exist or
	 * logically exist.  The E820 map can be trimmed to be smaller
	 * than the amount of physical memory due to the mem= command
	 * line parameter.  And if this is a 32-bit non-HIGHMEM kernel
	 * on a system with memory which requires highmem to access,
	 * don't try to use it.
	 */
	extra_pfn_end = min(min(max_pfn, e820_end_of_ram_pfn()),
			    (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size));
	for (pfn = PFN_UP(xen_extra_mem_start);
	     pfn < extra_pfn_end;
	     pfn++) {
		page = pfn_to_page(pfn);
		/* totalram_pages and totalhigh_pages do not include the boot-time
		   balloon extension, so don't subtract from it. */
		__balloon_append(page);
	}

	return 0;
}

subsys_initcall(balloon_init);
1775826ce
|
454 |
MODULE_LICENSE("GPL"); |