Blame view
mm/frontswap.c
13.4 KB
29f233cff mm: frontswap: co... |
1 2 3 4 5 6 7 8 9 10 11 12 |
/* * Frontswap frontend * * This code provides the generic "frontend" layer to call a matching * "backend" driver implementation of frontswap. See * Documentation/vm/frontswap.txt for more information. * * Copyright (C) 2009-2012 Oracle Corp. All rights reserved. * Author: Dan Magenheimer * * This work is licensed under the terms of the GNU GPL, version 2. */ |
29f233cff mm: frontswap: co... |
13 14 15 |
#include <linux/mman.h> #include <linux/swap.h> #include <linux/swapops.h> |
29f233cff mm: frontswap: co... |
16 |
#include <linux/security.h> |
29f233cff mm: frontswap: co... |
17 |
#include <linux/module.h> |
29f233cff mm: frontswap: co... |
18 19 20 21 22 23 24 25 |
#include <linux/debugfs.h> #include <linux/frontswap.h> #include <linux/swapfile.h> /* * frontswap_ops is set by frontswap_register_ops to contain the pointers * to the frontswap "backend" implementation functions. */ |
1e01c968d frontswap: make f... |
26 |
static struct frontswap_ops *frontswap_ops __read_mostly; |
29f233cff mm: frontswap: co... |
27 28 |
/* |
165c8aed5 frontswap: s/put_... |
29 |
* If enabled, frontswap_store will return failure even on success. As |
29f233cff mm: frontswap: co... |
30 31 32 33 34 35 36 |
* a result, the swap subsystem will always write the page to swap, in * effect converting frontswap into a writethrough cache. In this mode, * there is no direct reduction in swap writes, but a frontswap backend * can unilaterally "reclaim" any pages in use with no data loss, thus * providing increases control over maximum memory usage due to frontswap. */ static bool frontswap_writethrough_enabled __read_mostly; |
e3483a5f3 frontswap: suppor... |
37 38 39 40 41 42 |
/* * If enabled, the underlying tmem implementation is capable of doing * exclusive gets, so frontswap_load, on a successful tmem_get must * mark the page as no longer in frontswap AND mark it dirty. */ static bool frontswap_tmem_exclusive_gets_enabled __read_mostly; |
29f233cff mm: frontswap: co... |
43 44 45 46 47 48 |
#ifdef CONFIG_DEBUG_FS
/*
 * Event counters published under /sys/kernel/debug/frontswap (if debugfs
 * is properly configured).  They are informational only, so increments
 * are deliberately not protected against races.
 */
static u64 frontswap_loads;
static u64 frontswap_succ_stores;
static u64 frontswap_failed_stores;
static u64 frontswap_invalidates;

/* Count one load. */
static inline void inc_frontswap_loads(void)
{
	++frontswap_loads;
}

/* Count one store that the backend accepted. */
static inline void inc_frontswap_succ_stores(void)
{
	++frontswap_succ_stores;
}

/* Count one store that the backend rejected. */
static inline void inc_frontswap_failed_stores(void)
{
	++frontswap_failed_stores;
}

/* Count one single-page invalidate. */
static inline void inc_frontswap_invalidates(void)
{
	++frontswap_invalidates;
}
#else
/* Without CONFIG_DEBUG_FS there are no counters; the hooks do nothing. */
static inline void inc_frontswap_loads(void) { }
static inline void inc_frontswap_succ_stores(void) { }
static inline void inc_frontswap_failed_stores(void) { }
static inline void inc_frontswap_invalidates(void) { }
#endif
905cd0e1b mm: frontswap: la... |
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
/*
 * Backends may load asynchronously, potentially _after_ the swap system
 * has been activated, so every frontswap function has a chokepoint that
 * avoids calling the backend until one has registered.
 *
 * Specifically, while no backend is registered (nobody has called
 * frontswap_register_ops), each call to frontswap_init (reached via
 * swapon -> enable_swap_info -> frontswap_init) is recorded by setting
 * the matching bit in the 'need_init' bitmap, but no tmem_pool is
 * created.  When a backend registers at some later point, the recorded
 * frontswap_init calls are replayed (by iterating over the 'need_init'
 * bitmap) to create the tmem_pools and set the respective poolids.  All
 * of that is guarded by atomic bit operations on 'need_init'.
 *
 * This alone would not guard us against the user calling swapoff right
 * as we are calling the backend to initialize (so swapon is in action).
 * Fortunately the swapon_mutex has been taken by the caller, so we are
 * OK.  The other scenario, where frontswap_store (called via
 * swap_writepage) races with frontswap_invalidate_area (called via
 * swapoff), is again guarded by the swap subsystem.
 *
 * While no backend is registered, all calls to frontswap_[store|load|
 * invalidate_area|invalidate_page] are ignored or fail.
 *
 * The time between the backend being registered and the swap subsystem
 * calling the backend (via the frontswap_* functions) is indeterminate,
 * as frontswap_ops is not an atomic_t (or a value guarded by a spinlock).
 * That is OK: we are comfortable missing some of these calls to the
 * newly registered backend.
 *
 * Obviously the opposite (unloading the backend) must be done only after
 * all of frontswap_[store|load|invalidate_area|invalidate_page] have
 * started ignoring or failing requests - at which point frontswap_ops
 * would have to be made atomic in some fashion.
 */
static DECLARE_BITMAP(need_init, MAX_SWAPFILES);
905cd0e1b mm: frontswap: la... |
109 |
|
29f233cff mm: frontswap: co... |
110 111 112 113 |
/* * Register operations for frontswap, returning previous thus allowing * detection of multiple backends and possible nesting. */ |
1e01c968d frontswap: make f... |
114 |
struct frontswap_ops *frontswap_register_ops(struct frontswap_ops *ops) |
29f233cff mm: frontswap: co... |
115 |
{ |
1e01c968d frontswap: make f... |
116 |
struct frontswap_ops *old = frontswap_ops; |
905cd0e1b mm: frontswap: la... |
117 |
int i; |
29f233cff mm: frontswap: co... |
118 |
|
905cd0e1b mm: frontswap: la... |
119 |
for (i = 0; i < MAX_SWAPFILES; i++) { |
4f89849da frontswap: get ri... |
120 121 122 123 124 |
if (test_and_clear_bit(i, need_init)) { struct swap_info_struct *sis = swap_info[i]; /* __frontswap_init _should_ have set it! */ if (!sis->frontswap_map) return ERR_PTR(-EINVAL); |
1e01c968d frontswap: make f... |
125 |
ops->init(i); |
4f89849da frontswap: get ri... |
126 |
} |
905cd0e1b mm: frontswap: la... |
127 128 |
} /* |
1e01c968d frontswap: make f... |
129 |
* We MUST have frontswap_ops set _after_ the frontswap_init's |
905cd0e1b mm: frontswap: la... |
130 131 132 133 |
* have been called. Otherwise __frontswap_store might fail. Hence * the barrier to make sure compiler does not re-order us. */ barrier(); |
1e01c968d frontswap: make f... |
134 |
frontswap_ops = ops; |
29f233cff mm: frontswap: co... |
135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
return old; } EXPORT_SYMBOL(frontswap_register_ops); /* * Enable/disable frontswap writethrough (see above). */ void frontswap_writethrough(bool enable) { frontswap_writethrough_enabled = enable; } EXPORT_SYMBOL(frontswap_writethrough); /* |
e3483a5f3 frontswap: suppor... |
149 150 151 152 153 154 155 156 157 |
* Enable/disable frontswap exclusive gets (see above). */ void frontswap_tmem_exclusive_gets(bool enable) { frontswap_tmem_exclusive_gets_enabled = enable; } EXPORT_SYMBOL(frontswap_tmem_exclusive_gets); /* |
29f233cff mm: frontswap: co... |
158 159 |
* Called when a swap device is swapon'd. */ |
4f89849da frontswap: get ri... |
160 |
void __frontswap_init(unsigned type, unsigned long *map) |
29f233cff mm: frontswap: co... |
161 162 |
{ struct swap_info_struct *sis = swap_info[type]; |
4f89849da frontswap: get ri... |
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
BUG_ON(sis == NULL); /* * p->frontswap is a bitmap that we MUST have to figure out which page * has gone in frontswap. Without it there is no point of continuing. */ if (WARN_ON(!map)) return; /* * Irregardless of whether the frontswap backend has been loaded * before this function or it will be later, we _MUST_ have the * p->frontswap set to something valid to work properly. */ frontswap_map_set(sis, map); if (frontswap_ops) |
1e01c968d frontswap: make f... |
178 |
frontswap_ops->init(type); |
4f89849da frontswap: get ri... |
179 |
else { |
a1ad28973 mm/frontswap.c: f... |
180 |
BUG_ON(type >= MAX_SWAPFILES); |
905cd0e1b mm: frontswap: la... |
181 182 |
set_bit(type, need_init); } |
29f233cff mm: frontswap: co... |
183 184 |
} EXPORT_SYMBOL(__frontswap_init); |
f066ea230 mm: frontswap: cl... |
185 186 187 188 189 190 191 192 193 194 195 196 197 |
/*
 * Report whether the page at @offset of @sis is currently recorded as
 * being in frontswap.  Always false while no backend is registered or
 * while @sis has no frontswap_map.
 */
bool __frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
{
	if (!frontswap_ops || !sis->frontswap_map)
		return false;

	return test_bit(offset, sis->frontswap_map);
}
EXPORT_SYMBOL(__frontswap_test);

/*
 * Forget that @offset of @sis is in frontswap: clear its bit in the map
 * and drop the per-device page count by one.
 */
static inline void __frontswap_clear(struct swap_info_struct *sis,
				     pgoff_t offset)
{
	clear_bit(offset, sis->frontswap_map);
	atomic_dec(&sis->frontswap_pages);
}
29f233cff mm: frontswap: co... |
202 |
/* |
165c8aed5 frontswap: s/put_... |
203 |
* "Store" data from a page to frontswap and associate it with the page's |
29f233cff mm: frontswap: co... |
204 205 |
* swaptype and offset. Page must be locked and in the swap cache. * If frontswap already contains a page with matching swaptype and |
1d00015e2 mm/frontswap: cle... |
206 |
* offset, the frontswap implementation may either overwrite the data and |
29f233cff mm: frontswap: co... |
207 208 |
* return success or invalidate the page from frontswap and return failure. */ |
165c8aed5 frontswap: s/put_... |
209 |
int __frontswap_store(struct page *page) |
29f233cff mm: frontswap: co... |
210 211 212 213 214 215 |
{ int ret = -1, dup = 0; swp_entry_t entry = { .val = page_private(page), }; int type = swp_type(entry); struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); |
f066ea230 mm: frontswap: cl... |
216 217 218 219 220 |
/* * Return if no backend registed. * Don't need to inc frontswap_failed_stores here. */ if (!frontswap_ops) |
905cd0e1b mm: frontswap: la... |
221 |
return ret; |
905cd0e1b mm: frontswap: la... |
222 |
|
29f233cff mm: frontswap: co... |
223 224 |
BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); |
f066ea230 mm: frontswap: cl... |
225 |
if (__frontswap_test(sis, offset)) |
29f233cff mm: frontswap: co... |
226 |
dup = 1; |
1e01c968d frontswap: make f... |
227 |
ret = frontswap_ops->store(type, offset, page); |
29f233cff mm: frontswap: co... |
228 |
if (ret == 0) { |
f066ea230 mm: frontswap: cl... |
229 |
set_bit(offset, sis->frontswap_map); |
165c8aed5 frontswap: s/put_... |
230 |
inc_frontswap_succ_stores(); |
29f233cff mm: frontswap: co... |
231 232 |
if (!dup) atomic_inc(&sis->frontswap_pages); |
d9674dda1 mm: frontswap: ma... |
233 |
} else { |
29f233cff mm: frontswap: co... |
234 235 236 237 |
/* failed dup always results in automatic invalidate of the (older) page from frontswap */ |
165c8aed5 frontswap: s/put_... |
238 |
inc_frontswap_failed_stores(); |
fb993fa1a mm: frontswap: in... |
239 |
if (dup) { |
611edfed2 mm: frontswap: sp... |
240 |
__frontswap_clear(sis, offset); |
fb993fa1a mm: frontswap: in... |
241 242 |
frontswap_ops->invalidate_page(type, offset); } |
4bb3e31ef mm: frontswap: tr... |
243 |
} |
29f233cff mm: frontswap: co... |
244 245 246 247 248 |
if (frontswap_writethrough_enabled) /* report failure so swap also writes to swap device */ ret = -1; return ret; } |
165c8aed5 frontswap: s/put_... |
249 |
EXPORT_SYMBOL(__frontswap_store); |
29f233cff mm: frontswap: co... |
250 251 252 253 254 255 |
/* * "Get" data from frontswap associated with swaptype and offset that were * specified when the data was put to frontswap and use it to fill the * specified page with data. Page must be locked and in the swap cache. */ |
165c8aed5 frontswap: s/put_... |
256 |
int __frontswap_load(struct page *page) |
29f233cff mm: frontswap: co... |
257 258 259 260 261 262 263 264 265 |
{ int ret = -1; swp_entry_t entry = { .val = page_private(page), }; int type = swp_type(entry); struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); |
f066ea230 mm: frontswap: cl... |
266 267 268 269 |
/* * __frontswap_test() will check whether there is backend registered */ if (__frontswap_test(sis, offset)) |
1e01c968d frontswap: make f... |
270 |
ret = frontswap_ops->load(type, offset, page); |
e3483a5f3 frontswap: suppor... |
271 |
if (ret == 0) { |
165c8aed5 frontswap: s/put_... |
272 |
inc_frontswap_loads(); |
e3483a5f3 frontswap: suppor... |
273 274 |
if (frontswap_tmem_exclusive_gets_enabled) { SetPageDirty(page); |
f066ea230 mm: frontswap: cl... |
275 |
__frontswap_clear(sis, offset); |
e3483a5f3 frontswap: suppor... |
276 277 |
} } |
29f233cff mm: frontswap: co... |
278 279 |
return ret; } |
165c8aed5 frontswap: s/put_... |
280 |
EXPORT_SYMBOL(__frontswap_load); |
29f233cff mm: frontswap: co... |
281 282 283 284 285 286 287 288 289 290 |
/* * Invalidate any data from frontswap associated with the specified swaptype * and offset so that a subsequent "get" will fail. */ void __frontswap_invalidate_page(unsigned type, pgoff_t offset) { struct swap_info_struct *sis = swap_info[type]; BUG_ON(sis == NULL); |
f066ea230 mm: frontswap: cl... |
291 292 293 294 |
/* * __frontswap_test() will check whether there is backend registered */ if (__frontswap_test(sis, offset)) { |
1e01c968d frontswap: make f... |
295 |
frontswap_ops->invalidate_page(type, offset); |
611edfed2 mm: frontswap: sp... |
296 |
__frontswap_clear(sis, offset); |
29f233cff mm: frontswap: co... |
297 298 299 300 301 302 303 304 305 306 307 308 |
inc_frontswap_invalidates(); } } EXPORT_SYMBOL(__frontswap_invalidate_page); /* * Invalidate all data from frontswap associated with all offsets for the * specified swaptype. */ void __frontswap_invalidate_area(unsigned type) { struct swap_info_struct *sis = swap_info[type]; |
1e01c968d frontswap: make f... |
309 |
if (frontswap_ops) { |
905cd0e1b mm: frontswap: la... |
310 311 312 |
BUG_ON(sis == NULL); if (sis->frontswap_map == NULL) return; |
1e01c968d frontswap: make f... |
313 |
frontswap_ops->invalidate_area(type); |
905cd0e1b mm: frontswap: la... |
314 |
atomic_set(&sis->frontswap_pages, 0); |
7b57976da frontswap: fix in... |
315 |
bitmap_zero(sis->frontswap_map, sis->max); |
905cd0e1b mm: frontswap: la... |
316 317 |
} clear_bit(type, need_init); |
29f233cff mm: frontswap: co... |
318 319 |
} EXPORT_SYMBOL(__frontswap_invalidate_area); |
96253444d mm: frontswap: sp... |
320 321 |
static unsigned long __frontswap_curr_pages(void) { |
96253444d mm: frontswap: sp... |
322 323 324 325 |
unsigned long totalpages = 0; struct swap_info_struct *si = NULL; assert_spin_locked(&swap_lock); |
18ab4d4ce swap: change swap... |
326 |
plist_for_each_entry(si, &swap_active_head, list) |
96253444d mm: frontswap: sp... |
327 |
totalpages += atomic_read(&si->frontswap_pages); |
96253444d mm: frontswap: sp... |
328 329 |
return totalpages; } |
f116695a5 mm: frontswap: sp... |
330 331 332 333 334 335 336 337 |
static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused, int *swapid) { int ret = -EINVAL; struct swap_info_struct *si = NULL; int si_frontswap_pages; unsigned long total_pages_to_unuse = total; unsigned long pages = 0, pages_to_unuse = 0; |
f116695a5 mm: frontswap: sp... |
338 339 |
assert_spin_locked(&swap_lock); |
18ab4d4ce swap: change swap... |
340 |
plist_for_each_entry(si, &swap_active_head, list) { |
f116695a5 mm: frontswap: sp... |
341 342 343 344 345 346 347 348 349 350 351 352 353 354 |
si_frontswap_pages = atomic_read(&si->frontswap_pages); if (total_pages_to_unuse < si_frontswap_pages) { pages = pages_to_unuse = total_pages_to_unuse; } else { pages = si_frontswap_pages; pages_to_unuse = 0; /* unuse all */ } /* ensure there is enough RAM to fetch pages from frontswap */ if (security_vm_enough_memory_mm(current->mm, pages)) { ret = -ENOMEM; continue; } vm_unacct_memory(pages); *unused = pages_to_unuse; |
adfab836f swap: change swap... |
355 |
*swapid = si->type; |
f116695a5 mm: frontswap: sp... |
356 357 358 359 360 361 |
ret = 0; break; } return ret; } |
a00bb1e9f mm: frontswap: fi... |
362 363 364 365 366 |
/* * Used to check if it's necessory and feasible to unuse pages. * Return 1 when nothing to do, 0 when need to shink pages, * error code when there is an error. */ |
69217b4cd mm: frontswap: sp... |
367 368 369 370 371 372 373 374 375 376 377 378 |
static int __frontswap_shrink(unsigned long target_pages, unsigned long *pages_to_unuse, int *type) { unsigned long total_pages = 0, total_pages_to_unuse; assert_spin_locked(&swap_lock); total_pages = __frontswap_curr_pages(); if (total_pages <= target_pages) { /* Nothing to do */ *pages_to_unuse = 0; |
a00bb1e9f mm: frontswap: fi... |
379 |
return 1; |
69217b4cd mm: frontswap: sp... |
380 381 382 383 |
} total_pages_to_unuse = total_pages - target_pages; return __frontswap_unuse_pages(total_pages_to_unuse, pages_to_unuse, type); } |
29f233cff mm: frontswap: co... |
384 385 386 387 388 389 390 391 392 393 |
/* * Frontswap, like a true swap device, may unnecessarily retain pages * under certain circumstances; "shrink" frontswap is essentially a * "partial swapoff" and works by calling try_to_unuse to attempt to * unuse enough frontswap pages to attempt to -- subject to memory * constraints -- reduce the number of pages in frontswap to the * number given in the parameter target_pages. */ void frontswap_shrink(unsigned long target_pages) { |
f116695a5 mm: frontswap: sp... |
394 |
unsigned long pages_to_unuse = 0; |
6b982fcf0 mm/frontswap: fix... |
395 |
int uninitialized_var(type), ret; |
29f233cff mm: frontswap: co... |
396 397 398 399 |
/* * we don't want to hold swap_lock while doing a very * lengthy try_to_unuse, but swap_list may change |
18ab4d4ce swap: change swap... |
400 |
* so restart scan from swap_active_head each time |
29f233cff mm: frontswap: co... |
401 402 |
*/ spin_lock(&swap_lock); |
69217b4cd mm: frontswap: sp... |
403 |
ret = __frontswap_shrink(target_pages, &pages_to_unuse, &type); |
29f233cff mm: frontswap: co... |
404 |
spin_unlock(&swap_lock); |
a00bb1e9f mm: frontswap: fi... |
405 |
if (ret == 0) |
69217b4cd mm: frontswap: sp... |
406 |
try_to_unuse(type, true, pages_to_unuse); |
29f233cff mm: frontswap: co... |
407 408 409 410 411 412 413 414 415 416 417 |
return; } EXPORT_SYMBOL(frontswap_shrink); /* * Count and return the number of frontswap pages across all * swap devices. This is exported so that backend drivers can * determine current usage without reading debugfs. */ unsigned long frontswap_curr_pages(void) { |
29f233cff mm: frontswap: co... |
418 |
unsigned long totalpages = 0; |
29f233cff mm: frontswap: co... |
419 420 |
spin_lock(&swap_lock); |
96253444d mm: frontswap: sp... |
421 |
totalpages = __frontswap_curr_pages(); |
29f233cff mm: frontswap: co... |
422 |
spin_unlock(&swap_lock); |
96253444d mm: frontswap: sp... |
423 |
|
29f233cff mm: frontswap: co... |
424 425 426 427 428 429 430 431 432 433 |
return totalpages; } EXPORT_SYMBOL(frontswap_curr_pages); static int __init init_frontswap(void) { #ifdef CONFIG_DEBUG_FS struct dentry *root = debugfs_create_dir("frontswap", NULL); if (root == NULL) return -ENXIO; |
165c8aed5 frontswap: s/put_... |
434 435 436 437 |
debugfs_create_u64("loads", S_IRUGO, root, &frontswap_loads); debugfs_create_u64("succ_stores", S_IRUGO, root, &frontswap_succ_stores); debugfs_create_u64("failed_stores", S_IRUGO, root, &frontswap_failed_stores); |
29f233cff mm: frontswap: co... |
438 439 440 441 442 443 444 |
debugfs_create_u64("invalidates", S_IRUGO, root, &frontswap_invalidates); #endif return 0; } module_init(init_frontswap); |