Commit 9d0243bca345d5ce25d3f4b74b7facb3a6df1232
Committed by
Linus Torvalds
1 parent
bec6b0c89b
Exists in
master
and in
4 other branches
[PATCH] drop-pagecache
Add /proc/sys/vm/drop_caches. When written to, this will cause the kernel to discard as much pagecache and/or reclaimable slab objects as it can. This operation requires root permissions. It won't drop dirty data, so the user should run `sync' first. Caveats: a) Holds inode_lock for exorbitant amounts of time. b) Needs to be taught about NUMA nodes: propagate these all the way through so the discarding can be controlled on a per-node basis. This is a debugging feature: useful for getting consistent results between filesystem benchmarks. We could possibly put it under a config option, but it's less than 300 bytes. Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Showing 9 changed files with 107 additions and 5 deletions Side-by-side Diff
Documentation/filesystems/proc.txt
| ... | ... | @@ -1302,6 +1302,23 @@ |
| 1302 | 1302 | unnecessary page faults in thrashing situation. The unit of the value is |
| 1303 | 1303 | second. The value would be useful to tune thrashing behavior. |
| 1304 | 1304 | |
| 1305 | +drop_caches | |
| 1306 | +----------- | |
| 1307 | + | |
| 1308 | +Writing to this will cause the kernel to drop clean caches, dentries and | |
| 1309 | +inodes from memory, causing that memory to become free. | |
| 1310 | + | |
| 1311 | +To free pagecache: | |
| 1312 | + echo 1 > /proc/sys/vm/drop_caches | |
| 1313 | +To free dentries and inodes: | |
| 1314 | + echo 2 > /proc/sys/vm/drop_caches | |
| 1315 | +To free pagecache, dentries and inodes: | |
| 1316 | + echo 3 > /proc/sys/vm/drop_caches | |
| 1317 | + | |
| 1318 | +As this is a non-destructive operation and dirty objects are not freeable, the | |
| 1319 | +user should run `sync' first. | |
| 1320 | + | |
| 1321 | + | |
| 1305 | 1322 | 2.5 /proc/sys/dev - Device specific parameters |
| 1306 | 1323 | ---------------------------------------------- |
| 1307 | 1324 |
Documentation/sysctl/vm.txt
| ... | ... | @@ -26,12 +26,13 @@ |
| 26 | 26 | - min_free_kbytes |
| 27 | 27 | - laptop_mode |
| 28 | 28 | - block_dump |
| 29 | +- drop_caches | |
| 29 | 30 | |
| 30 | 31 | ============================================================== |
| 31 | 32 | |
| 32 | 33 | dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, |
| 33 | 34 | dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode, |
| 34 | -block_dump, swap_token_timeout: | |
| 35 | +block_dump, swap_token_timeout, drop_caches: | |
| 35 | 36 | |
| 36 | 37 | See Documentation/filesystems/proc.txt |
| 37 | 38 |
fs/Makefile
| ... | ... | @@ -10,7 +10,7 @@ |
| 10 | 10 | ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ |
| 11 | 11 | attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ |
| 12 | 12 | seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ |
| 13 | - ioprio.o pnode.o | |
| 13 | + ioprio.o pnode.o drop_caches.o | |
| 14 | 14 | |
| 15 | 15 | obj-$(CONFIG_INOTIFY) += inotify.o |
| 16 | 16 | obj-$(CONFIG_EPOLL) += eventpoll.o |
fs/drop_caches.c
| 1 | +/* | |
| 2 | + * Implement the manual drop-all-pagecache function | |
| 3 | + */ | |
| 4 | + | |
| 5 | +#include <linux/kernel.h> | |
| 6 | +#include <linux/mm.h> | |
| 7 | +#include <linux/fs.h> | |
| 8 | +#include <linux/writeback.h> | |
| 9 | +#include <linux/sysctl.h> | |
| 10 | +#include <linux/gfp.h> | |
| 11 | + | |
| 12 | +/* Backing int for the drop_caches sysctl (bit 0: pagecache, bit 1: slab). A global variable is a bit ugly, but it keeps the code simple */ | |
| 13 | +int sysctl_drop_caches; | |
| 14 | +/* Call invalidate_inode_pages() on the mapping of every inode of sb that is not being freed. */ | |
| 15 | +static void drop_pagecache_sb(struct super_block *sb) | |
| 16 | +{ | |
| 17 | + struct inode *inode; | |
| 18 | + | |
| 19 | + spin_lock(&inode_lock); /* held across the entire s_inodes walk */ | |
| 20 | + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | |
| 21 | + if (inode->i_state & (I_FREEING|I_WILL_FREE)) | |
| 22 | + continue; /* skip inodes flagged I_FREEING or I_WILL_FREE */ | |
| 23 | + invalidate_inode_pages(inode->i_mapping); | |
| 24 | + } | |
| 25 | + spin_unlock(&inode_lock); | |
| 26 | +} | |
| 27 | +/* Walk the super_blocks list and call drop_pagecache_sb() on each entry that has an s_root. */ | |
| 28 | +void drop_pagecache(void) | |
| 29 | +{ | |
| 30 | + struct super_block *sb; | |
| 31 | + | |
| 32 | + spin_lock(&sb_lock); | |
| 33 | +restart: | |
| 34 | + list_for_each_entry(sb, &super_blocks, s_list) { | |
| 35 | + sb->s_count++; /* bumped under sb_lock; presumably pins sb while the lock is dropped - confirm */ | |
| 36 | + spin_unlock(&sb_lock); /* sb_lock is not held while s_umount is taken */ | |
| 37 | + down_read(&sb->s_umount); | |
| 38 | + if (sb->s_root) | |
| 39 | + drop_pagecache_sb(sb); | |
| 40 | + up_read(&sb->s_umount); | |
| 41 | + spin_lock(&sb_lock); /* re-taken before the list walk continues */ | |
| 42 | + if (__put_super_and_need_restart(sb)) | |
| 43 | + goto restart; /* begin the walk again from the list head */ | |
| 44 | + } | |
| 45 | + spin_unlock(&sb_lock); | |
| 46 | +} | |
| 47 | + | |
| 48 | +void drop_slab(void) | |
| 49 | +{ | |
| 50 | + /* Keep calling shrink_slab() until one pass returns 10 or fewer. */ | |
| 51 | + for (;;) { | |
| 52 | + if (shrink_slab(1000, GFP_KERNEL, 1000) <= 10) | |
| 53 | + break; | |
| 54 | + } | |
| 55 | +} | |
| 56 | + | |
| 57 | +/* | |
| 58 | + * proc handler for /proc/sys/vm/drop_caches. | |
| 59 | + * | |
| 60 | + * proc_dointvec_minmax() transfers the integer to/from table->data, which | |
| 61 | + * the vm sysctl table points at sysctl_drop_caches. After a successful | |
| 62 | + * write, bit 0 of the value triggers drop_pagecache() and bit 1 triggers | |
| 63 | + * drop_slab(). | |
| 64 | + * | |
| 65 | + * Returns 0 on success, or the error reported by proc_dointvec_minmax(), | |
| 66 | + * in which case nothing is dropped. | |
| 67 | + */ | |
| 68 | +int drop_caches_sysctl_handler(ctl_table *table, int write, | |
| 69 | + struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | |
| 70 | +{ | |
| 71 | + int ret; | |
| 72 | + | |
| 73 | + ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos); | |
| 74 | + if (ret) | |
| 75 | + return ret; /* do not act on a stale value after a failed write */ | |
| 76 | + if (write) { | |
| 77 | + if (sysctl_drop_caches & 1) | |
| 78 | + drop_pagecache(); | |
| 79 | + if (sysctl_drop_caches & 2) | |
| 80 | + drop_slab(); | |
| 81 | + } | |
| 82 | + return 0; | |
| 83 | +} |
include/linux/mm.h
| ... | ... | @@ -1036,6 +1036,13 @@ |
| 1036 | 1036 | /* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */ |
| 1037 | 1037 | #define OOM_DISABLE -17 |
| 1038 | 1038 | |
| 1039 | +int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, | |
| 1040 | + void __user *, size_t *, loff_t *); | |
| 1041 | +int shrink_slab(unsigned long scanned, gfp_t gfp_mask, | |
| 1042 | + unsigned long lru_pages); | |
| 1043 | +void drop_pagecache(void); | |
| 1044 | +void drop_slab(void); | |
| 1045 | + | |
| 1039 | 1046 | #endif /* __KERNEL__ */ |
| 1040 | 1047 | #endif /* _LINUX_MM_H */ |
include/linux/sysctl.h
| ... | ... | @@ -180,6 +180,7 @@ |
| 180 | 180 | VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */ |
| 181 | 181 | VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */ |
| 182 | 182 | VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */ |
| 183 | + VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ | |
| 183 | 184 | }; |
| 184 | 185 | |
| 185 | 186 |
kernel/sysctl.c
| ... | ... | @@ -68,6 +68,7 @@ |
| 68 | 68 | extern int printk_ratelimit_jiffies; |
| 69 | 69 | extern int printk_ratelimit_burst; |
| 70 | 70 | extern int pid_max_min, pid_max_max; |
| 71 | +extern int sysctl_drop_caches; | |
| 71 | 72 | |
| 72 | 73 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) |
| 73 | 74 | int unknown_nmi_panic; |
| ... | ... | @@ -772,6 +773,15 @@ |
| 772 | 773 | .maxlen = sizeof(sysctl_lowmem_reserve_ratio), |
| 773 | 774 | .mode = 0644, |
| 774 | 775 | .proc_handler = &lowmem_reserve_ratio_sysctl_handler, |
| 776 | + .strategy = &sysctl_intvec, | |
| 777 | + }, | |
| 778 | + { | |
| 779 | + .ctl_name = VM_DROP_PAGECACHE, | |
| 780 | + .procname = "drop_caches", | |
| 781 | + .data = &sysctl_drop_caches, | |
| 782 | + .maxlen = sizeof(int), | |
| 783 | + .mode = 0644, | |
| 784 | + .proc_handler = drop_caches_sysctl_handler, | |
| 775 | 785 | .strategy = &sysctl_intvec, |
| 776 | 786 | }, |
| 777 | 787 | { |
mm/truncate.c
mm/vmscan.c
| ... | ... | @@ -180,8 +180,7 @@ |
| 180 | 180 | * |
| 181 | 181 | * Returns the number of slab objects which we shrunk. |
| 182 | 182 | */ |
| 183 | -static int shrink_slab(unsigned long scanned, gfp_t gfp_mask, | |
| 184 | - unsigned long lru_pages) | |
| 183 | +int shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages) | |
| 185 | 184 | { |
| 186 | 185 | struct shrinker *shrinker; |
| 187 | 186 | int ret = 0; |