Commit 9d0243bca345d5ce25d3f4b74b7facb3a6df1232

Authored by Andrew Morton
Committed by Linus Torvalds
1 parent bec6b0c89b

[PATCH] drop-pagecache

Add /proc/sys/vm/drop_caches.  When written to, this will cause the kernel to
discard as much pagecache and/or reclaimable slab objects as it can.  This
operation requires root permissions.

It won't drop dirty data, so the user should run `sync' first.

Caveats:

a) Holds inode_lock for exorbitant amounts of time.

b) Needs to be taught about NUMA nodes: propagate these all the way through
   so the discarding can be controlled on a per-node basis.

This is a debugging feature: useful for getting consistent results between
filesystem benchmarks.  We could possibly put it under a config option, but
it's less than 300 bytes.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 9 changed files with 107 additions and 5 deletions Side-by-side Diff

Documentation/filesystems/proc.txt
... ... @@ -1302,6 +1302,23 @@
1302 1302 unnecessary page faults in thrashing situation. The unit of the value is
1303 1303 second. The value would be useful to tune thrashing behavior.
1304 1304  
  1305 +drop_caches
  1306 +-----------
  1307 +
  1308 +Writing to this will cause the kernel to drop clean caches, dentries and
  1309 +inodes from memory, causing that memory to become free.
  1310 +
  1311 +To free pagecache:
  1312 + echo 1 > /proc/sys/vm/drop_caches
  1313 +To free dentries and inodes:
  1314 + echo 2 > /proc/sys/vm/drop_caches
  1315 +To free pagecache, dentries and inodes:
  1316 + echo 3 > /proc/sys/vm/drop_caches
  1317 +
  1318 +As this is a non-destructive operation and dirty objects are not freeable, the
  1319 +user should run `sync' first.
  1320 +
  1321 +
1305 1322 2.5 /proc/sys/dev - Device specific parameters
1306 1323 ----------------------------------------------
1307 1324  
Documentation/sysctl/vm.txt
... ... @@ -26,12 +26,13 @@
26 26 - min_free_kbytes
27 27 - laptop_mode
28 28 - block_dump
  29 +- drop_caches
29 30  
30 31 ==============================================================
31 32  
32 33 dirty_ratio, dirty_background_ratio, dirty_expire_centisecs,
33 34 dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode,
34   -block_dump, swap_token_timeout:
  35 +block_dump, swap_token_timeout, drop_caches:
35 36  
36 37 See Documentation/filesystems/proc.txt
37 38  
... ... @@ -10,7 +10,7 @@
10 10 ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
11 11 attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
12 12 seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
13   - ioprio.o pnode.o
  13 + ioprio.o pnode.o drop_caches.o
14 14  
15 15 obj-$(CONFIG_INOTIFY) += inotify.o
16 16 obj-$(CONFIG_EPOLL) += eventpoll.o
  1 +/*
  2 + * Implement the manual drop-all-pagecache function
  3 + */
  4 +
  5 +#include <linux/kernel.h>
  6 +#include <linux/mm.h>
  7 +#include <linux/fs.h>
  8 +#include <linux/writeback.h>
  9 +#include <linux/sysctl.h>
  10 +#include <linux/gfp.h>
  11 +
  12 +/* A global variable is a bit ugly, but it keeps the code simple */
  13 +int sysctl_drop_caches;
  14 +
  15 +static void drop_pagecache_sb(struct super_block *sb) /* Call invalidate_inode_pages() on the mapping of every inode on sb->s_inodes. */
  16 +{
  17 + struct inode *inode;
  18 +
  19 + spin_lock(&inode_lock); /* held for the entire list walk below */
  20 + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
  21 + if (inode->i_state & (I_FREEING|I_WILL_FREE)) /* skip inodes whose state has I_FREEING or I_WILL_FREE set */
  22 + continue;
  23 + invalidate_inode_pages(inode->i_mapping);
  24 + }
  25 + spin_unlock(&inode_lock);
  26 +}
  27 +
  28 +void drop_pagecache(void) /* Walk the super_blocks list and run drop_pagecache_sb() on each superblock whose s_root is set. */
  29 +{
  30 + struct super_block *sb;
  31 +
  32 + spin_lock(&sb_lock);
  33 +restart:
  34 + list_for_each_entry(sb, &super_blocks, s_list) {
  35 + sb->s_count++; /* bumped before sb_lock is released; presumably keeps sb alive meanwhile -- confirm */
  36 + spin_unlock(&sb_lock); /* sb_lock is not held across down_read() and the per-sb work */
  37 + down_read(&sb->s_umount);
  38 + if (sb->s_root) /* superblocks with a NULL s_root are skipped */
  39 + drop_pagecache_sb(sb);
  40 + up_read(&sb->s_umount);
  41 + spin_lock(&sb_lock); /* retake sb_lock before continuing the list walk */
  42 + if (__put_super_and_need_restart(sb)) /* judging by its name, drops the count taken above; nonzero restarts the walk */
  43 + goto restart;
  44 + }
  45 + spin_unlock(&sb_lock);
  46 +}
  47 +
  48 +void drop_slab(void) /* Call shrink_slab() repeatedly until one call reports 10 or fewer objects. */
  49 +{
  50 + int nr_objects; /* result of the most recent shrink_slab() call */
  51 +
  52 + do {
  53 + nr_objects = shrink_slab(1000, GFP_KERNEL, 1000); /* scanned = 1000, lru_pages = 1000 (fixed values) */
  54 + } while (nr_objects > 10); /* keep looping while a pass still shrinks more than 10 objects */
  55 +}
  56 +
  57 +int drop_caches_sysctl_handler(ctl_table *table, int write,
  58 + struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
  59 +{
  60 + proc_dointvec_minmax(table, write, file, buffer, length, ppos);
  61 + if (write) {
  62 + if (sysctl_drop_caches & 1)
  63 + drop_pagecache();
  64 + if (sysctl_drop_caches & 2)
  65 + drop_slab();
  66 + }
  67 + return 0;
  68 +}
... ... @@ -1036,6 +1036,13 @@
1036 1036 /* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */
1037 1037 #define OOM_DISABLE -17
1038 1038  
  1039 +int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
  1040 + void __user *, size_t *, loff_t *);
  1041 +int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
  1042 + unsigned long lru_pages);
  1043 +void drop_pagecache(void);
  1044 +void drop_slab(void);
  1045 +
1039 1046 #endif /* __KERNEL__ */
1040 1047 #endif /* _LINUX_MM_H */
include/linux/sysctl.h
... ... @@ -180,6 +180,7 @@
180 180 VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */
181 181 VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */
182 182 VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */
  183 + VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */
183 184 };
184 185  
185 186  
... ... @@ -68,6 +68,7 @@
68 68 extern int printk_ratelimit_jiffies;
69 69 extern int printk_ratelimit_burst;
70 70 extern int pid_max_min, pid_max_max;
  71 +extern int sysctl_drop_caches;
71 72  
72 73 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
73 74 int unknown_nmi_panic;
... ... @@ -772,6 +773,15 @@
772 773 .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
773 774 .mode = 0644,
774 775 .proc_handler = &lowmem_reserve_ratio_sysctl_handler,
  776 + .strategy = &sysctl_intvec,
  777 + },
  778 + {
  779 + .ctl_name = VM_DROP_PAGECACHE,
  780 + .procname = "drop_caches",
  781 + .data = &sysctl_drop_caches,
  782 + .maxlen = sizeof(int),
  783 + .mode = 0644,
  784 + .proc_handler = drop_caches_sysctl_handler,
775 785 .strategy = &sysctl_intvec,
776 786 },
777 787 {
... ... @@ -249,7 +249,6 @@
249 249 break;
250 250 }
251 251 pagevec_release(&pvec);
252   - cond_resched();
253 252 }
254 253 return ret;
255 254 }
... ... @@ -180,8 +180,7 @@
180 180 *
181 181 * Returns the number of slab objects which we shrunk.
182 182 */
183   -static int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
184   - unsigned long lru_pages)
  183 +int shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages)
185 184 {
186 185 struct shrinker *shrinker;
187 186 int ret = 0;