Commit ddd588b5dd55f14320379961e47683db4e4c1d90

Authored by David Rientjes
Committed by Linus Torvalds
1 parent 94dcf29a11

oom: suppress nodes that are not allowed from meminfo on oom kill

The oom killer is extremely verbose for machines with a large number of
cpus and/or nodes.  This verbosity can often be harmful if it causes other
important messages to be scrolled from the kernel log and incurs a
significant time delay, specifically for kernels with CONFIG_NODES_SHIFT >
8.

This patch causes memory information to be displayed only for nodes that
are allowed by current's cpuset when dumping the VM state.  Information
for all other nodes is irrelevant to the oom condition; we don't care if
there's an abundance of memory elsewhere if we can't access it.

This only affects the behavior of dumping memory information when an oom
is triggered.  Other dumps, such as for sysrq+m, still display the
unfiltered form when using the existing show_mem() interface.

Additionally, the per-cpu pageset statistics are extremely verbose in oom
killer output, so they are now suppressed.  This removes

	nodes_weight(current->mems_allowed) * (1 + nr_cpus)

lines from the oom killer output.

Callers may use __show_mem(SHOW_MEM_FILTER_NODES) to filter disallowed
nodes.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 4 changed files with 49 additions and 4 deletions Side-by-side Diff

... ... @@ -859,7 +859,14 @@
859 859  
860 860 #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
861 861  
  862 +/*
  863 + * Flags passed to __show_mem() and __show_free_areas() to suppress output in
  864 + * various contexts.
  865 + */
  866 +#define SHOW_MEM_FILTER_NODES (0x0001u) /* filter disallowed nodes */
  867 +
862 868 extern void show_free_areas(void);
  869 +extern void __show_free_areas(unsigned int flags);
863 870  
864 871 int shmem_lock(struct file *file, int lock, struct user_struct *user);
865 872 struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags);
... ... @@ -1348,6 +1355,7 @@
1348 1355 extern void mem_init(void);
1349 1356 extern void __init mmap_init(void);
1350 1357 extern void show_mem(void);
  1358 +extern void __show_mem(unsigned int flags);
1351 1359 extern void si_meminfo(struct sysinfo * val);
1352 1360 extern void si_meminfo_node(struct sysinfo *val, int nid);
1353 1361 extern int after_bootmem;
... ... @@ -9,14 +9,14 @@
9 9 #include <linux/nmi.h>
10 10 #include <linux/quicklist.h>
11 11  
12   -void show_mem(void)
  12 +void __show_mem(unsigned int filter)
13 13 {
14 14 pg_data_t *pgdat;
15 15 unsigned long total = 0, reserved = 0, shared = 0,
16 16 nonshared = 0, highmem = 0;
17 17  
18 18 printk("Mem-Info:\n");
19   - show_free_areas();
  19 + __show_free_areas(filter);
20 20  
21 21 for_each_online_pgdat(pgdat) {
22 22 unsigned long i, flags;
... ... @@ -60,5 +60,10 @@
60 60 printk("%lu pages in pagetable cache\n",
61 61 quicklist_total_size());
62 62 #endif
  63 +}
  64 +
  65 +void show_mem(void)
  66 +{
  67 + __show_mem(0);
63 68 }
... ... @@ -406,7 +406,7 @@
406 406 task_unlock(current);
407 407 dump_stack();
408 408 mem_cgroup_print_oom_info(mem, p);
409   - show_mem();
  409 + __show_mem(SHOW_MEM_FILTER_NODES);
410 410 if (sysctl_oom_dump_tasks)
411 411 dump_tasks(mem, nodemask);
412 412 }
... ... @@ -2411,19 +2411,42 @@
2411 2411 }
2412 2412 #endif
2413 2413  
  2414 +/*
  2415 + * Determine whether the zone's node should be displayed or not, depending on
  2416 + * whether SHOW_MEM_FILTER_NODES was passed to __show_free_areas().
  2417 + */
  2418 +static bool skip_free_areas_zone(unsigned int flags, const struct zone *zone)
  2419 +{
  2420 + bool ret = false;
  2421 +
  2422 + if (!(flags & SHOW_MEM_FILTER_NODES))
  2423 + goto out;
  2424 +
  2425 + get_mems_allowed();
  2426 + ret = !node_isset(zone->zone_pgdat->node_id,
  2427 + cpuset_current_mems_allowed);
  2428 + put_mems_allowed();
  2429 +out:
  2430 + return ret;
  2431 +}
  2432 +
2414 2433 #define K(x) ((x) << (PAGE_SHIFT-10))
2415 2434  
2416 2435 /*
2417 2436 * Show free area list (used inside shift_scroll-lock stuff)
2418 2437 * We also calculate the percentage fragmentation. We do this by counting the
2419 2438 * memory on each free list with the exception of the first item on the list.
  2439 + * Suppresses nodes that are not allowed by current's cpuset if
  2440 + * SHOW_MEM_FILTER_NODES is passed.
2420 2441 */
2421   -void show_free_areas(void)
  2442 +void __show_free_areas(unsigned int filter)
2422 2443 {
2423 2444 int cpu;
2424 2445 struct zone *zone;
2425 2446  
2426 2447 for_each_populated_zone(zone) {
  2448 + if (skip_free_areas_zone(filter, zone))
  2449 + continue;
2427 2450 show_node(zone);
2428 2451 printk("%s per-cpu:\n", zone->name);
2429 2452  
... ... @@ -2465,6 +2488,8 @@
2465 2488 for_each_populated_zone(zone) {
2466 2489 int i;
2467 2490  
  2491 + if (skip_free_areas_zone(filter, zone))
  2492 + continue;
2468 2493 show_node(zone);
2469 2494 printk("%s"
2470 2495 " free:%lukB"
... ... @@ -2532,6 +2557,8 @@
2532 2557 for_each_populated_zone(zone) {
2533 2558 unsigned long nr[MAX_ORDER], flags, order, total = 0;
2534 2559  
  2560 + if (skip_free_areas_zone(filter, zone))
  2561 + continue;
2535 2562 show_node(zone);
2536 2563 printk("%s: ", zone->name);
2537 2564  
... ... @@ -2549,6 +2576,11 @@
2549 2576 printk("%ld total pagecache pages\n", global_page_state(NR_FILE_PAGES));
2550 2577  
2551 2578 show_swap_cache_info();
  2579 +}
  2580 +
  2581 +void show_free_areas(void)
  2582 +{
  2583 + __show_free_areas(0);
2552 2584 }
2553 2585  
2554 2586 static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)