Commit 33906bc5c87b50028364405ec425de9638afc719

Authored by Mel Gorman
Committed by Linus Torvalds
1 parent c6a8a8c589

vmscan: tracing: add trace events for kswapd wakeup, sleeping and direct reclaim

Add two trace events for kswapd waking up and going to sleep for the
purposes of tracking kswapd activity, and two trace events for direct
reclaim beginning and ending.  The information can be used to work out
how much time a process or the system is spending on the reclamation of
pages and, in the case of direct reclaim, how many pages were reclaimed
for that process.  High-frequency triggering of these events could point
to memory pressure problems.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Larry Woodman <lwoodman@redhat.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Michael Rubin <mrubin@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
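
Once applied, the new events show up under the vmscan subsystem in the
tracing debugfs directory.  A minimal sketch of enabling them from
userspace, assuming debugfs is mounted at /sys/kernel/debug:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/tracing/events/vmscan/enable";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/*
	 * Writing '1' enables every event in the vmscan subsystem;
	 * writing '0' disables them again.  Individual events can be
	 * toggled via events/vmscan/<event>/enable.
	 */
	if (write(fd, "1", 1) != 1)
		perror("write");
	close(fd);
	return 0;
}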

Showing 4 changed files with 173 additions and 41 deletions

include/trace/events/gfpflags.h
  1 +/*
  2 + * The order of these masks is important. Matching masks will be seen
  3 + * first and the leftover flags will end up showing by themselves.
  4 + *
  5 + * For example, if we have GFP_KERNEL before GFP_USER we will get:
  6 + *
  7 + * GFP_KERNEL|GFP_HARDWALL
  8 + *
  9 + * Thus the masks with the most bits set go first.
  10 + */
  11 +#define show_gfp_flags(flags) \
  12 + (flags) ? __print_flags(flags, "|", \
  13 + {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"}, \
  14 + {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \
  15 + {(unsigned long)GFP_USER, "GFP_USER"}, \
  16 + {(unsigned long)GFP_TEMPORARY, "GFP_TEMPORARY"}, \
  17 + {(unsigned long)GFP_KERNEL, "GFP_KERNEL"}, \
  18 + {(unsigned long)GFP_NOFS, "GFP_NOFS"}, \
  19 + {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \
  20 + {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \
  21 + {(unsigned long)__GFP_HIGH, "GFP_HIGH"}, \
  22 + {(unsigned long)__GFP_WAIT, "GFP_WAIT"}, \
  23 + {(unsigned long)__GFP_IO, "GFP_IO"}, \
  24 + {(unsigned long)__GFP_COLD, "GFP_COLD"}, \
  25 + {(unsigned long)__GFP_NOWARN, "GFP_NOWARN"}, \
  26 + {(unsigned long)__GFP_REPEAT, "GFP_REPEAT"}, \
  27 + {(unsigned long)__GFP_NOFAIL, "GFP_NOFAIL"}, \
  28 + {(unsigned long)__GFP_NORETRY, "GFP_NORETRY"}, \
  29 + {(unsigned long)__GFP_COMP, "GFP_COMP"}, \
  30 + {(unsigned long)__GFP_ZERO, "GFP_ZERO"}, \
  31 + {(unsigned long)__GFP_NOMEMALLOC, "GFP_NOMEMALLOC"}, \
  32 + {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \
  33 + {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \
  34 + {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \
  35 + {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"} \
  36 + ) : "GFP_NOWAIT"
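
The ordering rule described in the comment above mirrors how
__print_flags consumes bits: each mask that fully matches is printed
and then cleared, and whatever is left prints as raw hex.  A minimal
userspace sketch of that first-match decoding, using illustrative
flag values (the real ones live in <linux/gfp.h>):

#include <stdio.h>

/* Illustrative values only; not the real <linux/gfp.h> definitions. */
#define DEMO_GFP_WAIT	0x10UL
#define DEMO_GFP_IO	0x40UL
#define DEMO_GFP_FS	0x80UL
#define DEMO_GFP_KERNEL	(DEMO_GFP_WAIT | DEMO_GFP_IO | DEMO_GFP_FS)

static const struct { unsigned long mask; const char *name; } names[] = {
	{ DEMO_GFP_KERNEL, "GFP_KERNEL" },	/* composite masks first */
	{ DEMO_GFP_WAIT, "GFP_WAIT" },
	{ DEMO_GFP_IO, "GFP_IO" },
	{ DEMO_GFP_FS, "GFP_FS" },
};

static void show_flags(unsigned long flags)
{
	const char *delim = "";
	size_t i;

	for (i = 0; i < sizeof(names) / sizeof(names[0]) && flags; i++) {
		if ((flags & names[i].mask) != names[i].mask)
			continue;	/* not all bits of this mask set */
		printf("%s%s", delim, names[i].name);
		delim = "|";
		flags &= ~names[i].mask;	/* consume the matched bits */
	}
	if (flags)	/* leftover bits print by themselves */
		printf("%s0x%lx", delim, flags);
	printf("\n");
}

int main(void)
{
	show_flags(DEMO_GFP_KERNEL);		/* "GFP_KERNEL" */
	show_flags(DEMO_GFP_WAIT | DEMO_GFP_IO);	/* "GFP_WAIT|GFP_IO" */
	return 0;
}

Because the composite mask is listed before its component bits, an
allocation carrying exactly those bits decodes to the single name
rather than GFP_WAIT|GFP_IO|GFP_FS.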
include/trace/events/kmem.h
... ... @@ -6,43 +6,7 @@
6 6  
7 7 #include <linux/types.h>
8 8 #include <linux/tracepoint.h>
9   -
10   -/*
11   - * The order of these masks is important. Matching masks will be seen
12   - * first and the left over flags will end up showing by themselves.
13   - *
14   - * For example, if we have GFP_KERNEL before GFP_USER we wil get:
15   - *
16   - * GFP_KERNEL|GFP_HARDWALL
17   - *
18   - * Thus most bits set go first.
19   - */
20   -#define show_gfp_flags(flags) \
21   - (flags) ? __print_flags(flags, "|", \
22   - {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"}, \
23   - {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \
24   - {(unsigned long)GFP_USER, "GFP_USER"}, \
25   - {(unsigned long)GFP_TEMPORARY, "GFP_TEMPORARY"}, \
26   - {(unsigned long)GFP_KERNEL, "GFP_KERNEL"}, \
27   - {(unsigned long)GFP_NOFS, "GFP_NOFS"}, \
28   - {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \
29   - {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \
30   - {(unsigned long)__GFP_HIGH, "GFP_HIGH"}, \
31   - {(unsigned long)__GFP_WAIT, "GFP_WAIT"}, \
32   - {(unsigned long)__GFP_IO, "GFP_IO"}, \
33   - {(unsigned long)__GFP_COLD, "GFP_COLD"}, \
34   - {(unsigned long)__GFP_NOWARN, "GFP_NOWARN"}, \
35   - {(unsigned long)__GFP_REPEAT, "GFP_REPEAT"}, \
36   - {(unsigned long)__GFP_NOFAIL, "GFP_NOFAIL"}, \
37   - {(unsigned long)__GFP_NORETRY, "GFP_NORETRY"}, \
38   - {(unsigned long)__GFP_COMP, "GFP_COMP"}, \
39   - {(unsigned long)__GFP_ZERO, "GFP_ZERO"}, \
40   - {(unsigned long)__GFP_NOMEMALLOC, "GFP_NOMEMALLOC"}, \
41   - {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \
42   - {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \
43   - {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \
44   - {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"} \
45   - ) : "GFP_NOWAIT"
  9 +#include "gfpflags.h"
46 10  
47 11 DECLARE_EVENT_CLASS(kmem_alloc,
48 12  
include/trace/events/vmscan.h
  1 +#undef TRACE_SYSTEM
  2 +#define TRACE_SYSTEM vmscan
  3 +
  4 +#if !defined(_TRACE_VMSCAN_H) || defined(TRACE_HEADER_MULTI_READ)
  5 +#define _TRACE_VMSCAN_H
  6 +
  7 +#include <linux/types.h>
  8 +#include <linux/tracepoint.h>
  9 +#include "gfpflags.h"
  10 +
  11 +TRACE_EVENT(mm_vmscan_kswapd_sleep,
  12 +
  13 + TP_PROTO(int nid),
  14 +
  15 + TP_ARGS(nid),
  16 +
  17 + TP_STRUCT__entry(
  18 + __field( int, nid )
  19 + ),
  20 +
  21 + TP_fast_assign(
  22 + __entry->nid = nid;
  23 + ),
  24 +
  25 + TP_printk("nid=%d", __entry->nid)
  26 +);
  27 +
  28 +TRACE_EVENT(mm_vmscan_kswapd_wake,
  29 +
  30 + TP_PROTO(int nid, int order),
  31 +
  32 + TP_ARGS(nid, order),
  33 +
  34 + TP_STRUCT__entry(
  35 + __field( int, nid )
  36 + __field( int, order )
  37 + ),
  38 +
  39 + TP_fast_assign(
  40 + __entry->nid = nid;
  41 + __entry->order = order;
  42 + ),
  43 +
  44 + TP_printk("nid=%d order=%d", __entry->nid, __entry->order)
  45 +);
  46 +
  47 +TRACE_EVENT(mm_vmscan_wakeup_kswapd,
  48 +
  49 + TP_PROTO(int nid, int zid, int order),
  50 +
  51 + TP_ARGS(nid, zid, order),
  52 +
  53 + TP_STRUCT__entry(
  54 + __field( int, nid )
  55 + __field( int, zid )
  56 + __field( int, order )
  57 + ),
  58 +
  59 + TP_fast_assign(
  60 + __entry->nid = nid;
  61 + __entry->zid = zid;
  62 + __entry->order = order;
  63 + ),
  64 +
  65 + TP_printk("nid=%d zid=%d order=%d",
  66 + __entry->nid,
  67 + __entry->zid,
  68 + __entry->order)
  69 +);
  70 +
  71 +TRACE_EVENT(mm_vmscan_direct_reclaim_begin,
  72 +
  73 + TP_PROTO(int order, int may_writepage, gfp_t gfp_flags),
  74 +
  75 + TP_ARGS(order, may_writepage, gfp_flags),
  76 +
  77 + TP_STRUCT__entry(
  78 + __field( int, order )
  79 + __field( int, may_writepage )
  80 + __field( gfp_t, gfp_flags )
  81 + ),
  82 +
  83 + TP_fast_assign(
  84 + __entry->order = order;
  85 + __entry->may_writepage = may_writepage;
  86 + __entry->gfp_flags = gfp_flags;
  87 + ),
  88 +
  89 + TP_printk("order=%d may_writepage=%d gfp_flags=%s",
  90 + __entry->order,
  91 + __entry->may_writepage,
  92 + show_gfp_flags(__entry->gfp_flags))
  93 +);
  94 +
  95 +TRACE_EVENT(mm_vmscan_direct_reclaim_end,
  96 +
  97 + TP_PROTO(unsigned long nr_reclaimed),
  98 +
  99 + TP_ARGS(nr_reclaimed),
  100 +
  101 + TP_STRUCT__entry(
  102 + __field( unsigned long, nr_reclaimed )
  103 + ),
  104 +
  105 + TP_fast_assign(
  106 + __entry->nr_reclaimed = nr_reclaimed;
  107 + ),
  108 +
  109 + TP_printk("nr_reclaimed=%lu", __entry->nr_reclaimed)
  110 +);
  111 +
  112 +#endif /* _TRACE_VMSCAN_H */
  113 +
  114 +/* This part must be outside protection */
  115 +#include <trace/define_trace.h>
mm/vmscan.c
... ... @@ -48,6 +48,9 @@
48 48  
49 49 #include "internal.h"
50 50  
  51 +#define CREATE_TRACE_POINTS
  52 +#include <trace/events/vmscan.h>
  53 +
51 54 struct scan_control {
52 55 /* Incremented by the number of inactive pages that were scanned */
53 56 unsigned long nr_scanned;
... ... @@ -1883,6 +1886,7 @@
1883 1886 unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
1884 1887 gfp_t gfp_mask, nodemask_t *nodemask)
1885 1888 {
  1889 + unsigned long nr_reclaimed;
1886 1890 struct scan_control sc = {
1887 1891 .gfp_mask = gfp_mask,
1888 1892 .may_writepage = !laptop_mode,
... ... @@ -1895,7 +1899,15 @@
1895 1899 .nodemask = nodemask,
1896 1900 };
1897 1901  
1898   - return do_try_to_free_pages(zonelist, &sc);
  1902 + trace_mm_vmscan_direct_reclaim_begin(order,
  1903 + sc.may_writepage,
  1904 + gfp_mask);
  1905 +
  1906 + nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
  1907 +
  1908 + trace_mm_vmscan_direct_reclaim_end(nr_reclaimed);
  1909 +
  1910 + return nr_reclaimed;
1899 1911 }
1900 1912  
1901 1913 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
1902 1914  
... ... @@ -2294,9 +2306,10 @@
2294 2306 * premature sleep. If not, then go fully
2295 2307 * to sleep until explicitly woken up
2296 2308 */
2297   - if (!sleeping_prematurely(pgdat, order, remaining))
  2309 + if (!sleeping_prematurely(pgdat, order, remaining)) {
  2310 + trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
2298 2311 schedule();
2299   - else {
  2312 + } else {
2300 2313 if (remaining)
2301 2314 count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
2302 2315 else
2303 2316 count_vm_event(KSWAPD_HIGH_WMARK_HIT_QUICKLY);
... ... @@ -2316,8 +2329,10 @@
2316 2329 * We can speed up thawing tasks if we don't call balance_pgdat
2317 2330 * after returning from the refrigerator
2318 2331 */
2319   - if (!ret)
  2332 + if (!ret) {
  2333 + trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
2320 2334 balance_pgdat(pgdat, order);
  2335 + }
2321 2336 }
2322 2337 return 0;
2323 2338 }
... ... @@ -2337,6 +2352,7 @@
2337 2352 return;
2338 2353 if (pgdat->kswapd_max_order < order)
2339 2354 pgdat->kswapd_max_order = order;
  2355 + trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
2340 2356 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
2341 2357 return;
2342 2358 if (!waitqueue_active(&pgdat->kswapd_wait))
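
A rough consumer sketch matching the commit's stated purpose: stream
the rendered trace from trace_pipe and total the nr_reclaimed values
reported by mm_vmscan_direct_reclaim_end.  This assumes debugfs is
mounted at /sys/kernel/debug and the events have already been enabled:

#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *fp = fopen("/sys/kernel/debug/tracing/trace_pipe", "r");
	char line[512];
	unsigned long nr, total = 0;

	if (!fp) {
		perror("fopen");
		return 1;
	}
	/*
	 * trace_pipe blocks until events arrive; each line carries the
	 * TP_printk output, ending in "mm_vmscan_direct_reclaim_end:
	 * nr_reclaimed=<n>" for the event of interest.
	 */
	while (fgets(line, sizeof(line), fp)) {
		char *p = strstr(line, "mm_vmscan_direct_reclaim_end:");

		if (!p)
			continue;
		p = strstr(p, "nr_reclaimed=");
		if (p && sscanf(p, "nr_reclaimed=%lu", &nr) == 1) {
			total += nr;
			printf("reclaimed %lu pages so far\n", total);
		}
	}
	fclose(fp);
	return 0;
}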