Commit 78afd5612deb8268bafc8b6507d72341d5ed9aac

Authored by Andi Kleen
Committed by Linus Torvalds
1 parent 11bc82d67d

mm: add __GFP_OTHER_NODE flag

Add a new __GFP_OTHER_NODE flag to tell the low level numa statistics in
zone_statistics() that an allocation is on behalf of another thread.  This
way the local and remote counters can still be correct, even when
background daemons like khugepaged are changing memory mappings.

This only affects the accounting, but I think it's worth doing that right
to avoid confusing users.

I first tried to just pass down the right node, but this required a lot of
changes to pass down this parameter and at least one addition of a 10th
argument to a 9 argument function.  Using the flag is a lot less
intrusive.

Open question: should this flag also be used for migration?

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 4 changed files with 12 additions and 5 deletions Side-by-side Diff

... ... @@ -35,6 +35,7 @@
35 35 #define ___GFP_NOTRACK 0
36 36 #endif
37 37 #define ___GFP_NO_KSWAPD 0x400000u
  38 +#define ___GFP_OTHER_NODE 0x800000u
38 39  
39 40 /*
40 41 * GFP bitmasks..
... ... @@ -83,6 +84,7 @@
83 84 #define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */
84 85  
85 86 #define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
  87 +#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
86 88  
87 89 /*
88 90 * This may seem redundant, but it's a way of annotating false positives vs.
include/linux/vmstat.h
... ... @@ -220,12 +220,12 @@
220 220 zone_page_state(&zones[ZONE_MOVABLE], item);
221 221 }
222 222  
223   -extern void zone_statistics(struct zone *, struct zone *);
  223 +extern void zone_statistics(struct zone *, struct zone *, gfp_t gfp);
224 224  
225 225 #else
226 226  
227 227 #define node_page_state(node, item) global_page_state(item)
228   -#define zone_statistics(_zl,_z) do { } while (0)
  228 +#define zone_statistics(_zl, _z, gfp) do { } while (0)
229 229  
230 230 #endif /* CONFIG_NUMA */
231 231  
... ... @@ -1337,7 +1337,7 @@
1337 1337 }
1338 1338  
1339 1339 __count_zone_vm_events(PGALLOC, zone, 1 << order);
1340   - zone_statistics(preferred_zone, zone);
  1340 + zone_statistics(preferred_zone, zone, gfp_flags);
1341 1341 local_irq_restore(flags);
1342 1342  
1343 1343 VM_BUG_ON(bad_range(zone, page));
... ... @@ -500,8 +500,12 @@
500 500 * z = the zone from which the allocation occurred.
501 501 *
502 502 * Must be called with interrupts disabled.
  503 + *
  504 + * When __GFP_OTHER_NODE is set assume the node of the preferred
  505 + * zone is the local node. This is useful for daemons who allocate
  506 + * memory on behalf of other processes.
503 507 */
504   -void zone_statistics(struct zone *preferred_zone, struct zone *z)
  508 +void zone_statistics(struct zone *preferred_zone, struct zone *z, gfp_t flags)
505 509 {
506 510 if (z->zone_pgdat == preferred_zone->zone_pgdat) {
507 511 __inc_zone_state(z, NUMA_HIT);
... ... @@ -509,7 +513,8 @@
509 513 __inc_zone_state(z, NUMA_MISS);
510 514 __inc_zone_state(preferred_zone, NUMA_FOREIGN);
511 515 }
512   - if (z->node == numa_node_id())
  516 + if (z->node == ((flags & __GFP_OTHER_NODE) ?
  517 + preferred_zone->node : numa_node_id()))
513 518 __inc_zone_state(z, NUMA_LOCAL);
514 519 else
515 520 __inc_zone_state(z, NUMA_OTHER);