Commit fc36b8d3d819047eb4d23ca079fb4d3af20ff076
Committed by
Linus Torvalds
1 parent
53f2556b67
Exists in
master
and in
20 other branches
mempolicy: use MPOL_F_LOCAL to Indicate Preferred Local Policy
Now that we're using "preferred local" policy for system default, we need to make this as fast as possible. Because of the variable size of the mempolicy structure [based on size of nodemasks], the preferred_node may be in a different cacheline from the mode. This can result in accessing an extra cacheline in the normal case of system default policy. I suspect this is the cause of an observed 2-3% slowdown in page fault testing relative to a kernel without this patch series. To alleviate this, use an internal mode flag, MPOL_F_LOCAL, in the mempolicy flags member, which is guaranteed [?] to be in the same cacheline as the mode itself. Verified that the reworked mempolicy now performs slightly better on 25-rc8-mm1 for both anon and shmem segments with system default and vma [preferred local] policy. Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Cc: Christoph Lameter <clameter@sgi.com> Cc: David Rientjes <rientjes@google.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Andi Kleen <ak@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 3 changed files with 28 additions and 31 deletions Side-by-side Diff
Documentation/vm/numa_memory_policy.txt
... | ... | @@ -176,12 +176,11 @@ |
176 | 176 | containing the cpu where the allocation takes place. |
177 | 177 | |
178 | 178 | Internally, the Preferred policy uses a single node--the |
179 | - preferred_node member of struct mempolicy. A "distinguished | |
180 | - value of this preferred_node, currently '-1', is interpreted | |
181 | - as "the node containing the cpu where the allocation takes | |
182 | - place"--local allocation. "Local" allocation policy can be | |
183 | - viewed as a Preferred policy that starts at the node containing | |
184 | - the cpu where the allocation takes place. | |
179 | + preferred_node member of struct mempolicy. When the internal | |
180 | + mode flag MPOL_F_LOCAL is set, the preferred_node is ignored and | |
181 | + the policy is interpreted as local allocation. "Local" allocation | |
182 | + policy can be viewed as a Preferred policy that starts at the node | |
183 | + containing the cpu where the allocation takes place. | |
185 | 184 | |
186 | 185 | It is possible for the user to specify that local allocation is |
187 | 186 | always preferred by passing an empty nodemask with this mode. |
include/linux/mempolicy.h
mm/mempolicy.c
... | ... | @@ -110,7 +110,7 @@ |
110 | 110 | struct mempolicy default_policy = { |
111 | 111 | .refcnt = ATOMIC_INIT(1), /* never free it */ |
112 | 112 | .mode = MPOL_PREFERRED, |
113 | - .v = { .preferred_node = -1 }, | |
113 | + .flags = MPOL_F_LOCAL, | |
114 | 114 | }; |
115 | 115 | |
116 | 116 | static const struct mempolicy_operations { |
... | ... | @@ -163,7 +163,7 @@ |
163 | 163 | static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes) |
164 | 164 | { |
165 | 165 | if (!nodes) |
166 | - pol->v.preferred_node = -1; /* local allocation */ | |
166 | + pol->flags |= MPOL_F_LOCAL; /* local allocation */ | |
167 | 167 | else if (nodes_empty(*nodes)) |
168 | 168 | return -EINVAL; /* no allowed nodes */ |
169 | 169 | else |
170 | 170 | |
171 | 171 | |
... | ... | @@ -290,14 +290,15 @@ |
290 | 290 | if (pol->flags & MPOL_F_STATIC_NODES) { |
291 | 291 | int node = first_node(pol->w.user_nodemask); |
292 | 292 | |
293 | - if (node_isset(node, *nodes)) | |
293 | + if (node_isset(node, *nodes)) { | |
294 | 294 | pol->v.preferred_node = node; |
295 | - else | |
296 | - pol->v.preferred_node = -1; | |
295 | + pol->flags &= ~MPOL_F_LOCAL; | |
296 | + } else | |
297 | + pol->flags |= MPOL_F_LOCAL; | |
297 | 298 | } else if (pol->flags & MPOL_F_RELATIVE_NODES) { |
298 | 299 | mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes); |
299 | 300 | pol->v.preferred_node = first_node(tmp); |
300 | - } else if (pol->v.preferred_node != -1) { | |
301 | + } else if (!(pol->flags & MPOL_F_LOCAL)) { | |
301 | 302 | pol->v.preferred_node = node_remap(pol->v.preferred_node, |
302 | 303 | pol->w.cpuset_mems_allowed, |
303 | 304 | *nodes); |
... | ... | @@ -645,7 +646,7 @@ |
645 | 646 | *nodes = p->v.nodes; |
646 | 647 | break; |
647 | 648 | case MPOL_PREFERRED: |
648 | - if (p->v.preferred_node >= 0) | |
649 | + if (!(p->flags & MPOL_F_LOCAL)) | |
649 | 650 | node_set(p->v.preferred_node, *nodes); |
650 | 651 | /* else return empty node mask for local allocation */ |
651 | 652 | break; |
652 | 653 | |
... | ... | @@ -1324,13 +1325,12 @@ |
1324 | 1325 | /* Return a zonelist indicated by gfp for node representing a mempolicy */ |
1325 | 1326 | static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy) |
1326 | 1327 | { |
1327 | - int nd; | |
1328 | + int nd = numa_node_id(); | |
1328 | 1329 | |
1329 | 1330 | switch (policy->mode) { |
1330 | 1331 | case MPOL_PREFERRED: |
1331 | - nd = policy->v.preferred_node; | |
1332 | - if (nd < 0) | |
1333 | - nd = numa_node_id(); | |
1332 | + if (!(policy->flags & MPOL_F_LOCAL)) | |
1333 | + nd = policy->v.preferred_node; | |
1334 | 1334 | break; |
1335 | 1335 | case MPOL_BIND: |
1336 | 1336 | /* |
1337 | 1337 | |
1338 | 1338 | |
... | ... | @@ -1339,16 +1339,13 @@ |
1339 | 1339 | * current node is part of the mask, we use the zonelist for |
1340 | 1340 | * the first node in the mask instead. |
1341 | 1341 | */ |
1342 | - nd = numa_node_id(); | |
1343 | 1342 | if (unlikely(gfp & __GFP_THISNODE) && |
1344 | 1343 | unlikely(!node_isset(nd, policy->v.nodes))) |
1345 | 1344 | nd = first_node(policy->v.nodes); |
1346 | 1345 | break; |
1347 | 1346 | case MPOL_INTERLEAVE: /* should not happen */ |
1348 | - nd = numa_node_id(); | |
1349 | 1347 | break; |
1350 | 1348 | default: |
1351 | - nd = 0; | |
1352 | 1349 | BUG(); |
1353 | 1350 | } |
1354 | 1351 | return node_zonelist(nd, gfp); |
1355 | 1352 | |
... | ... | @@ -1379,14 +1376,15 @@ |
1379 | 1376 | */ |
1380 | 1377 | unsigned slab_node(struct mempolicy *policy) |
1381 | 1378 | { |
1382 | - if (!policy) | |
1379 | + if (!policy || policy->flags & MPOL_F_LOCAL) | |
1383 | 1380 | return numa_node_id(); |
1384 | 1381 | |
1385 | 1382 | switch (policy->mode) { |
1386 | 1383 | case MPOL_PREFERRED: |
1387 | - if (unlikely(policy->v.preferred_node >= 0)) | |
1388 | - return policy->v.preferred_node; | |
1389 | - return numa_node_id(); | |
1384 | + /* | |
1385 | + * handled MPOL_F_LOCAL above | |
1386 | + */ | |
1387 | + return policy->v.preferred_node; | |
1390 | 1388 | |
1391 | 1389 | case MPOL_INTERLEAVE: |
1392 | 1390 | return interleave_nodes(policy); |
... | ... | @@ -1666,7 +1664,8 @@ |
1666 | 1664 | case MPOL_INTERLEAVE: |
1667 | 1665 | return nodes_equal(a->v.nodes, b->v.nodes); |
1668 | 1666 | case MPOL_PREFERRED: |
1669 | - return a->v.preferred_node == b->v.preferred_node; | |
1667 | + return a->v.preferred_node == b->v.preferred_node && | |
1668 | + a->flags == b->flags; | |
1670 | 1669 | default: |
1671 | 1670 | BUG(); |
1672 | 1671 | return 0; |
... | ... | @@ -1946,7 +1945,7 @@ |
1946 | 1945 | } |
1947 | 1946 | |
1948 | 1947 | /* |
1949 | - * "local" is pseudo-policy: MPOL_PREFERRED with preferred_node == -1 | |
1948 | + * "local" is pseudo-policy: MPOL_PREFERRED with MPOL_F_LOCAL flag | |
1950 | 1949 | * Used only for mpol_to_str() |
1951 | 1950 | */ |
1952 | 1951 | #define MPOL_LOCAL (MPOL_INTERLEAVE + 1) |
... | ... | @@ -1962,7 +1961,6 @@ |
1962 | 1961 | { |
1963 | 1962 | char *p = buffer; |
1964 | 1963 | int l; |
1965 | - int nid; | |
1966 | 1964 | nodemask_t nodes; |
1967 | 1965 | unsigned short mode; |
1968 | 1966 | unsigned short flags = pol ? pol->flags : 0; |
1969 | 1967 | |
... | ... | @@ -1979,11 +1977,10 @@ |
1979 | 1977 | |
1980 | 1978 | case MPOL_PREFERRED: |
1981 | 1979 | nodes_clear(nodes); |
1982 | - nid = pol->v.preferred_node; | |
1983 | - if (nid < 0) | |
1980 | + if (flags & MPOL_F_LOCAL) | |
1984 | 1981 | mode = MPOL_LOCAL; /* pseudo-policy */ |
1985 | 1982 | else |
1986 | - node_set(nid, nodes); | |
1983 | + node_set(pol->v.preferred_node, nodes); | |
1987 | 1984 | break; |
1988 | 1985 | |
1989 | 1986 | case MPOL_BIND: |
... | ... | @@ -2004,7 +2001,7 @@ |
2004 | 2001 | strcpy(p, policy_types[mode]); |
2005 | 2002 | p += l; |
2006 | 2003 | |
2007 | - if (flags) { | |
2004 | + if (flags & MPOL_MODE_FLAGS) { | |
2008 | 2005 | int need_bar = 0; |
2009 | 2006 | |
2010 | 2007 | if (buffer + maxlen < p + 2) |