Commit fc36b8d3d819047eb4d23ca079fb4d3af20ff076

Authored by Lee Schermerhorn
Committed by Linus Torvalds
1 parent 53f2556b67

mempolicy: use MPOL_F_LOCAL to Indicate Preferred Local Policy

Now that we're using "preferred local" policy for the system default, we need
to make this as fast as possible.  Because of the variable size of the
mempolicy structure [based on the size of nodemasks], the preferred_node may
be in a different cacheline from the mode.  This can result in accessing an
extra cacheline in the normal case of system default policy.  I suspect this
is the cause of an observed 2-3% slowdown in page fault testing relative to a
kernel without this patch series.
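
To see why, here is an illustrative model only -- the MAX_NUMNODES value,
BITS_PER_LONG, and the field order below are assumptions for the sketch, not
the kernel's exact struct definition:

    /* Illustrative model: with 1024 possible nodes a nodemask_t is 128
     * bytes, so a field placed after it sits two 64-byte cachelines away
     * from .mode/.flags at the top of the struct. */
    #define MAX_NUMNODES	1024	/* assumed; set by CONFIG_NODES_SHIFT */
    #define BITS_PER_LONG	64	/* assumed 64-bit build */

    typedef struct {
    	unsigned long bits[MAX_NUMNODES / BITS_PER_LONG];	/* 128 bytes */
    } nodemask_t;

    struct mempolicy_model {
    	unsigned short	mode;		/* offset 0:   cacheline 0 */
    	unsigned short	flags;		/* offset 2:   cacheline 0, beside mode */
    	nodemask_t	user_nodemask;	/* offset 8:   spans cachelines 0-2 */
    	short		preferred_node;	/* offset 136: cacheline 2 */
    };

Testing a bit in .flags therefore stays on the cacheline already loaded for
.mode.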

To alleviate this, use an internal mode flag, MPOL_F_LOCAL, in the mempolicy
flags member, which is guaranteed [?] to be in the same cacheline as the mode
itself.
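
The resulting fast path looks like this (a standalone userspace model of the
reworked policy_zonelist() test in the diff below; the struct and function
names here are invented for the sketch):

    #include <stdio.h>

    #define MPOL_F_LOCAL	(1 << 1)	/* matches the mempolicy.h hunk below */

    struct policy_model {
    	unsigned short	mode;
    	unsigned short	flags;		/* same cacheline as mode */
    	int		preferred_node;	/* possibly a different cacheline */
    };

    /* Same shape as the reworked policy_zonelist(): the common "preferred
     * local" case is decided from flags alone, without ever reading
     * preferred_node. */
    static int preferred_nid(const struct policy_model *pol, int local_nid)
    {
    	int nd = local_nid;

    	if (!(pol->flags & MPOL_F_LOCAL))
    		nd = pol->preferred_node;
    	return nd;
    }

    int main(void)
    {
    	struct policy_model deflt = { .flags = MPOL_F_LOCAL };	/* system default */
    	struct policy_model pref  = { .preferred_node = 3 };

    	printf("default -> %d, preferred -> %d\n",
    	       preferred_nid(&deflt, 0), preferred_nid(&pref, 0));
    	return 0;
    }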

Verified that the reworked mempolicy now performs slightly better on
25-rc8-mm1 for both anon and shmem segments with the system default and vma
[preferred local] policies.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 3 changed files with 28 additions and 31 deletions

Documentation/vm/numa_memory_policy.txt
... ... @@ -176,12 +176,11 @@
176 176 containing the cpu where the allocation takes place.
177 177  
178 178 Internally, the Preferred policy uses a single node--the
179   - preferred_node member of struct mempolicy. A "distinguished
180   - value of this preferred_node, currently '-1', is interpreted
181   - as "the node containing the cpu where the allocation takes
182   - place"--local allocation. "Local" allocation policy can be
183   - viewed as a Preferred policy that starts at the node containing
184   - the cpu where the allocation takes place.
  179 + preferred_node member of struct mempolicy. When the internal
  180 + mode flag MPOL_F_LOCAL is set, the preferred_node is ignored and
  181 + the policy is interpreted as local allocation. "Local" allocation
  182 + policy can be viewed as a Preferred policy that starts at the node
  183 + containing the cpu where the allocation takes place.
185 184  
186 185 It is possible for the user to specify that local allocation is
187 186 always preferred by passing an empty nodemask with this mode.
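
For example, a minimal userspace sketch of requesting always-local allocation
for the calling task via the raw set_mempolicy() syscall (the MPOL_PREFERRED
fallback define is only a convenience in case <linux/mempolicy.h> is not
included):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef MPOL_PREFERRED
    #define MPOL_PREFERRED	1	/* value from <linux/mempolicy.h> */
    #endif

    int main(void)
    {
    	/* MPOL_PREFERRED with a NULL/empty nodemask means "preferred
    	 * local"; with this patch the kernel records that internally as
    	 * MPOL_F_LOCAL (see the mpol_new_preferred() hunk below). */
    	if (syscall(SYS_set_mempolicy, MPOL_PREFERRED, NULL, 0) != 0) {
    		perror("set_mempolicy");
    		return 1;
    	}
    	puts("task policy: preferred local allocation");
    	return 0;
    }
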
include/linux/mempolicy.h
... ... @@ -50,6 +50,7 @@
50 50 * are never OR'ed into the mode in mempolicy API arguments.
51 51 */
52 52 #define MPOL_F_SHARED (1 << 0) /* identify shared policies */
  53 +#define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */
53 54  
54 55 #ifdef __KERNEL__
55 56  
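
These internal flag bits share pol->flags with the user-visible mode flags,
so they must not collide; a quick standalone check (the MPOL_F_STATIC_NODES
and MPOL_F_RELATIVE_NODES values are assumed from the same era of this
header):

    #include <assert.h>

    #define MPOL_F_SHARED		(1 << 0)	/* internal: shared policy */
    #define MPOL_F_LOCAL		(1 << 1)	/* internal: preferred local */
    /* User-visible mode flags, assumed values: */
    #define MPOL_F_RELATIVE_NODES	(1 << 14)
    #define MPOL_F_STATIC_NODES	(1 << 15)
    #define MPOL_MODE_FLAGS	(MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES)

    int main(void)
    {
    	/* Disjoint bits are why mpol_to_str() (last hunk below) can mask
    	 * with MPOL_MODE_FLAGS to show only the user-supplied flags. */
    	assert(!((MPOL_F_SHARED | MPOL_F_LOCAL) & MPOL_MODE_FLAGS));
    	return 0;
    }
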
mm/mempolicy.c
... ... @@ -110,7 +110,7 @@
110 110 struct mempolicy default_policy = {
111 111 .refcnt = ATOMIC_INIT(1), /* never free it */
112 112 .mode = MPOL_PREFERRED,
113   - .v = { .preferred_node = -1 },
  113 + .flags = MPOL_F_LOCAL,
114 114 };
115 115  
116 116 static const struct mempolicy_operations {
... ... @@ -163,7 +163,7 @@
163 163 static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes)
164 164 {
165 165 if (!nodes)
166   - pol->v.preferred_node = -1; /* local allocation */
  166 + pol->flags |= MPOL_F_LOCAL; /* local allocation */
167 167 else if (nodes_empty(*nodes))
168 168 return -EINVAL; /* no allowed nodes */
169 169 else
170 170  
171 171  
... ... @@ -290,14 +290,15 @@
290 290 if (pol->flags & MPOL_F_STATIC_NODES) {
291 291 int node = first_node(pol->w.user_nodemask);
292 292  
293   - if (node_isset(node, *nodes))
  293 + if (node_isset(node, *nodes)) {
294 294 pol->v.preferred_node = node;
295   - else
296   - pol->v.preferred_node = -1;
  295 + pol->flags &= ~MPOL_F_LOCAL;
  296 + } else
  297 + pol->flags |= MPOL_F_LOCAL;
297 298 } else if (pol->flags & MPOL_F_RELATIVE_NODES) {
298 299 mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
299 300 pol->v.preferred_node = first_node(tmp);
300   - } else if (pol->v.preferred_node != -1) {
  301 + } else if (!(pol->flags & MPOL_F_LOCAL)) {
301 302 pol->v.preferred_node = node_remap(pol->v.preferred_node,
302 303 pol->w.cpuset_mems_allowed,
303 304 *nodes);
... ... @@ -645,7 +646,7 @@
645 646 *nodes = p->v.nodes;
646 647 break;
647 648 case MPOL_PREFERRED:
648   - if (p->v.preferred_node >= 0)
  649 + if (!(p->flags & MPOL_F_LOCAL))
649 650 node_set(p->v.preferred_node, *nodes);
650 651 /* else return empty node mask for local allocation */
651 652 break;
652 653  
... ... @@ -1324,13 +1325,12 @@
1324 1325 /* Return a zonelist indicated by gfp for node representing a mempolicy */
1325 1326 static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy)
1326 1327 {
1327   - int nd;
  1328 + int nd = numa_node_id();
1328 1329  
1329 1330 switch (policy->mode) {
1330 1331 case MPOL_PREFERRED:
1331   - nd = policy->v.preferred_node;
1332   - if (nd < 0)
1333   - nd = numa_node_id();
  1332 + if (!(policy->flags & MPOL_F_LOCAL))
  1333 + nd = policy->v.preferred_node;
1334 1334 break;
1335 1335 case MPOL_BIND:
1336 1336 /*
1337 1337  
1338 1338  
... ... @@ -1339,16 +1339,13 @@
1339 1339 * current node is part of the mask, we use the zonelist for
1340 1340 * the first node in the mask instead.
1341 1341 */
1342   - nd = numa_node_id();
1343 1342 if (unlikely(gfp & __GFP_THISNODE) &&
1344 1343 unlikely(!node_isset(nd, policy->v.nodes)))
1345 1344 nd = first_node(policy->v.nodes);
1346 1345 break;
1347 1346 case MPOL_INTERLEAVE: /* should not happen */
1348   - nd = numa_node_id();
1349 1347 break;
1350 1348 default:
1351   - nd = 0;
1352 1349 BUG();
1353 1350 }
1354 1351 return node_zonelist(nd, gfp);
1355 1352  
... ... @@ -1379,14 +1376,15 @@
1379 1376 */
1380 1377 unsigned slab_node(struct mempolicy *policy)
1381 1378 {
1382   - if (!policy)
  1379 + if (!policy || policy->flags & MPOL_F_LOCAL)
1383 1380 return numa_node_id();
1384 1381  
1385 1382 switch (policy->mode) {
1386 1383 case MPOL_PREFERRED:
1387   - if (unlikely(policy->v.preferred_node >= 0))
1388   - return policy->v.preferred_node;
1389   - return numa_node_id();
  1384 + /*
  1385 + * handled MPOL_F_LOCAL above
  1386 + */
  1387 + return policy->v.preferred_node;
1390 1388  
1391 1389 case MPOL_INTERLEAVE:
1392 1390 return interleave_nodes(policy);
... ... @@ -1666,7 +1664,8 @@
1666 1664 case MPOL_INTERLEAVE:
1667 1665 return nodes_equal(a->v.nodes, b->v.nodes);
1668 1666 case MPOL_PREFERRED:
1669   - return a->v.preferred_node == b->v.preferred_node;
  1667 + return a->v.preferred_node == b->v.preferred_node &&
  1668 + a->flags == b->flags;
1670 1669 default:
1671 1670 BUG();
1672 1671 return 0;
... ... @@ -1946,7 +1945,7 @@
1946 1945 }
1947 1946  
1948 1947 /*
1949   - * "local" is pseudo-policy: MPOL_PREFERRED with preferred_node == -1
  1948 + * "local" is pseudo-policy: MPOL_PREFERRED with MPOL_F_LOCAL flag
1950 1949 * Used only for mpol_to_str()
1951 1950 */
1952 1951 #define MPOL_LOCAL (MPOL_INTERLEAVE + 1)
... ... @@ -1962,7 +1961,6 @@
1962 1961 {
1963 1962 char *p = buffer;
1964 1963 int l;
1965   - int nid;
1966 1964 nodemask_t nodes;
1967 1965 unsigned short mode;
1968 1966 unsigned short flags = pol ? pol->flags : 0;
1969 1967  
... ... @@ -1979,11 +1977,10 @@
1979 1977  
1980 1978 case MPOL_PREFERRED:
1981 1979 nodes_clear(nodes);
1982   - nid = pol->v.preferred_node;
1983   - if (nid < 0)
  1980 + if (flags & MPOL_F_LOCAL)
1984 1981 mode = MPOL_LOCAL; /* pseudo-policy */
1985 1982 else
1986   - node_set(nid, nodes);
  1983 + node_set(pol->v.preferred_node, nodes);
1987 1984 break;
1988 1985  
1989 1986 case MPOL_BIND:
... ... @@ -2004,7 +2001,7 @@
2004 2001 strcpy(p, policy_types[mode]);
2005 2002 p += l;
2006 2003  
2007   - if (flags) {
  2004 + if (flags & MPOL_MODE_FLAGS) {
2008 2005 int need_bar = 0;
2009 2006  
2010 2007 if (buffer + maxlen < p + 2)