Commit dd1a239f6f2d4d3eedd318583ec319aa145b324c

Authored by Mel Gorman
Committed by Linus Torvalds
1 parent 54a6eb5c47

mm: have zonelist contains structs with both a zone pointer and zone_idx

Filtering zonelists requires very frequent use of zone_idx().  This is costly
as it involves a lookup of another structure and a subtraction operation.  As
the zone_idx is often required, it should be quickly accessible.  The node idx
could also be stored here if it were found that accessing zone->node is
significant, which may be the case on workloads where nodemasks are heavily
used.
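
For reference, zone_idx() in include/linux/mmzone.h boils down to pointer
arithmetic against the owning node's zone array, which is where the
dereference and the subtraction come from (shown approximately, for
illustration only; this macro is not changed by the patch):

    /* illustrative form: dereference pgdat, then subtract the array base */
    #define zone_idx(zone)	((zone) - (zone)->zone_pgdat->node_zones)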

This patch introduces a struct zoneref to store a zone pointer and a zone
index.  The zonelist then consists of an array of these struct zonerefs, which
are looked up as necessary.  Helpers are provided for accessing the zone index
as well as the node index.
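
A rough standalone sketch of the idea follows (illustration only: the struct
zone stand-in and the sample zonelist are simplified, not the kernel
definitions, which appear in the include/linux/mmzone.h hunk below).
Filtering compares the cached index, so the zone itself need not be
dereferenced:

    #include <stdio.h>

    /* Simplified stand-in for struct zone, for illustration only. */
    struct zone { int node; };

    /* Mirrors the new struct zoneref: zone pointer plus cached index. */
    struct zoneref {
            struct zone *zone;      /* pointer to the actual zone */
            int zone_idx;           /* cached zone_idx(zone) */
    };

    /* Find the first entry at or below highest_zoneidx; only the cached
     * index is read, no dereference into struct zone is needed. */
    static struct zoneref *first_zones_zonelist(struct zoneref *z,
                                                int highest_zoneidx)
    {
            while (z->zone && z->zone_idx > highest_zoneidx)
                    z++;
            return z;
    }

    int main(void)
    {
            struct zone zones[3] = { { 0 }, { 0 }, { 1 } };
            /* A zonelist is an array of zonerefs terminated by a NULL zone. */
            struct zoneref zl[] = {
                    { &zones[2], 2 }, { &zones[1], 1 }, { &zones[0], 0 },
                    { NULL, 0 },
            };
            struct zoneref *z = first_zones_zonelist(zl, 1);

            if (z->zone)
                    printf("first suitable entry: idx %d, node %d\n",
                           z->zone_idx, z->zone->node);
            return 0;
    }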

[kamezawa.hiroyu@jp.fujitsu.com: Suggested struct zoneref instead of embedding information in pointers]
[hugh@veritas.com: mm-have-zonelist: fix memcg ooms]
[hugh@veritas.com: just return do_try_to_free_pages]
[hugh@veritas.com: do_try_to_free_pages gfp_mask redundant]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 12 changed files with 158 additions and 100 deletions

arch/parisc/mm/init.c
... ... @@ -608,7 +608,7 @@
608 608 for (i = 0; i < npmem_ranges; i++) {
609 609 zl = node_zonelist(i, 0);
610 610 for (j = 0; j < MAX_NR_ZONES; j++) {
611   - struct zone **z;
  611 + struct zoneref *z;
612 612 struct zone *zone;
613 613  
614 614 printk("Zone list for zone %d on node %d: ", j, i);
fs/buffer.c
... ... @@ -360,16 +360,16 @@
360 360 */
361 361 static void free_more_memory(void)
362 362 {
363   - struct zone **zones;
  363 + struct zoneref *zrefs;
364 364 int nid;
365 365  
366 366 wakeup_pdflush(1024);
367 367 yield();
368 368  
369 369 for_each_online_node(nid) {
370   - zones = first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
  370 + zrefs = first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
371 371 gfp_zone(GFP_NOFS));
372   - if (*zones)
  372 + if (zrefs->zone)
373 373 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
374 374 GFP_NOFS);
375 375 }
include/linux/mmzone.h
... ... @@ -469,6 +469,15 @@
469 469 #endif
470 470  
471 471 /*
  472 + * This struct contains information about a zone in a zonelist. It is stored
  473 + * here to avoid dereferences into large structures and lookups of tables
  474 + */
  475 +struct zoneref {
  476 + struct zone *zone; /* Pointer to actual zone */
  477 + int zone_idx; /* zone_idx(zoneref->zone) */
  478 +};
  479 +
  480 +/*
472 481 * One allocation request operates on a zonelist. A zonelist
473 482 * is a list of zones, the first one is the 'goal' of the
474 483 * allocation, the other zones are fallback zones, in decreasing
... ... @@ -476,11 +485,18 @@
476 485 *
477 486 * If zlcache_ptr is not NULL, then it is just the address of zlcache,
478 487 * as explained above. If zlcache_ptr is NULL, there is no zlcache.
  488 + *
  489 + * To speed the reading of the zonelist, the zonerefs contain the zone index
  490 + * of the entry being read. Helper functions to access information given
  491 + * a struct zoneref are
  492 + *
  493 + * zonelist_zone() - Return the struct zone * for an entry in _zonerefs
  494 + * zonelist_zone_idx() - Return the index of the zone for an entry
  495 + * zonelist_node_idx() - Return the index of the node for an entry
479 496 */
480   -
481 497 struct zonelist {
482 498 struct zonelist_cache *zlcache_ptr; // NULL or &zlcache
483   - struct zone *zones[MAX_ZONES_PER_ZONELIST + 1]; // NULL delimited
  499 + struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
484 500 #ifdef CONFIG_NUMA
485 501 struct zonelist_cache zlcache; // optional ...
486 502 #endif
... ... @@ -713,26 +729,52 @@
713 729 zone; \
714 730 zone = next_zone(zone))
715 731  
  732 +static inline struct zone *zonelist_zone(struct zoneref *zoneref)
  733 +{
  734 + return zoneref->zone;
  735 +}
  736 +
  737 +static inline int zonelist_zone_idx(struct zoneref *zoneref)
  738 +{
  739 + return zoneref->zone_idx;
  740 +}
  741 +
  742 +static inline int zonelist_node_idx(struct zoneref *zoneref)
  743 +{
  744 +#ifdef CONFIG_NUMA
  745 + /* zone_to_nid not available in this context */
  746 + return zoneref->zone->node;
  747 +#else
  748 + return 0;
  749 +#endif /* CONFIG_NUMA */
  750 +}
  751 +
  752 +static inline void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
  753 +{
  754 + zoneref->zone = zone;
  755 + zoneref->zone_idx = zone_idx(zone);
  756 +}
  757 +
716 758 /* Returns the first zone at or below highest_zoneidx in a zonelist */
717   -static inline struct zone **first_zones_zonelist(struct zonelist *zonelist,
  759 +static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
718 760 enum zone_type highest_zoneidx)
719 761 {
720   - struct zone **z;
  762 + struct zoneref *z;
721 763  
722 764 /* Find the first suitable zone to use for the allocation */
723   - z = zonelist->zones;
724   - while (*z && zone_idx(*z) > highest_zoneidx)
  765 + z = zonelist->_zonerefs;
  766 + while (zonelist_zone_idx(z) > highest_zoneidx)
725 767 z++;
726 768  
727 769 return z;
728 770 }
729 771  
730 772 /* Returns the next zone at or below highest_zoneidx in a zonelist */
731   -static inline struct zone **next_zones_zonelist(struct zone **z,
  773 +static inline struct zoneref *next_zones_zonelist(struct zoneref *z,
732 774 enum zone_type highest_zoneidx)
733 775 {
734 776 /* Find the next suitable zone to use for the allocation */
735   - while (*z && zone_idx(*z) > highest_zoneidx)
  777 + while (zonelist_zone_idx(z) > highest_zoneidx)
736 778 z++;
737 779  
738 780 return z;
... ... @@ -748,9 +790,11 @@
748 790 * This iterator iterates though all zones at or below a given zone index.
749 791 */
750 792 #define for_each_zone_zonelist(zone, z, zlist, highidx) \
751   - for (z = first_zones_zonelist(zlist, highidx), zone = *z++; \
  793 + for (z = first_zones_zonelist(zlist, highidx), \
  794 + zone = zonelist_zone(z++); \
752 795 zone; \
753   - z = next_zones_zonelist(z, highidx), zone = *z++)
  796 + z = next_zones_zonelist(z, highidx), \
  797 + zone = zonelist_zone(z++))
754 798  
755 799 #ifdef CONFIG_SPARSEMEM
756 800 #include <asm/sparsemem.h>
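
With the helpers above in place, call sites move from a struct zone ** cursor
to a struct zoneref * cursor.  The resulting idiom, paraphrased from the hunks
below rather than quoted from any single one of them, looks roughly like this:

    struct zoneref *z;
    struct zone *zone;

    /* Walk every zone at or below high_zoneidx in the zonelist; zone is
     * the struct zone *, z is the zoneref cursor, and zonelist_zone_idx(z)
     * / zonelist_node_idx(z) are cheap reads of the cached values. */
    for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
            /* use zone ... */
    }
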
include/linux/oom.h
... ... @@ -23,8 +23,8 @@
23 23 CONSTRAINT_MEMORY_POLICY,
24 24 };
25 25  
26   -extern int try_set_zone_oom(struct zonelist *zonelist);
27   -extern void clear_zonelist_oom(struct zonelist *zonelist);
  26 +extern int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_flags);
  27 +extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
28 28  
29 29 extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
30 30 extern int register_oom_notifier(struct notifier_block *nb);
kernel/cpuset.c
... ... @@ -1967,8 +1967,8 @@
1967 1967 {
1968 1968 int i;
1969 1969  
1970   - for (i = 0; zl->zones[i]; i++) {
1971   - int nid = zone_to_nid(zl->zones[i]);
  1970 + for (i = 0; zl->_zonerefs[i].zone; i++) {
  1971 + int nid = zonelist_node_idx(&zl->_zonerefs[i]);
1972 1972  
1973 1973 if (node_isset(nid, current->mems_allowed))
1974 1974 return 1;
mm/hugetlb.c
... ... @@ -97,7 +97,8 @@
97 97 struct mempolicy *mpol;
98 98 struct zonelist *zonelist = huge_zonelist(vma, address,
99 99 htlb_alloc_mask, &mpol);
100   - struct zone *zone, **z;
  100 + struct zone *zone;
  101 + struct zoneref *z;
101 102  
102 103 for_each_zone_zonelist(zone, z, zonelist, MAX_NR_ZONES - 1) {
103 104 nid = zone_to_nid(zone);
mm/mempolicy.c
... ... @@ -186,7 +186,7 @@
186 186 for_each_node_mask(nd, *nodes) {
187 187 struct zone *z = &NODE_DATA(nd)->node_zones[k];
188 188 if (z->present_pages > 0)
189   - zl->zones[num++] = z;
  189 + zoneref_set_zone(z, &zl->_zonerefs[num++]);
190 190 }
191 191 if (k == 0)
192 192 break;
... ... @@ -196,7 +196,8 @@
196 196 kfree(zl);
197 197 return ERR_PTR(-EINVAL);
198 198 }
199   - zl->zones[num] = NULL;
  199 + zl->_zonerefs[num].zone = NULL;
  200 + zl->_zonerefs[num].zone_idx = 0;
200 201 return zl;
201 202 }
202 203  
... ... @@ -504,9 +505,11 @@
504 505 nodes_clear(*nodes);
505 506 switch (p->policy) {
506 507 case MPOL_BIND:
507   - for (i = 0; p->v.zonelist->zones[i]; i++)
508   - node_set(zone_to_nid(p->v.zonelist->zones[i]),
509   - *nodes);
  508 + for (i = 0; p->v.zonelist->_zonerefs[i].zone; i++) {
  509 + struct zoneref *zref;
  510 + zref = &p->v.zonelist->_zonerefs[i];
  511 + node_set(zonelist_node_idx(zref), *nodes);
  512 + }
510 513 break;
511 514 case MPOL_DEFAULT:
512 515 break;
513 516  
... ... @@ -1212,12 +1215,13 @@
1212 1215 case MPOL_INTERLEAVE:
1213 1216 return interleave_nodes(policy);
1214 1217  
1215   - case MPOL_BIND:
  1218 + case MPOL_BIND: {
1216 1219 /*
1217 1220 * Follow bind policy behavior and start allocation at the
1218 1221 * first node.
1219 1222 */
1220   - return zone_to_nid(policy->v.zonelist->zones[0]);
  1223 + return zonelist_node_idx(policy->v.zonelist->_zonerefs);
  1224 + }
1221 1225  
1222 1226 case MPOL_PREFERRED:
1223 1227 if (policy->v.preferred_node >= 0)
... ... @@ -1323,7 +1327,7 @@
1323 1327  
1324 1328 zl = node_zonelist(nid, gfp);
1325 1329 page = __alloc_pages(gfp, order, zl);
1326   - if (page && page_zone(page) == zl->zones[0])
  1330 + if (page && page_zone(page) == zonelist_zone(&zl->_zonerefs[0]))
1327 1331 inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
1328 1332 return page;
1329 1333 }
1330 1334  
... ... @@ -1463,10 +1467,14 @@
1463 1467 return a->v.preferred_node == b->v.preferred_node;
1464 1468 case MPOL_BIND: {
1465 1469 int i;
1466   - for (i = 0; a->v.zonelist->zones[i]; i++)
1467   - if (a->v.zonelist->zones[i] != b->v.zonelist->zones[i])
  1470 + for (i = 0; a->v.zonelist->_zonerefs[i].zone; i++) {
  1471 + struct zone *za, *zb;
  1472 + za = zonelist_zone(&a->v.zonelist->_zonerefs[i]);
  1473 + zb = zonelist_zone(&b->v.zonelist->_zonerefs[i]);
  1474 + if (za != zb)
1468 1475 return 0;
1469   - return b->v.zonelist->zones[i] == NULL;
  1476 + }
  1477 + return b->v.zonelist->_zonerefs[i].zone == NULL;
1470 1478 }
1471 1479 default:
1472 1480 BUG();
1473 1481  
... ... @@ -1785,12 +1793,12 @@
1785 1793 break;
1786 1794 case MPOL_BIND: {
1787 1795 nodemask_t nodes;
1788   - struct zone **z;
  1796 + struct zoneref *z;
1789 1797 struct zonelist *zonelist;
1790 1798  
1791 1799 nodes_clear(nodes);
1792   - for (z = pol->v.zonelist->zones; *z; z++)
1793   - node_set(zone_to_nid(*z), nodes);
  1800 + for (z = pol->v.zonelist->_zonerefs; z->zone; z++)
  1801 + node_set(zonelist_node_idx(z), nodes);
1794 1802 nodes_remap(tmp, nodes, *mpolmask, *newmask);
1795 1803 nodes = tmp;
1796 1804  
mm/oom_kill.c
... ... @@ -176,7 +176,7 @@
176 176 {
177 177 #ifdef CONFIG_NUMA
178 178 struct zone *zone;
179   - struct zone **z;
  179 + struct zoneref *z;
180 180 enum zone_type high_zoneidx = gfp_zone(gfp_mask);
181 181 nodemask_t nodes = node_states[N_HIGH_MEMORY];
182 182  
... ... @@ -462,29 +462,29 @@
462 462 * if a parallel OOM killing is already taking place that includes a zone in
463 463 * the zonelist. Otherwise, locks all zones in the zonelist and returns 1.
464 464 */
465   -int try_set_zone_oom(struct zonelist *zonelist)
  465 +int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_mask)
466 466 {
467   - struct zone **z;
  467 + struct zoneref *z;
  468 + struct zone *zone;
468 469 int ret = 1;
469 470  
470   - z = zonelist->zones;
471   -
472 471 spin_lock(&zone_scan_mutex);
473   - do {
474   - if (zone_is_oom_locked(*z)) {
  472 + for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
  473 + if (zone_is_oom_locked(zone)) {
475 474 ret = 0;
476 475 goto out;
477 476 }
478   - } while (*(++z) != NULL);
  477 + }
479 478  
480   - /*
481   - * Lock each zone in the zonelist under zone_scan_mutex so a parallel
482   - * invocation of try_set_zone_oom() doesn't succeed when it shouldn't.
483   - */
484   - z = zonelist->zones;
485   - do {
486   - zone_set_flag(*z, ZONE_OOM_LOCKED);
487   - } while (*(++z) != NULL);
  479 + for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
  480 + /*
  481 + * Lock each zone in the zonelist under zone_scan_mutex so a
  482 + * parallel invocation of try_set_zone_oom() doesn't succeed
  483 + * when it shouldn't.
  484 + */
  485 + zone_set_flag(zone, ZONE_OOM_LOCKED);
  486 + }
  487 +
488 488 out:
489 489 spin_unlock(&zone_scan_mutex);
490 490 return ret;
... ... @@ -495,16 +495,15 @@
495 495 * allocation attempts with zonelists containing them may now recall the OOM
496 496 * killer, if necessary.
497 497 */
498   -void clear_zonelist_oom(struct zonelist *zonelist)
  498 +void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
499 499 {
500   - struct zone **z;
  500 + struct zoneref *z;
  501 + struct zone *zone;
501 502  
502   - z = zonelist->zones;
503   -
504 503 spin_lock(&zone_scan_mutex);
505   - do {
506   - zone_clear_flag(*z, ZONE_OOM_LOCKED);
507   - } while (*(++z) != NULL);
  504 + for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
  505 + zone_clear_flag(zone, ZONE_OOM_LOCKED);
  506 + }
508 507 spin_unlock(&zone_scan_mutex);
509 508 }
510 509  
mm/page_alloc.c
... ... @@ -1317,7 +1317,7 @@
1317 1317 * We are low on memory in the second scan, and should leave no stone
1318 1318 * unturned looking for a free page.
1319 1319 */
1320   -static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
  1320 +static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
1321 1321 nodemask_t *allowednodes)
1322 1322 {
1323 1323 struct zonelist_cache *zlc; /* cached zonelist speedup info */
... ... @@ -1328,7 +1328,7 @@
1328 1328 if (!zlc)
1329 1329 return 1;
1330 1330  
1331   - i = z - zonelist->zones;
  1331 + i = z - zonelist->_zonerefs;
1332 1332 n = zlc->z_to_n[i];
1333 1333  
1334 1334 /* This zone is worth trying if it is allowed but not full */
... ... @@ -1340,7 +1340,7 @@
1340 1340 * zlc->fullzones, so that subsequent attempts to allocate a page
1341 1341 * from that zone don't waste time re-examining it.
1342 1342 */
1343   -static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
  1343 +static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
1344 1344 {
1345 1345 struct zonelist_cache *zlc; /* cached zonelist speedup info */
1346 1346 int i; /* index of *z in zonelist zones */
... ... @@ -1349,7 +1349,7 @@
1349 1349 if (!zlc)
1350 1350 return;
1351 1351  
1352   - i = z - zonelist->zones;
  1352 + i = z - zonelist->_zonerefs;
1353 1353  
1354 1354 set_bit(i, zlc->fullzones);
1355 1355 }
1356 1356  
... ... @@ -1361,13 +1361,13 @@
1361 1361 return NULL;
1362 1362 }
1363 1363  
1364   -static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
  1364 +static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
1365 1365 nodemask_t *allowednodes)
1366 1366 {
1367 1367 return 1;
1368 1368 }
1369 1369  
1370   -static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
  1370 +static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
1371 1371 {
1372 1372 }
1373 1373 #endif /* CONFIG_NUMA */
... ... @@ -1380,7 +1380,7 @@
1380 1380 get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
1381 1381 struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
1382 1382 {
1383   - struct zone **z;
  1383 + struct zoneref *z;
1384 1384 struct page *page = NULL;
1385 1385 int classzone_idx;
1386 1386 struct zone *zone, *preferred_zone;
... ... @@ -1389,8 +1389,8 @@
1389 1389 int did_zlc_setup = 0; /* just call zlc_setup() one time */
1390 1390  
1391 1391 z = first_zones_zonelist(zonelist, high_zoneidx);
1392   - classzone_idx = zone_idx(*z);
1393   - preferred_zone = *z;
  1392 + classzone_idx = zonelist_zone_idx(z);
  1393 + preferred_zone = zonelist_zone(z);
1394 1394  
1395 1395 zonelist_scan:
1396 1396 /*
... ... @@ -1453,7 +1453,8 @@
1453 1453 {
1454 1454 const gfp_t wait = gfp_mask & __GFP_WAIT;
1455 1455 enum zone_type high_zoneidx = gfp_zone(gfp_mask);
1456   - struct zone **z;
  1456 + struct zoneref *z;
  1457 + struct zone *zone;
1457 1458 struct page *page;
1458 1459 struct reclaim_state reclaim_state;
1459 1460 struct task_struct *p = current;
1460 1461  
... ... @@ -1467,9 +1468,9 @@
1467 1468 return NULL;
1468 1469  
1469 1470 restart:
1470   - z = zonelist->zones; /* the list of zones suitable for gfp_mask */
  1471 + z = zonelist->_zonerefs; /* the list of zones suitable for gfp_mask */
1471 1472  
1472   - if (unlikely(*z == NULL)) {
  1473 + if (unlikely(!z->zone)) {
1473 1474 /*
1474 1475 * Happens if we have an empty zonelist as a result of
1475 1476 * GFP_THISNODE being used on a memoryless node
... ... @@ -1493,8 +1494,8 @@
1493 1494 if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
1494 1495 goto nopage;
1495 1496  
1496   - for (z = zonelist->zones; *z; z++)
1497   - wakeup_kswapd(*z, order);
  1497 + for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
  1498 + wakeup_kswapd(zone, order);
1498 1499  
1499 1500 /*
1500 1501 * OK, we're below the kswapd watermark and have kicked background
... ... @@ -1575,7 +1576,7 @@
1575 1576 if (page)
1576 1577 goto got_pg;
1577 1578 } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
1578   - if (!try_set_zone_oom(zonelist)) {
  1579 + if (!try_set_zone_oom(zonelist, gfp_mask)) {
1579 1580 schedule_timeout_uninterruptible(1);
1580 1581 goto restart;
1581 1582 }
... ... @@ -1589,18 +1590,18 @@
1589 1590 page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
1590 1591 zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
1591 1592 if (page) {
1592   - clear_zonelist_oom(zonelist);
  1593 + clear_zonelist_oom(zonelist, gfp_mask);
1593 1594 goto got_pg;
1594 1595 }
1595 1596  
1596 1597 /* The OOM killer will not help higher order allocs so fail */
1597 1598 if (order > PAGE_ALLOC_COSTLY_ORDER) {
1598   - clear_zonelist_oom(zonelist);
  1599 + clear_zonelist_oom(zonelist, gfp_mask);
1599 1600 goto nopage;
1600 1601 }
1601 1602  
1602 1603 out_of_memory(zonelist, gfp_mask, order);
1603   - clear_zonelist_oom(zonelist);
  1604 + clear_zonelist_oom(zonelist, gfp_mask);
1604 1605 goto restart;
1605 1606 }
1606 1607  
... ... @@ -1702,7 +1703,7 @@
1702 1703  
1703 1704 static unsigned int nr_free_zone_pages(int offset)
1704 1705 {
1705   - struct zone **z;
  1706 + struct zoneref *z;
1706 1707 struct zone *zone;
1707 1708  
1708 1709 /* Just pick one node, since fallback list is circular */
... ... @@ -1896,7 +1897,8 @@
1896 1897 zone_type--;
1897 1898 zone = pgdat->node_zones + zone_type;
1898 1899 if (populated_zone(zone)) {
1899   - zonelist->zones[nr_zones++] = zone;
  1900 + zoneref_set_zone(zone,
  1901 + &zonelist->_zonerefs[nr_zones++]);
1900 1902 check_highest_zone(zone_type);
1901 1903 }
... ... @@ -2072,11 +2074,12 @@
2072 2074 struct zonelist *zonelist;
2073 2075  
2074 2076 zonelist = &pgdat->node_zonelists[0];
2075   - for (j = 0; zonelist->zones[j] != NULL; j++)
  2077 + for (j = 0; zonelist->_zonerefs[j].zone != NULL; j++)
2076 2078 ;
2077 2079 j = build_zonelists_node(NODE_DATA(node), zonelist, j,
2078 2080 MAX_NR_ZONES - 1);
2079   - zonelist->zones[j] = NULL;
  2081 + zonelist->_zonerefs[j].zone = NULL;
  2082 + zonelist->_zonerefs[j].zone_idx = 0;
2080 2083 }
2081 2084  
2082 2085 /*
... ... @@ -2089,7 +2092,8 @@
2089 2092  
2090 2093 zonelist = &pgdat->node_zonelists[1];
2091 2094 j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
2092   - zonelist->zones[j] = NULL;
  2095 + zonelist->_zonerefs[j].zone = NULL;
  2096 + zonelist->_zonerefs[j].zone_idx = 0;
2093 2097 }
2094 2098  
... ... @@ -2114,12 +2118,14 @@
2114 2118 node = node_order[j];
2115 2119 z = &NODE_DATA(node)->node_zones[zone_type];
2116 2120 if (populated_zone(z)) {
2117   - zonelist->zones[pos++] = z;
  2121 + zoneref_set_zone(z,
  2122 + &zonelist->_zonerefs[pos++]);
2118 2123 check_highest_zone(zone_type);
2119 2124 }
2120 2125 }
2121 2126 }
2122   - zonelist->zones[pos] = NULL;
  2127 + zonelist->_zonerefs[pos].zone = NULL;
  2128 + zonelist->_zonerefs[pos].zone_idx = 0;
2123 2129 }
2124 2130  
2125 2131 static int default_zonelist_order(void)
... ... @@ -2196,7 +2202,8 @@
2196 2202 /* initialize zonelists */
2197 2203 for (i = 0; i < MAX_ZONELISTS; i++) {
2198 2204 zonelist = pgdat->node_zonelists + i;
2199   - zonelist->zones[0] = NULL;
  2205 + zonelist->_zonerefs[0].zone = NULL;
  2206 + zonelist->_zonerefs[0].zone_idx = 0;
2200 2207 }
2201 2208  
... ... @@ -2248,13 +2255,13 @@
2248 2255 {
2249 2256 struct zonelist *zonelist;
2250 2257 struct zonelist_cache *zlc;
2251   - struct zone **z;
  2258 + struct zoneref *z;
2252 2259  
2253 2260 zonelist = &pgdat->node_zonelists[0];
2254 2261 zonelist->zlcache_ptr = zlc = &zonelist->zlcache;
2255 2262 bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
2256   - for (z = zonelist->zones; *z; z++)
2257   - zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z);
  2263 + for (z = zonelist->_zonerefs; z->zone; z++)
  2264 + zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z);
2258 2265 }
... ... @@ -2297,7 +2304,8 @@
2297 2304 MAX_NR_ZONES - 1);
2298 2305 }
2299 2306  
2300   - zonelist->zones[j] = NULL;
  2307 + zonelist->_zonerefs[j].zone = NULL;
  2308 + zonelist->_zonerefs[j].zone_idx = 0;
2301 2309 }
2302 2310  
2303 2311 /* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */
mm/slab.c
... ... @@ -3242,7 +3242,7 @@
3242 3242 {
3243 3243 struct zonelist *zonelist;
3244 3244 gfp_t local_flags;
3245   - struct zone **z;
  3245 + struct zoneref *z;
3246 3246 struct zone *zone;
3247 3247 enum zone_type high_zoneidx = gfp_zone(flags);
3248 3248 void *obj = NULL;
mm/slub.c
... ... @@ -1284,7 +1284,7 @@
1284 1284 {
1285 1285 #ifdef CONFIG_NUMA
1286 1286 struct zonelist *zonelist;
1287   - struct zone **z;
  1287 + struct zoneref *z;
1288 1288 struct zone *zone;
1289 1289 enum zone_type high_zoneidx = gfp_zone(flags);
1290 1290 struct page *page;
mm/vmscan.c
... ... @@ -1251,7 +1251,7 @@
1251 1251 {
1252 1252 enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
1253 1253 unsigned long nr_reclaimed = 0;
1254   - struct zone **z;
  1254 + struct zoneref *z;
1255 1255 struct zone *zone;
1256 1256  
1257 1257 sc->all_unreclaimable = 1;
... ... @@ -1301,7 +1301,7 @@
1301 1301 * allocation attempt will fail.
1302 1302 */
1303 1303 static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1304   - gfp_t gfp_mask, struct scan_control *sc)
  1304 + struct scan_control *sc)
1305 1305 {
1306 1306 int priority;
1307 1307 int ret = 0;
1308 1308  
... ... @@ -1309,9 +1309,9 @@
1309 1309 unsigned long nr_reclaimed = 0;
1310 1310 struct reclaim_state *reclaim_state = current->reclaim_state;
1311 1311 unsigned long lru_pages = 0;
1312   - struct zone **z;
  1312 + struct zoneref *z;
1313 1313 struct zone *zone;
1314   - enum zone_type high_zoneidx = gfp_zone(gfp_mask);
  1314 + enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
1315 1315  
1316 1316 if (scan_global_lru(sc))
1317 1317 count_vm_event(ALLOCSTALL);
... ... @@ -1339,7 +1339,7 @@
1339 1339 * over limit cgroups
1340 1340 */
1341 1341 if (scan_global_lru(sc)) {
1342   - shrink_slab(sc->nr_scanned, gfp_mask, lru_pages);
  1342 + shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
1343 1343 if (reclaim_state) {
1344 1344 nr_reclaimed += reclaim_state->reclaimed_slab;
1345 1345 reclaim_state->reclaimed_slab = 0;
... ... @@ -1410,7 +1410,7 @@
1410 1410 .isolate_pages = isolate_pages_global,
1411 1411 };
1412 1412  
1413   - return do_try_to_free_pages(zonelist, gfp_mask, &sc);
  1413 + return do_try_to_free_pages(zonelist, &sc);
1414 1414 }
1415 1415  
1416 1416 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
... ... @@ -1419,7 +1419,6 @@
1419 1419 gfp_t gfp_mask)
1420 1420 {
1421 1421 struct scan_control sc = {
1422   - .gfp_mask = gfp_mask,
1423 1422 .may_writepage = !laptop_mode,
1424 1423 .may_swap = 1,
1425 1424 .swap_cluster_max = SWAP_CLUSTER_MAX,
1426 1425  
... ... @@ -1429,12 +1428,11 @@
1429 1428 .isolate_pages = mem_cgroup_isolate_pages,
1430 1429 };
1431 1430 struct zonelist *zonelist;
1432   - int target_zone = gfp_zone(GFP_HIGHUSER_MOVABLE);
1433 1431  
1434   - zonelist = &NODE_DATA(numa_node_id())->node_zonelists[target_zone];
1435   - if (do_try_to_free_pages(zonelist, sc.gfp_mask, &sc))
1436   - return 1;
1437   - return 0;
  1432 + sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
  1433 + (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
  1434 + zonelist = NODE_DATA(numa_node_id())->node_zonelists;
  1435 + return do_try_to_free_pages(zonelist, &sc);
1438 1436 }
1439 1437 #endif
1440 1438