Commit b4fc580f75325271de2841891bb5816cea5ca101

Authored by Mel Gorman
Committed by Jiri Slaby
1 parent 59d4b11371

mm: rearrange zone fields into read-only, page alloc, statistics and page reclaim lines

commit 3484b2de9499df23c4604a513b36f96326ae81ad upstream.

The arrangement of struct zone has changed over time and now it has
reached the point where there is some inappropriate sharing going on.
On x86-64, for example:

o The zone->node field shares a cache line with the zone lock, and
  zone->node is accessed frequently from the page allocator due to the
  fair zone allocation policy.

o span_seqlock is almost never used but shares a cache line with free_area

o Some zone statistics share a cache line with the LRU lock so
  reclaim-intensive and allocator-intensive workloads can bounce the cache
  line on a stat update
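
A quick way to confirm this kind of sharing on a given configuration is
to look at field offsets.  The snippet below is a minimal userspace
sketch using a cut-down, hypothetical stand-in for struct zone (the
field names mirror the kernel ones, but the sizes and ordering are only
illustrative); on a real kernel image a tool such as pahole, e.g.
pahole -C zone vmlinux, reports the actual layout.

    /* fake_zone is a made-up miniature of struct zone, not kernel code */
    #include <stdio.h>
    #include <stddef.h>

    struct fake_zone {
            unsigned long watermark[3];     /* read-mostly */
            long lowmem_reserve[4];         /* read-mostly */
            int node;       /* read on every allocation (fair zone policy) */
            int lock;       /* stands in for the zone spinlock, written constantly */
            unsigned long free_area_stub[11];
    };

    /* which 64-byte cache line a field starts on */
    #define CACHE_LINE(field)  (offsetof(struct fake_zone, field) / 64)

    int main(void)
    {
            printf("node -> cache line %zu\n", CACHE_LINE(node));
            printf("lock -> cache line %zu\n", CACHE_LINE(lock));
            /*
             * When both land on the same line, CPUs hammering the lock
             * keep invalidating the line the allocator reads ->node from.
             */
            return 0;
    }

With this toy layout both fields report cache line 0, which is exactly
the kind of sharing the first bullet above describes.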

This patch rearranges struct zone to put read-only and read-mostly
fields together and then splits the page allocator intensive fields,
the zone statistics and the page reclaim intensive fields into their
own cache lines.  Note that the type of lowmem_reserve changes to long
because the watermark calculations are signed, which avoids a
signed/unsigned conversion there.
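
The signed/unsigned issue is easy to trip over: free_pages may
legitimately go negative in the watermark check, and if lowmem_reserve
stays unsigned long the comparison is carried out in unsigned
arithmetic and the wrap defeats the check.  A small standalone sketch
with made-up values (not kernel code) shows the difference:

    #include <stdio.h>

    int main(void)
    {
            long free_pages = 10;   /* may effectively go negative ...      */
            long free_cma   = 50;   /* ... once CMA pages are subtracted    */
            long min        = 128;

            unsigned long reserve_unsigned = 256;  /* old lowmem_reserve type */
            long          reserve_signed   = 256;  /* new lowmem_reserve type */

            /*
             * With an unsigned reserve the expression is evaluated as
             * unsigned long: the negative left-hand side wraps to a huge
             * value and the "below the mark" condition is missed (prints 0).
             */
            printf("unsigned reserve: below mark = %d\n",
                   (free_pages - free_cma) <= (min + reserve_unsigned));

            /* With a signed reserve the comparison stays signed (prints 1). */
            printf("signed reserve:   below mark = %d\n",
                   (free_pages - free_cma) <= (min + reserve_signed));
            return 0;
    }

This is why the watermark check used to copy the value into a local
long first; with the array itself declared long, that copy can go away,
as the mm/page_alloc.c hunk below shows.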

On the test configuration I used, the overall size of struct zone
shrank by one cache line.  On smaller machines, this is not likely to
be noticeable.  However, on a 4-node NUMA machine running tiobench the
system CPU overhead is reduced by this patch:

              3.16.0-rc3      3.16.0-rc3
                 vanilla  rearrange-v5r9
User              746.94          759.78
System          65336.22        58350.98
Elapsed         27553.52        27282.02

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>

Showing 3 changed files with 110 additions and 106 deletions

include/linux/mmzone.h
@@ -321,19 +321,12 @@
321 321 #ifndef __GENERATING_BOUNDS_H
322 322  
323 323 struct zone {
324   - /* Fields commonly accessed by the page allocator */
  324 + /* Read-mostly fields */
325 325  
326 326 /* zone watermarks, access with *_wmark_pages(zone) macros */
327 327 unsigned long watermark[NR_WMARK];
328 328  
329 329 /*
330   - * When free pages are below this point, additional steps are taken
331   - * when reading the number of free pages to avoid per-cpu counter
332   - * drift allowing watermarks to be breached
333   - */
334   - unsigned long percpu_drift_mark;
335   -
336   - /*
337 330 * We don't know if the memory that we're going to allocate will be freeable
338 331 * or/and it will be released eventually, so to avoid totally wasting several
339 332 * GB of ram we must reserve some of the lower zone memory (otherwise we risk
@@ -341,42 +334,27 @@
341 334 * on the higher zones). This array is recalculated at runtime if the
342 335 * sysctl_lowmem_reserve_ratio sysctl changes.
343 336 */
344   - unsigned long lowmem_reserve[MAX_NR_ZONES];
  337 + long lowmem_reserve[MAX_NR_ZONES];
345 338  
346   - /*
347   - * This is a per-zone reserve of pages that should not be
348   - * considered dirtyable memory.
349   - */
350   - unsigned long dirty_balance_reserve;
351   -
352 339 #ifdef CONFIG_NUMA
353 340 int node;
  341 +#endif
  342 +
354 343 /*
355   - * zone reclaim becomes active if more unmapped pages exist.
  344 + * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
  345 + * this zone's LRU. Maintained by the pageout code.
356 346 */
357   - unsigned long min_unmapped_pages;
358   - unsigned long min_slab_pages;
359   -#endif
  347 + unsigned int inactive_ratio;
  348 +
  349 + struct pglist_data *zone_pgdat;
360 350 struct per_cpu_pageset __percpu *pageset;
  351 +
361 352 /*
362   - * free areas of different sizes
  353 + * This is a per-zone reserve of pages that should not be
  354 + * considered dirtyable memory.
363 355 */
364   - spinlock_t lock;
365   -#if defined CONFIG_COMPACTION || defined CONFIG_CMA
366   - /* Set to true when the PG_migrate_skip bits should be cleared */
367   - bool compact_blockskip_flush;
  356 + unsigned long dirty_balance_reserve;
368 357  
369   - /* pfn where compaction free scanner should start */
370   - unsigned long compact_cached_free_pfn;
371   - /* pfn where async and sync compaction migration scanner should start */
372   - unsigned long compact_cached_migrate_pfn[2];
373   -#endif
374   -#ifdef CONFIG_MEMORY_HOTPLUG
375   - /* see spanned/present_pages for more description */
376   - seqlock_t span_seqlock;
377   -#endif
378   - struct free_area free_area[MAX_ORDER];
379   -
380 358 #ifndef CONFIG_SPARSEMEM
381 359 /*
382 360 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
@@ -385,71 +363,14 @@
385 363 unsigned long *pageblock_flags;
386 364 #endif /* CONFIG_SPARSEMEM */
387 365  
388   -#ifdef CONFIG_COMPACTION
  366 +#ifdef CONFIG_NUMA
389 367 /*
390   - * On compaction failure, 1<<compact_defer_shift compactions
391   - * are skipped before trying again. The number attempted since
392   - * last failure is tracked with compact_considered.
  368 + * zone reclaim becomes active if more unmapped pages exist.
393 369 */
394   - unsigned int compact_considered;
395   - unsigned int compact_defer_shift;
396   - int compact_order_failed;
397   -#endif
  370 + unsigned long min_unmapped_pages;
  371 + unsigned long min_slab_pages;
  372 +#endif /* CONFIG_NUMA */
398 373  
399   - ZONE_PADDING(_pad1_)
400   -
401   - /* Fields commonly accessed by the page reclaim scanner */
402   - spinlock_t lru_lock;
403   - struct lruvec lruvec;
404   -
405   - unsigned long pages_scanned; /* since last reclaim */
406   - unsigned long flags; /* zone flags, see below */
407   -
408   - /* Zone statistics */
409   - atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
410   -
411   - /*
412   - * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
413   - * this zone's LRU. Maintained by the pageout code.
414   - */
415   - unsigned int inactive_ratio;
416   -
417   -
418   - ZONE_PADDING(_pad2_)
419   - /* Rarely used or read-mostly fields */
420   -
421   - /*
422   - * wait_table -- the array holding the hash table
423   - * wait_table_hash_nr_entries -- the size of the hash table array
424   - * wait_table_bits -- wait_table_size == (1 << wait_table_bits)
425   - *
426   - * The purpose of all these is to keep track of the people
427   - * waiting for a page to become available and make them
428   - * runnable again when possible. The trouble is that this
429   - * consumes a lot of space, especially when so few things
430   - * wait on pages at a given time. So instead of using
431   - * per-page waitqueues, we use a waitqueue hash table.
432   - *
433   - * The bucket discipline is to sleep on the same queue when
434   - * colliding and wake all in that wait queue when removing.
435   - * When something wakes, it must check to be sure its page is
436   - * truly available, a la thundering herd. The cost of a
437   - * collision is great, but given the expected load of the
438   - * table, they should be so rare as to be outweighed by the
439   - * benefits from the saved space.
440   - *
441   - * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
442   - * primary users of these fields, and in mm/page_alloc.c
443   - * free_area_init_core() performs the initialization of them.
444   - */
445   - wait_queue_head_t * wait_table;
446   - unsigned long wait_table_hash_nr_entries;
447   - unsigned long wait_table_bits;
448   -
449   - /*
450   - * Discontig memory support fields.
451   - */
452   - struct pglist_data *zone_pgdat;
453 374 /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
454 375 unsigned long zone_start_pfn;
455 376  
@@ -495,20 +416,104 @@
495 416 * adjust_managed_page_count() should be used instead of directly
496 417 * touching zone->managed_pages and totalram_pages.
497 418 */
  419 + unsigned long managed_pages;
498 420 unsigned long spanned_pages;
499 421 unsigned long present_pages;
500   - unsigned long managed_pages;
501 422  
  423 + const char *name;
  424 +
502 425 /*
503 426 * Number of MIGRATE_RESEVE page block. To maintain for just
504 427 * optimization. Protected by zone->lock.
505 428 */
506 429 int nr_migrate_reserve_block;
507 430  
  431 +#ifdef CONFIG_MEMORY_HOTPLUG
  432 + /* see spanned/present_pages for more description */
  433 + seqlock_t span_seqlock;
  434 +#endif
  435 +
508 436 /*
509   - * rarely used fields:
  437 + * wait_table -- the array holding the hash table
  438 + * wait_table_hash_nr_entries -- the size of the hash table array
  439 + * wait_table_bits -- wait_table_size == (1 << wait_table_bits)
  440 + *
  441 + * The purpose of all these is to keep track of the people
  442 + * waiting for a page to become available and make them
  443 + * runnable again when possible. The trouble is that this
  444 + * consumes a lot of space, especially when so few things
  445 + * wait on pages at a given time. So instead of using
  446 + * per-page waitqueues, we use a waitqueue hash table.
  447 + *
  448 + * The bucket discipline is to sleep on the same queue when
  449 + * colliding and wake all in that wait queue when removing.
  450 + * When something wakes, it must check to be sure its page is
  451 + * truly available, a la thundering herd. The cost of a
  452 + * collision is great, but given the expected load of the
  453 + * table, they should be so rare as to be outweighed by the
  454 + * benefits from the saved space.
  455 + *
  456 + * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
  457 + * primary users of these fields, and in mm/page_alloc.c
  458 + * free_area_init_core() performs the initialization of them.
510 459 */
511   - const char *name;
  460 + wait_queue_head_t *wait_table;
  461 + unsigned long wait_table_hash_nr_entries;
  462 + unsigned long wait_table_bits;
  463 +
  464 + ZONE_PADDING(_pad1_)
  465 +
  466 + /* Write-intensive fields used from the page allocator */
  467 + spinlock_t lock;
  468 +
  469 + /* free areas of different sizes */
  470 + struct free_area free_area[MAX_ORDER];
  471 +
  472 + /* zone flags, see below */
  473 + unsigned long flags;
  474 +
  475 + ZONE_PADDING(_pad2_)
  476 +
  477 + /* Write-intensive fields used by page reclaim */
  478 +
  479 + /* Fields commonly accessed by the page reclaim scanner */
  480 + spinlock_t lru_lock;
  481 + unsigned long pages_scanned; /* since last reclaim */
  482 + struct lruvec lruvec;
  483 +
  484 + /*
  485 + * When free pages are below this point, additional steps are taken
  486 + * when reading the number of free pages to avoid per-cpu counter
  487 + * drift allowing watermarks to be breached
  488 + */
  489 + unsigned long percpu_drift_mark;
  490 +
  491 +#if defined CONFIG_COMPACTION || defined CONFIG_CMA
  492 + /* pfn where compaction free scanner should start */
  493 + unsigned long compact_cached_free_pfn;
  494 + /* pfn where async and sync compaction migration scanner should start */
  495 + unsigned long compact_cached_migrate_pfn[2];
  496 +#endif
  497 +
  498 +#ifdef CONFIG_COMPACTION
  499 + /*
  500 + * On compaction failure, 1<<compact_defer_shift compactions
  501 + * are skipped before trying again. The number attempted since
  502 + * last failure is tracked with compact_considered.
  503 + */
  504 + unsigned int compact_considered;
  505 + unsigned int compact_defer_shift;
  506 + int compact_order_failed;
  507 +#endif
  508 +
  509 +#if defined CONFIG_COMPACTION || defined CONFIG_CMA
  510 + /* Set to true when the PG_migrate_skip bits should be cleared */
  511 + bool compact_blockskip_flush;
  512 +#endif
  513 +
  514 + ZONE_PADDING(_pad3_)
  515 + /* Zone statistics */
  516 + atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
512 517 } ____cacheline_internodealigned_in_smp;
513 518  
514 519 typedef enum {
mm/page_alloc.c
@@ -1685,7 +1685,6 @@
1685 1685 {
1686 1686 /* free_pages my go negative - that's OK */
1687 1687 long min = mark;
1688   - long lowmem_reserve = z->lowmem_reserve[classzone_idx];
1689 1688 int o;
1690 1689 long free_cma = 0;
1691 1690  
@@ -1700,7 +1699,7 @@
1700 1699 free_cma = zone_page_state(z, NR_FREE_CMA_PAGES);
1701 1700 #endif
1702 1701  
1703   - if (free_pages - free_cma <= min + lowmem_reserve)
  1702 + if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx])
1704 1703 return false;
1705 1704 for (o = 0; o < order; o++) {
1706 1705 /* At the next order, this order's pages become unavailable */
@@ -3224,7 +3223,7 @@
3224 3223 );
3225 3224 printk("lowmem_reserve[]:");
3226 3225 for (i = 0; i < MAX_NR_ZONES; i++)
3227   - printk(" %lu", zone->lowmem_reserve[i]);
  3226 + printk(" %ld", zone->lowmem_reserve[i]);
3228 3227 printk("\n");
3229 3228 }
3230 3229  
@@ -5527,7 +5526,7 @@
5527 5526 for_each_online_pgdat(pgdat) {
5528 5527 for (i = 0; i < MAX_NR_ZONES; i++) {
5529 5528 struct zone *zone = pgdat->node_zones + i;
5530   - unsigned long max = 0;
  5529 + long max = 0;
5531 5530  
5532 5531 /* Find valid and maximum lowmem_reserve in the zone */
5533 5532 for (j = i; j < MAX_NR_ZONES; j++) {
mm/vmstat.c
@@ -1065,10 +1065,10 @@
1065 1065 zone_page_state(zone, i));
1066 1066  
1067 1067 seq_printf(m,
1068   - "\n protection: (%lu",
  1068 + "\n protection: (%ld",
1069 1069 zone->lowmem_reserve[0]);
1070 1070 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
1071   - seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
  1071 + seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
1072 1072 seq_printf(m,
1073 1073 ")"
1074 1074 "\n pagesets");