  /*
   *  linux/mm/vmstat.c
   *
   *  Manages VM statistics
   *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   *
   *  zoned VM statistics
   *  Copyright (C) 2006 Silicon Graphics, Inc.,
   *		Christoph Lameter <christoph@lameter.com>
   *  Copyright (C) 2008-2014 Christoph Lameter
   */
  #include <linux/fs.h>
  #include <linux/mm.h>
  #include <linux/err.h>
  #include <linux/module.h>
  #include <linux/slab.h>
  #include <linux/cpu.h>
  #include <linux/cpumask.h>
  #include <linux/vmstat.h>
  #include <linux/proc_fs.h>
  #include <linux/seq_file.h>
  #include <linux/debugfs.h>
  #include <linux/sched.h>
  #include <linux/math64.h>
  #include <linux/writeback.h>
  #include <linux/compaction.h>
  #include <linux/mm_inline.h>
  #include <linux/page_ext.h>
  #include <linux/page_owner.h>

  #include "internal.h"

  #define NUMA_STATS_THRESHOLD (U16_MAX - 2)
  #ifdef CONFIG_NUMA
  int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
  
  /* zero numa counters within a zone */
  static void zero_zone_numa_counters(struct zone *zone)
  {
  	int item, cpu;
  
  	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) {
  		atomic_long_set(&zone->vm_numa_stat[item], 0);
  		for_each_online_cpu(cpu)
  			per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item]
  						= 0;
  	}
  }
  
  /* zero numa counters of all the populated zones */
  static void zero_zones_numa_counters(void)
  {
  	struct zone *zone;
  
  	for_each_populated_zone(zone)
  		zero_zone_numa_counters(zone);
  }
  
  /* zero global numa counters */
  static void zero_global_numa_counters(void)
  {
  	int item;
  
  	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++)
  		atomic_long_set(&vm_numa_stat[item], 0);
  }
  
  static void invalid_numa_statistics(void)
  {
  	zero_zones_numa_counters();
  	zero_global_numa_counters();
  }
  
  static DEFINE_MUTEX(vm_numa_stat_lock);
  
  int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
  		void __user *buffer, size_t *length, loff_t *ppos)
  {
  	int ret, oldval;
  
  	mutex_lock(&vm_numa_stat_lock);
  	if (write)
  		oldval = sysctl_vm_numa_stat;
  	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
  	if (ret || !write)
  		goto out;
  
  	if (oldval == sysctl_vm_numa_stat)
  		goto out;
  	else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) {
  		static_branch_enable(&vm_numa_stat_key);
  		pr_info("enable numa statistics
  ");
  	} else {
  		static_branch_disable(&vm_numa_stat_key);
  		invalid_numa_statistics();
  		pr_info("disable numa statistics, and clear numa counters
  ");
  	}
  
  out:
  	mutex_unlock(&vm_numa_stat_lock);
  	return ret;
  }
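  /*
   * Illustrative usage, not part of this file: with the usual procfs layout,
   * writing 0 to /proc/sys/vm/numa_stat goes through
   * sysctl_vm_numa_stat_handler() above, disables the vm_numa_stat_key static
   * branch and clears the NUMA counters; writing 1 turns collection back on.
   */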
  #endif
  #ifdef CONFIG_VM_EVENT_COUNTERS
  DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
  EXPORT_PER_CPU_SYMBOL(vm_event_states);
  static void sum_vm_events(unsigned long *ret)
  {
  	int cpu;
  	int i;

  	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
  	for_each_online_cpu(cpu) {
  		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
  		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
  			ret[i] += this->event[i];
  	}
  }
  
  /*
   * Accumulate the vm event counters across all CPUs.
   * The result is unavoidably approximate - it can change
   * during and after execution of this function.
  */
  void all_vm_events(unsigned long *ret)
  {
  	get_online_cpus();
  	sum_vm_events(ret);
  	put_online_cpus();
  }
  EXPORT_SYMBOL_GPL(all_vm_events);
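  /*
   * Illustrative note, not from this file: these counters are bumped by
   * count_vm_event()/count_vm_events() callers elsewhere in mm (for example
   * count_vm_event(PGFAULT) in the fault path) and are read back in bulk
   * through all_vm_events() above.
   */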

  /*
   * Fold the foreign cpu events into our own.
   *
   * This is adding to the events on one processor
   * but keeps the global counts constant.
   */
  void vm_events_fold_cpu(int cpu)
  {
  	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
  	int i;
  
  	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
  		count_vm_events(i, fold_state->event[i]);
  		fold_state->event[i] = 0;
  	}
  }
  
  #endif /* CONFIG_VM_EVENT_COUNTERS */
  /*
   * Manage combined zone based / global counters
   *
   * vm_stat contains the global counters
   */
  atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
  atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
  atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
  EXPORT_SYMBOL(vm_zone_stat);
  EXPORT_SYMBOL(vm_numa_stat);
  EXPORT_SYMBOL(vm_node_stat);
  
  #ifdef CONFIG_SMP
  int calculate_pressure_threshold(struct zone *zone)
  {
  	int threshold;
  	int watermark_distance;
  
  	/*
  	 * As vmstats are not up to date, there is drift between the estimated
  	 * and real values. For high thresholds and a high number of CPUs, it
  	 * is possible for the min watermark to be breached while the estimated
  	 * value looks fine. The pressure threshold is a reduced value such
  	 * that even the maximum amount of drift will not accidentally breach
  	 * the min watermark
  	 */
  	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
  	threshold = max(1, (int)(watermark_distance / num_online_cpus()));
  
  	/*
  	 * Maximum threshold is 125
  	 */
  	threshold = min(125, threshold);
  
  	return threshold;
  }
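  /*
   * Illustrative numbers, not taken from the source: with low_wmark_pages()
   * == 1000, min_wmark_pages() == 800 and 8 online CPUs, the watermark
   * distance is 200 pages and the pressure threshold becomes
   * max(1, 200 / 8) == 25, comfortably below the 125 cap.
   */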
  int calculate_normal_threshold(struct zone *zone)
  {
  	int threshold;
  	int mem;	/* memory in 128 MB units */
  
  	/*
  	 * The threshold scales with the number of processors and the amount
  	 * of memory per zone. More memory means that we can defer updates for
  	 * longer, more processors could lead to more contention.
   	 * fls() is used to have a cheap way of logarithmic scaling.
  	 *
  	 * Some sample thresholds:
  	 *
  	 * Threshold	Processors	(fls)	Zonesize	fls(mem+1)
  	 * ------------------------------------------------------------------
  	 * 8		1		1	0.9-1 GB	4
  	 * 16		2		2	0.9-1 GB	4
  	 * 20 		2		2	1-2 GB		5
  	 * 24		2		2	2-4 GB		6
  	 * 28		2		2	4-8 GB		7
  	 * 32		2		2	8-16 GB		8
  	 * 4		2		2	<128M		1
  	 * 30		4		3	2-4 GB		5
  	 * 48		4		3	8-16 GB		8
  	 * 32		8		4	1-2 GB		4
  	 * 32		8		4	0.9-1GB		4
  	 * 10		16		5	<128M		1
  	 * 40		16		5	900M		4
  	 * 70		64		7	2-4 GB		5
  	 * 84		64		7	4-8 GB		6
  	 * 108		512		9	4-8 GB		6
  	 * 125		1024		10	8-16 GB		8
  	 * 125		1024		10	16-32 GB	9
  	 */
  	mem = zone->managed_pages >> (27 - PAGE_SHIFT);
  
  	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
  
  	/*
  	 * Maximum threshold is 125
  	 */
  	threshold = min(125, threshold);
  
  	return threshold;
  }
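  /*
   * Illustrative numbers, not taken from the source: with 16 online CPUs and
   * a zone managing 8 GB, mem = 8192 MB / 128 MB = 64, so
   * threshold = 2 * fls(16) * (1 + fls(65)) = 2 * 5 * 8 = 80,
   * which stays below the 125 cap.
   */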

  /*
   * Refresh the thresholds for each zone.
   */
  void refresh_zone_stat_thresholds(void)
  {
  	struct pglist_data *pgdat;
  	struct zone *zone;
  	int cpu;
  	int threshold;
  	/* Zero current pgdat thresholds */
  	for_each_online_pgdat(pgdat) {
  		for_each_online_cpu(cpu) {
  			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
  		}
  	}
  	for_each_populated_zone(zone) {
  		struct pglist_data *pgdat = zone->zone_pgdat;
  		unsigned long max_drift, tolerate_drift;
  		threshold = calculate_normal_threshold(zone);

  		for_each_online_cpu(cpu) {
  			int pgdat_threshold;

  			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
  							= threshold;

  			/* Base nodestat threshold on the largest populated zone. */
  			pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
  			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
  				= max(threshold, pgdat_threshold);
  		}
  		/*
  		 * Only set percpu_drift_mark if there is a danger that
  		 * NR_FREE_PAGES reports the low watermark is ok when in fact
  		 * the min watermark could be breached by an allocation
  		 */
  		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
  		max_drift = num_online_cpus() * threshold;
  		if (max_drift > tolerate_drift)
  			zone->percpu_drift_mark = high_wmark_pages(zone) +
  					max_drift;
  	}
  }
  void set_pgdat_percpu_threshold(pg_data_t *pgdat,
  				int (*calculate_pressure)(struct zone *))
  {
  	struct zone *zone;
  	int cpu;
  	int threshold;
  	int i;
  	for (i = 0; i < pgdat->nr_zones; i++) {
  		zone = &pgdat->node_zones[i];
  		if (!zone->percpu_drift_mark)
  			continue;
  		threshold = (*calculate_pressure)(zone);
  		for_each_online_cpu(cpu)
  			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
  							= threshold;
  	}
  }
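  /*
   * Illustrative note, the caller lives in the reclaim code rather than here:
   * kswapd drops a node's per-cpu thresholds via calculate_pressure_threshold()
   * while it is awake and reclaiming, and restores calculate_normal_threshold()
   * before going back to sleep, so NR_FREE_PAGES drift stays small exactly when
   * the watermarks matter most.
   */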
  /*
   * For use when we know that interrupts are disabled,
   * or when we know that preemption is disabled and that
   * particular counter cannot be updated from interrupt context.
   */
  void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
  			   long delta)
  {
  	struct per_cpu_pageset __percpu *pcp = zone->pageset;
  	s8 __percpu *p = pcp->vm_stat_diff + item;
  	long x;
  	long t;

  	x = delta + __this_cpu_read(*p);

  	t = __this_cpu_read(pcp->stat_threshold);

  	if (unlikely(x > t || x < -t)) {
  		zone_page_state_add(x, zone, item);
  		x = 0;
  	}
  	__this_cpu_write(*p, x);
  }
  EXPORT_SYMBOL(__mod_zone_page_state);
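  /*
   * Illustrative caller, not from this file: the page allocator keeps
   * NR_FREE_PAGES in sync with calls such as
   * __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order), issued from
   * contexts where interrupts are already disabled.
   */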
  void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
  				long delta)
  {
  	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
  	s8 __percpu *p = pcp->vm_node_stat_diff + item;
  	long x;
  	long t;
  
  	x = delta + __this_cpu_read(*p);
  
  	t = __this_cpu_read(pcp->stat_threshold);
  
  	if (unlikely(x > t || x < -t)) {
  		node_page_state_add(x, pgdat, item);
  		x = 0;
  	}
  	__this_cpu_write(*p, x);
  }
  EXPORT_SYMBOL(__mod_node_page_state);
  /*
   * Optimized increment and decrement functions.
   *
   * These are only for a single page and therefore can take a struct page *
   * argument instead of struct zone *. This allows the inclusion of the code
   * generated for page_zone(page) into the optimized functions.
   *
   * No overflow check is necessary and therefore the differential can be
   * incremented or decremented in place which may allow the compilers to
   * generate better code.
   * The increment or decrement is known and therefore one boundary check can
   * be omitted.
   *
   * NOTE: These functions are very performance sensitive. Change only
   * with care.
   *
   * Some processors have inc/dec instructions that are atomic vs an interrupt.
   * However, the code must first determine the differential location in a zone
   * based on the processor number and then inc/dec the counter. There is no
   * guarantee without disabling preemption that the processor will not change
   * in between and therefore the atomicity vs. interrupt cannot be exploited
   * in a useful way here.
   */
  void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
  {
  	struct per_cpu_pageset __percpu *pcp = zone->pageset;
  	s8 __percpu *p = pcp->vm_stat_diff + item;
  	s8 v, t;

  	v = __this_cpu_inc_return(*p);
  	t = __this_cpu_read(pcp->stat_threshold);
  	if (unlikely(v > t)) {
  		s8 overstep = t >> 1;

  		zone_page_state_add(v + overstep, zone, item);
  		__this_cpu_write(*p, -overstep);
  	}
  }

  void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
  {
  	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
  	s8 __percpu *p = pcp->vm_node_stat_diff + item;
  	s8 v, t;
  
  	v = __this_cpu_inc_return(*p);
  	t = __this_cpu_read(pcp->stat_threshold);
  	if (unlikely(v > t)) {
  		s8 overstep = t >> 1;
  
  		node_page_state_add(v + overstep, pgdat, item);
  		__this_cpu_write(*p, -overstep);
  	}
  }
  void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
  {
  	__inc_zone_state(page_zone(page), item);
  }
  EXPORT_SYMBOL(__inc_zone_page_state);
  void __inc_node_page_state(struct page *page, enum node_stat_item item)
  {
  	__inc_node_state(page_pgdat(page), item);
  }
  EXPORT_SYMBOL(__inc_node_page_state);
  void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
  {
  	struct per_cpu_pageset __percpu *pcp = zone->pageset;
  	s8 __percpu *p = pcp->vm_stat_diff + item;
  	s8 v, t;

  	v = __this_cpu_dec_return(*p);
  	t = __this_cpu_read(pcp->stat_threshold);
  	if (unlikely(v < - t)) {
  		s8 overstep = t >> 1;

  		zone_page_state_add(v - overstep, zone, item);
  		__this_cpu_write(*p, overstep);
  	}
  }

  void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
  {
  	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
  	s8 __percpu *p = pcp->vm_node_stat_diff + item;
  	s8 v, t;
  
  	v = __this_cpu_dec_return(*p);
  	t = __this_cpu_read(pcp->stat_threshold);
  	if (unlikely(v < - t)) {
  		s8 overstep = t >> 1;
  
  		node_page_state_add(v - overstep, pgdat, item);
  		__this_cpu_write(*p, overstep);
  	}
  }
  void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
  {
  	__dec_zone_state(page_zone(page), item);
  }
  EXPORT_SYMBOL(__dec_zone_page_state);
  void __dec_node_page_state(struct page *page, enum node_stat_item item)
  {
  	__dec_node_state(page_pgdat(page), item);
  }
  EXPORT_SYMBOL(__dec_node_page_state);
  #ifdef CONFIG_HAVE_CMPXCHG_LOCAL
  /*
   * If we have cmpxchg_local support then we do not need to incur the overhead
   * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
   *
   * mod_state() modifies the zone counter state through atomic per cpu
   * operations.
   *
   * Overstep mode specifies how overstep should be handled:
   *     0       No overstepping
   *     1       Overstepping half of threshold
   *     -1      Overstepping minus half of threshold
  */
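  /*
   * Illustrative numbers, not from the source: with stat_threshold == 32 and
   * overstep_mode == 1, os == 16; once the per-cpu delta crosses the
   * threshold, n + 16 is folded into the global counter and the per-cpu
   * counter restarts at -16, leaving extra headroom before the next fold.
   */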
  static inline void mod_zone_state(struct zone *zone,
         enum zone_stat_item item, long delta, int overstep_mode)
  {
  	struct per_cpu_pageset __percpu *pcp = zone->pageset;
  	s8 __percpu *p = pcp->vm_stat_diff + item;
  	long o, n, t, z;
  
  	do {
  		z = 0;  /* overflow to zone counters */
  
  		/*
  		 * The fetching of the stat_threshold is racy. We may apply
  		 * a counter threshold to the wrong cpu if we get
  		 * rescheduled while executing here. However, the next
  		 * counter update will apply the threshold again and
  		 * therefore bring the counter under the threshold again.
  		 *
  		 * Most of the time the thresholds are the same anyways
  		 * for all cpus in a zone.
  		 */
  		t = this_cpu_read(pcp->stat_threshold);
  
  		o = this_cpu_read(*p);
  		n = delta + o;
  
  		if (n > t || n < -t) {
  			int os = overstep_mode * (t >> 1);
  
  			/* Overflow must be added to zone counters */
  			z = n + os;
  			n = -os;
  		}
  	} while (this_cpu_cmpxchg(*p, o, n) != o);
  
  	if (z)
  		zone_page_state_add(z, zone, item);
  }
  
  void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
  			 long delta)
  {
  	mod_zone_state(zone, item, delta, 0);
  }
  EXPORT_SYMBOL(mod_zone_page_state);
  void inc_zone_page_state(struct page *page, enum zone_stat_item item)
  {
  	mod_zone_state(page_zone(page), item, 1, 1);
  }
  EXPORT_SYMBOL(inc_zone_page_state);
  
  void dec_zone_page_state(struct page *page, enum zone_stat_item item)
  {
  	mod_zone_state(page_zone(page), item, -1, -1);
  }
  EXPORT_SYMBOL(dec_zone_page_state);
  
  static inline void mod_node_state(struct pglist_data *pgdat,
         enum node_stat_item item, int delta, int overstep_mode)
  {
  	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
  	s8 __percpu *p = pcp->vm_node_stat_diff + item;
  	long o, n, t, z;
  
  	do {
  		z = 0;  /* overflow to node counters */
  
  		/*
  		 * The fetching of the stat_threshold is racy. We may apply
  		 * a counter threshold to the wrong cpu if we get
  		 * rescheduled while executing here. However, the next
  		 * counter update will apply the threshold again and
  		 * therefore bring the counter under the threshold again.
  		 *
  		 * Most of the time the thresholds are the same anyways
  		 * for all cpus in a node.
  		 */
  		t = this_cpu_read(pcp->stat_threshold);
  
  		o = this_cpu_read(*p);
  		n = delta + o;
  
  		if (n > t || n < -t) {
  			int os = overstep_mode * (t >> 1);
  
  			/* Overflow must be added to node counters */
  			z = n + os;
  			n = -os;
  		}
  	} while (this_cpu_cmpxchg(*p, o, n) != o);
  
  	if (z)
  		node_page_state_add(z, pgdat, item);
  }
  
  void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
  					long delta)
  {
  	mod_node_state(pgdat, item, delta, 0);
  }
  EXPORT_SYMBOL(mod_node_page_state);
  
  void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
  {
  	mod_node_state(pgdat, item, 1, 1);
  }
  
  void inc_node_page_state(struct page *page, enum node_stat_item item)
  {
  	mod_node_state(page_pgdat(page), item, 1, 1);
  }
  EXPORT_SYMBOL(inc_node_page_state);
  
  void dec_node_page_state(struct page *page, enum node_stat_item item)
  {
  	mod_node_state(page_pgdat(page), item, -1, -1);
  }
  EXPORT_SYMBOL(dec_node_page_state);
  #else
  /*
   * Use interrupt disable to serialize counter updates
   */
  void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
  			 long delta)
  {
  	unsigned long flags;
  
  	local_irq_save(flags);
  	__mod_zone_page_state(zone, item, delta);
  	local_irq_restore(flags);
  }
  EXPORT_SYMBOL(mod_zone_page_state);
  void inc_zone_page_state(struct page *page, enum zone_stat_item item)
  {
  	unsigned long flags;
  	struct zone *zone;
  
  	zone = page_zone(page);
  	local_irq_save(flags);
  	__inc_zone_state(zone, item);
  	local_irq_restore(flags);
  }
  EXPORT_SYMBOL(inc_zone_page_state);
  
  void dec_zone_page_state(struct page *page, enum zone_stat_item item)
  {
  	unsigned long flags;

  	local_irq_save(flags);
  	__dec_zone_page_state(page, item);
  	local_irq_restore(flags);
  }
  EXPORT_SYMBOL(dec_zone_page_state);
  void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
  {
  	unsigned long flags;
  
  	local_irq_save(flags);
  	__inc_node_state(pgdat, item);
  	local_irq_restore(flags);
  }
  EXPORT_SYMBOL(inc_node_state);
  
  void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
  					long delta)
  {
  	unsigned long flags;
  
  	local_irq_save(flags);
  	__mod_node_page_state(pgdat, item, delta);
  	local_irq_restore(flags);
  }
  EXPORT_SYMBOL(mod_node_page_state);
  
  void inc_node_page_state(struct page *page, enum node_stat_item item)
  {
  	unsigned long flags;
  	struct pglist_data *pgdat;
  
  	pgdat = page_pgdat(page);
  	local_irq_save(flags);
  	__inc_node_state(pgdat, item);
  	local_irq_restore(flags);
  }
  EXPORT_SYMBOL(inc_node_page_state);
  
  void dec_node_page_state(struct page *page, enum node_stat_item item)
  {
  	unsigned long flags;
  
  	local_irq_save(flags);
  	__dec_node_page_state(page, item);
  	local_irq_restore(flags);
  }
  EXPORT_SYMBOL(dec_node_page_state);
  #endif
  
  /*
   * Fold a differential into the global counters.
   * Returns the number of counters updated.
   */
  #ifdef CONFIG_NUMA
  static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
  {
  	int i;
  	int changes = 0;
  
  	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
  		if (zone_diff[i]) {
  			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
  			changes++;
  	}
  
  	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
  		if (numa_diff[i]) {
  			atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
  			changes++;
  	}
  
  	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
  		if (node_diff[i]) {
  			atomic_long_add(node_diff[i], &vm_node_stat[i]);
  			changes++;
  	}
  	return changes;
  }
  #else
  static int fold_diff(int *zone_diff, int *node_diff)
  {
  	int i;
  	int changes = 0;

  	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
  		if (zone_diff[i]) {
  			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
  			changes++;
  	}
  
  	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
  		if (node_diff[i]) {
  			atomic_long_add(node_diff[i], &vm_node_stat[i]);
  			changes++;
  	}
  	return changes;
  }
  #endif /* CONFIG_NUMA */

  /*
   * Update the zone counters for the current cpu.
   *
   * Note that refresh_cpu_vm_stats strives to only access
   * node local memory. The per cpu pagesets on remote zones are placed
   * in the memory local to the processor using that pageset. So the
   * loop over all zones will access a series of cachelines local to
   * the processor.
   *
   * The call to zone_page_state_add updates the cachelines with the
   * statistics in the remote zone struct as well as the global cachelines
   * with the global counters. These could cause remote node cache line
   * bouncing and will have to be only done when necessary.
   *
   * The function returns the number of global counters updated.
   */
  static int refresh_cpu_vm_stats(bool do_pagesets)
  {
  	struct pglist_data *pgdat;
  	struct zone *zone;
  	int i;
  	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
  #ifdef CONFIG_NUMA
  	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
  #endif
  	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
  	int changes = 0;

  	for_each_populated_zone(zone) {
  		struct per_cpu_pageset __percpu *p = zone->pageset;

  		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
  			int v;

  			v = this_cpu_xchg(p->vm_stat_diff[i], 0);
  			if (v) {

  				atomic_long_add(v, &zone->vm_stat[i]);
  				global_zone_diff[i] += v;
  #ifdef CONFIG_NUMA
  				/* 3 seconds idle till flush */
  				__this_cpu_write(p->expire, 3);
  #endif
  			}
  		}
  #ifdef CONFIG_NUMA
  		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
  			int v;
  
  			v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0);
  			if (v) {
  
  				atomic_long_add(v, &zone->vm_numa_stat[i]);
  				global_numa_diff[i] += v;
  				__this_cpu_write(p->expire, 3);
  			}
  		}
  		if (do_pagesets) {
  			cond_resched();
  			/*
  			 * Deal with draining the remote pageset of this
  			 * processor
  			 *
  			 * Check if there are pages remaining in this pageset;
  			 * if not then there is nothing to expire.
  			 */
  			if (!__this_cpu_read(p->expire) ||
  			       !__this_cpu_read(p->pcp.count))
  				continue;

  			/*
  			 * We never drain zones local to this processor.
  			 */
  			if (zone_to_nid(zone) == numa_node_id()) {
  				__this_cpu_write(p->expire, 0);
  				continue;
  			}

  			if (__this_cpu_dec_return(p->expire))
  				continue;

  			if (__this_cpu_read(p->pcp.count)) {
  				drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
  				changes++;
  			}
  		}
  #endif
  	}
  
  	for_each_online_pgdat(pgdat) {
  		struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;
  
  		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
  			int v;
  
  			v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
  			if (v) {
  				atomic_long_add(v, &pgdat->vm_stat[i]);
  				global_node_diff[i] += v;
  			}
  		}
  	}
  #ifdef CONFIG_NUMA
  	changes += fold_diff(global_zone_diff, global_numa_diff,
  			     global_node_diff);
  #else
  	changes += fold_diff(global_zone_diff, global_node_diff);
  #endif
  	return changes;
  }
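  /*
   * Illustrative note, the caller is not shown in this excerpt: the periodic
   * per-cpu vmstat work invokes this routine (with do_pagesets == true) to
   * push per-cpu deltas into the global counters and, on NUMA, to drain
   * pagesets on remote nodes that have sat idle.
   */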
  /*
   * Fold the data for an offline cpu into the global array.
   * There cannot be any access by the offline cpu and therefore
   * synchronization is simplified.
   */
  void cpu_vm_stats_fold(int cpu)
  {
  	struct pglist_data *pgdat;
  	struct zone *zone;
  	int i;
  	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
  #ifdef CONFIG_NUMA
  	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
  #endif
  	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
  
  	for_each_populated_zone(zone) {
  		struct per_cpu_pageset *p;
  
  		p = per_cpu_ptr(zone->pageset, cpu);
  
  		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
  			if (p->vm_stat_diff[i]) {
  				int v;
  
  				v = p->vm_stat_diff[i];
  				p->vm_stat_diff[i] = 0;
  				atomic_long_add(v, &zone->vm_stat[i]);
  				global_zone_diff[i] += v;
  			}
  
  #ifdef CONFIG_NUMA
  		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
  			if (p->vm_numa_stat_diff[i]) {
  				int v;
  
  				v = p->vm_numa_stat_diff[i];
  				p->vm_numa_stat_diff[i] = 0;
  				atomic_long_add(v, &zone->vm_numa_stat[i]);
  				global_numa_diff[i] += v;
  			}
  #endif
  	}
  	for_each_online_pgdat(pgdat) {
  		struct per_cpu_nodestat *p;
  
  		p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
  
  		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
  			if (p->vm_node_stat_diff[i]) {
  				int v;
  
  				v = p->vm_node_stat_diff[i];
  				p->vm_node_stat_diff[i] = 0;
  				atomic_long_add(v, &pgdat->vm_stat[i]);
  				global_node_diff[i] += v;
  			}
  	}
  #ifdef CONFIG_NUMA
  	fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
  #else
  	fold_diff(global_zone_diff, global_node_diff);
  #endif
  }
  
  /*
   * This is only called if !populated_zone(zone), which implies no other users of
   * pset->vm_stat_diff[] exist.
   */
  void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
  {
  	int i;
  
  	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
  		if (pset->vm_stat_diff[i]) {
  			int v = pset->vm_stat_diff[i];
  			pset->vm_stat_diff[i] = 0;
  			atomic_long_add(v, &zone->vm_stat[i]);
  			atomic_long_add(v, &vm_zone_stat[i]);
  		}
  
  #ifdef CONFIG_NUMA
  	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
  		if (pset->vm_numa_stat_diff[i]) {
  			int v = pset->vm_numa_stat_diff[i];
  
  			pset->vm_numa_stat_diff[i] = 0;
  			atomic_long_add(v, &zone->vm_numa_stat[i]);
  			atomic_long_add(v, &vm_numa_stat[i]);
  		}
  #endif
  }
  #endif
  #ifdef CONFIG_NUMA
  void __inc_numa_state(struct zone *zone,
  				 enum numa_stat_item item)
  {
  	struct per_cpu_pageset __percpu *pcp = zone->pageset;
  	u16 __percpu *p = pcp->vm_numa_stat_diff + item;
  	u16 v;

  	v = __this_cpu_inc_return(*p);

  	if (unlikely(v > NUMA_STATS_THRESHOLD)) {
  		zone_numa_state_add(v, zone, item);
  		__this_cpu_write(*p, 0);
  	}
  }
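  /*
   * Illustrative note based on the code above: the NUMA diffs are u16 and are
   * only folded into the zone counter once they approach U16_MAX (see
   * NUMA_STATS_THRESHOLD), trading per-read precision for far fewer atomic
   * updates on the hot allocation path.
   */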
  /*
   * Determine the per node value of a stat item. This function
   * is called frequently in a NUMA machine, so try to be as
   * frugal as possible.
   */
  unsigned long sum_zone_node_page_state(int node,
  				 enum zone_stat_item item)
  {
  	struct zone *zones = NODE_DATA(node)->node_zones;
  	int i;
  	unsigned long count = 0;

  	for (i = 0; i < MAX_NR_ZONES; i++)
  		count += zone_page_state(zones + i, item);
  
  	return count;
  }
  /*
   * Determine the per node value of a numa stat item. To avoid deviation,
   * the per cpu stat number in vm_numa_stat_diff[] is also included.
   */
  unsigned long sum_zone_numa_state(int node,
  				 enum numa_stat_item item)
  {
  	struct zone *zones = NODE_DATA(node)->node_zones;
  	int i;
  	unsigned long count = 0;
  
  	for (i = 0; i < MAX_NR_ZONES; i++)
  		count += zone_numa_state_snapshot(zones + i, item);
  
  	return count;
  }
  /*
   * Determine the per node value of a stat item.
   */
  unsigned long node_page_state(struct pglist_data *pgdat,
  				enum node_stat_item item)
  {
  	long x = atomic_long_read(&pgdat->vm_stat[item]);
  #ifdef CONFIG_SMP
  	if (x < 0)
  		x = 0;
  #endif
  	return x;
  }
  #endif
  #ifdef CONFIG_COMPACTION

  struct contig_page_info {
  	unsigned long free_pages;
  	unsigned long free_blocks_total;
  	unsigned long free_blocks_suitable;
  };
  
  /*
   * Calculate the number of free pages in a zone, how many contiguous
   * pages are free and how many are large enough to satisfy an allocation of
   * the target size. Note that this function makes no attempt to estimate
   * how many suitable free blocks there *might* be if MOVABLE pages were
   * migrated. Calculating that is possible, but expensive and can be
   * figured out from userspace
   */
  static void fill_contig_page_info(struct zone *zone,
  				unsigned int suitable_order,
  				struct contig_page_info *info)
  {
  	unsigned int order;
  
  	info->free_pages = 0;
  	info->free_blocks_total = 0;
  	info->free_blocks_suitable = 0;
  
  	for (order = 0; order < MAX_ORDER; order++) {
  		unsigned long blocks;
  
  		/* Count number of free blocks */
  		blocks = zone->free_area[order].nr_free;
  		info->free_blocks_total += blocks;
  
  		/* Count free base pages */
  		info->free_pages += blocks << order;
  
  		/* Count the suitable free blocks */
  		if (order >= suitable_order)
  			info->free_blocks_suitable += blocks <<
  						(order - suitable_order);
  	}
  }
  
  /*
   * A fragmentation index only makes sense if an allocation of a requested
   * size would fail. If that is true, the fragmentation index indicates
   * whether external fragmentation or a lack of memory was the problem.
   * The value can be used to determine if page reclaim or compaction
   * should be used
   */
  static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
  {
  	unsigned long requested = 1UL << order;
  	if (WARN_ON_ONCE(order >= MAX_ORDER))
  		return 0;
  	if (!info->free_blocks_total)
  		return 0;
  
  	/* Fragmentation index only makes sense when a request would fail */
  	if (info->free_blocks_suitable)
  		return -1000;
  
  	/*
  	 * Index is between 0 and 1 so return within 3 decimal places
  	 *
  	 * 0 => allocation would fail due to lack of memory
  	 * 1 => allocation would fail due to fragmentation
  	 */
  	return 1000 - div_u64((1000 + div_u64(info->free_pages * 1000ULL, requested)), info->free_blocks_total);
  }
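  /*
   * Worked example with made-up numbers: for a request of order 4 (16 pages)
   * against a zone with free_pages = 100, free_blocks_total = 50 and no
   * suitable block, the index is
   * 1000 - (1000 + 100 * 1000 / 16) / 50 = 1000 - 145 = 855, i.e. 0.855,
   * pointing at external fragmentation rather than a lack of memory.
   */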
  
  /* Same as __fragmentation index but allocs contig_page_info on stack */
  int fragmentation_index(struct zone *zone, unsigned int order)
  {
  	struct contig_page_info info;
  
  	fill_contig_page_info(zone, order, &info);
  	return __fragmentation_index(order, &info);
  }
  #endif
  #if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
  #ifdef CONFIG_ZONE_DMA
  #define TEXT_FOR_DMA(xx) xx "_dma",
  #else
  #define TEXT_FOR_DMA(xx)
  #endif
  
  #ifdef CONFIG_ZONE_DMA32
  #define TEXT_FOR_DMA32(xx) xx "_dma32",
  #else
  #define TEXT_FOR_DMA32(xx)
  #endif
  
  #ifdef CONFIG_HIGHMEM
  #define TEXT_FOR_HIGHMEM(xx) xx "_high",
  #else
  #define TEXT_FOR_HIGHMEM(xx)
  #endif
  
  #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
  					TEXT_FOR_HIGHMEM(xx) xx "_movable",
  
  const char * const vmstat_text[] = {
  	/* enum zone_stat_item counters */
  	"nr_free_pages",
  	"nr_zone_inactive_anon",
  	"nr_zone_active_anon",
  	"nr_zone_inactive_file",
  	"nr_zone_active_file",
  	"nr_zone_unevictable",
  	"nr_zone_write_pending",
  	"nr_mlock",
  	"nr_page_table_pages",
  	"nr_kernel_stack",
  	"nr_bounce",
  #if IS_ENABLED(CONFIG_ZSMALLOC)
  	"nr_zspages",
  #endif
  	"nr_free_cma",
  
  	/* enum numa_stat_item counters */
  #ifdef CONFIG_NUMA
  	"numa_hit",
  	"numa_miss",
  	"numa_foreign",
  	"numa_interleave",
  	"numa_local",
  	"numa_other",
  #endif

  	/* Node-based counters */
  	"nr_inactive_anon",
  	"nr_active_anon",
  	"nr_inactive_file",
  	"nr_active_file",
  	"nr_unevictable",
  	"nr_slab_reclaimable",
  	"nr_slab_unreclaimable",
  	"nr_isolated_anon",
  	"nr_isolated_file",
  	"workingset_refault",
  	"workingset_activate",
  	"workingset_nodereclaim",
  	"nr_anon_pages",
  	"nr_mapped",
  	"nr_file_pages",
  	"nr_dirty",
  	"nr_writeback",
  	"nr_writeback_temp",
  	"nr_shmem",
  	"nr_shmem_hugepages",
  	"nr_shmem_pmdmapped",
  	"nr_anon_transparent_hugepages",
  	"nr_unstable",
  	"nr_vmscan_write",
  	"nr_vmscan_immediate_reclaim",
  	"nr_dirtied",
  	"nr_written",
  	"", /* nr_indirectly_reclaimable */

  	/* enum writeback_stat_item counters */
  	"nr_dirty_threshold",
  	"nr_dirty_background_threshold",
  
  #ifdef CONFIG_VM_EVENT_COUNTERS
  	/* enum vm_event_item counters */
  	"pgpgin",
  	"pgpgout",
  	"pswpin",
  	"pswpout",
  
  	TEXTS_FOR_ZONES("pgalloc")
  	TEXTS_FOR_ZONES("allocstall")
  	TEXTS_FOR_ZONES("pgskip")
  
  	"pgfree",
  	"pgactivate",
  	"pgdeactivate",
  	"pglazyfree",
  
  	"pgfault",
  	"pgmajfault",
  	"pglazyfreed",

  	"pgrefill",
  	"pgsteal_kswapd",
  	"pgsteal_direct",
  	"pgscan_kswapd",
  	"pgscan_direct",
  	"pgscan_direct_throttle",
  
  #ifdef CONFIG_NUMA
  	"zone_reclaim_failed",
  #endif
  	"pginodesteal",
  	"slabs_scanned",
  	"kswapd_inodesteal",
  	"kswapd_low_wmark_hit_quickly",
  	"kswapd_high_wmark_hit_quickly",
  	"pageoutrun",
  
  	"pgrotated",
  	"drop_pagecache",
  	"drop_slab",
  	"oom_kill",

  #ifdef CONFIG_NUMA_BALANCING
  	"numa_pte_updates",
  	"numa_huge_pte_updates",
  	"numa_hint_faults",
  	"numa_hint_faults_local",
  	"numa_pages_migrated",
  #endif
  #ifdef CONFIG_MIGRATION
  	"pgmigrate_success",
  	"pgmigrate_fail",
  #endif
  #ifdef CONFIG_COMPACTION
  	"compact_migrate_scanned",
  	"compact_free_scanned",
  	"compact_isolated",
  	"compact_stall",
  	"compact_fail",
  	"compact_success",
  	"compact_daemon_wake",
  	"compact_daemon_migrate_scanned",
  	"compact_daemon_free_scanned",
  #endif
  
  #ifdef CONFIG_HUGETLB_PAGE
  	"htlb_buddy_alloc_success",
  	"htlb_buddy_alloc_fail",
  #endif
  	"unevictable_pgs_culled",
  	"unevictable_pgs_scanned",
  	"unevictable_pgs_rescued",
  	"unevictable_pgs_mlocked",
  	"unevictable_pgs_munlocked",
  	"unevictable_pgs_cleared",
  	"unevictable_pgs_stranded",
  
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  	"thp_fault_alloc",
  	"thp_fault_fallback",
  	"thp_collapse_alloc",
  	"thp_collapse_alloc_failed",
  	"thp_file_alloc",
  	"thp_file_mapped",
  	"thp_split_page",
  	"thp_split_page_failed",
  	"thp_deferred_split_page",
  	"thp_split_pmd",
  #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
  	"thp_split_pud",
  #endif
  	"thp_zero_page_alloc",
  	"thp_zero_page_alloc_failed",
  	"thp_swpout",
  	"thp_swpout_fallback",
  #endif
  #ifdef CONFIG_MEMORY_BALLOON
  	"balloon_inflate",
  	"balloon_deflate",
  #ifdef CONFIG_BALLOON_COMPACTION
  	"balloon_migrate",
  #endif
  #endif /* CONFIG_MEMORY_BALLOON */
  #ifdef CONFIG_DEBUG_TLBFLUSH
  #ifdef CONFIG_SMP
  	"nr_tlb_remote_flush",
  	"nr_tlb_remote_flush_received",
  #else
  	"", /* nr_tlb_remote_flush */
  	"", /* nr_tlb_remote_flush_received */
  #endif /* CONFIG_SMP */
  	"nr_tlb_local_flush_all",
  	"nr_tlb_local_flush_one",
  #endif /* CONFIG_DEBUG_TLBFLUSH */

  #ifdef CONFIG_DEBUG_VM_VMACACHE
  	"vmacache_find_calls",
  	"vmacache_find_hits",
  #endif
  #ifdef CONFIG_SWAP
  	"swap_ra",
  	"swap_ra_hit",
  #endif
  #endif /* CONFIG_VM_EVENT_COUNTERS */
  };
  #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */

  #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
       defined(CONFIG_PROC_FS)
  static void *frag_start(struct seq_file *m, loff_t *pos)
  {
  	pg_data_t *pgdat;
  	loff_t node = *pos;
  
  	for (pgdat = first_online_pgdat();
  	     pgdat && node;
  	     pgdat = next_online_pgdat(pgdat))
  		--node;
  
  	return pgdat;
  }
  
  static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
  {
  	pg_data_t *pgdat = (pg_data_t *)arg;
  
  	(*pos)++;
  	return next_online_pgdat(pgdat);
  }
  
  static void frag_stop(struct seq_file *m, void *arg)
  {
  }
b2bd85981   David Rientjes   mm, vmstat: print...
1283
1284
1285
1286
  /*
   * Walk zones in a node and print using a callback.
   * If @assert_populated is true, only use callback for zones that are populated.
   */
3c4868710   Andrew Morton   mm/vmstat.c: fix/...
1287
  static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
727c080f0   Vinayak Menon   mm: avoid taking ...
1288
  		bool assert_populated, bool nolock,
3c4868710   Andrew Morton   mm/vmstat.c: fix/...
1289
1290
1291
1292
1293
1294
1295
  		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
  {
  	struct zone *zone;
  	struct zone *node_zones = pgdat->node_zones;
  	unsigned long flags;
  
  	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
b2bd85981   David Rientjes   mm, vmstat: print...
1296
  		if (assert_populated && !populated_zone(zone))
3c4868710   Andrew Morton   mm/vmstat.c: fix/...
1297
  			continue;
727c080f0   Vinayak Menon   mm: avoid taking ...
1298
1299
  		if (!nolock)
  			spin_lock_irqsave(&zone->lock, flags);
3c4868710   Andrew Morton   mm/vmstat.c: fix/...
1300
  		print(m, pgdat, zone);
727c080f0   Vinayak Menon   mm: avoid taking ...
1301
1302
  		if (!nolock)
  			spin_unlock_irqrestore(&zone->lock, flags);
3c4868710   Andrew Morton   mm/vmstat.c: fix/...
1303
1304
1305
  	}
  }
  #endif
d7a5752c0   Mel Gorman   mm: export unusab...
1306
  #ifdef CONFIG_PROC_FS
467c996c1   Mel Gorman   Print out statist...
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
  static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
  						struct zone *zone)
  {
  	int order;
  
  	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
  	for (order = 0; order < MAX_ORDER; ++order)
  		seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
  	seq_putc(m, '\n');
  }
  
  /*
   * This walks the free areas for each zone.
   */
  static int frag_show(struct seq_file *m, void *arg)
  {
  	pg_data_t *pgdat = (pg_data_t *)arg;
727c080f0   Vinayak Menon   mm: avoid taking ...
1325
  	walk_zones_in_node(m, pgdat, true, false, frag_show_print);
467c996c1   Mel Gorman   Print out statist...
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
  	return 0;
  }
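  
  /*
   * Illustrative sketch, not part of the kernel sources: a small user-space
   * reader for the /proc/buddyinfo format emitted by frag_show_print() above
   * ("Node <nid>, zone <name>" followed by one nr_free count per order).  It
   * also shows what the counts mean: an order-N entry stands for blocks of
   * 1 << N contiguous pages, so a zone's free pages total sum(nr_free << order).
   *
   *	#include <stdio.h>
   *	#include <stdlib.h>
   *	#include <string.h>
   *
   *	int main(void)
   *	{
   *		char line[512];
   *		FILE *f = fopen("/proc/buddyinfo", "r");
   *
   *		if (!f)
   *			return 1;
   *		while (fgets(line, sizeof(line), f)) {
   *			unsigned long total = 0;
   *			int nid, order = 0;
   *			char zone[16];
   *			char *p, *end;
   *
   *			if (sscanf(line, "Node %d, zone %15s", &nid, zone) != 2)
   *				continue;
   *			p = strstr(line, zone) + strlen(zone);
   *			for (;;) {
   *				unsigned long nr = strtoul(p, &end, 10);
   *
   *				if (end == p)
   *					break;
   *				total += nr << order++;
   *				p = end;
   *			}
   *			printf("node %d zone %-8s: %lu free pages\n",
   *			       nid, zone, total);
   *		}
   *		fclose(f);
   *		return 0;
   *	}
   */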
  
  static void pagetypeinfo_showfree_print(struct seq_file *m,
  					pg_data_t *pgdat, struct zone *zone)
  {
  	int order, mtype;
  
  	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
  		seq_printf(m, "Node %4d, zone %8s, type %12s ",
  					pgdat->node_id,
  					zone->name,
  					migratetype_names[mtype]);
  		for (order = 0; order < MAX_ORDER; ++order) {
  			unsigned long freecount = 0;
  			struct free_area *area;
  			struct list_head *curr;
  
  			area = &(zone->free_area[order]);
  
  			list_for_each(curr, &area->free_list[mtype])
  				freecount++;
  			seq_printf(m, "%6lu ", freecount);
  		}
f6ac2354d   Christoph Lameter   [PATCH] zoned vm ...
1350
1351
1352
  		seq_putc(m, '\n');
  	}
467c996c1   Mel Gorman   Print out statist...
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
  }
  
  /* Print out the free pages at each order for each migratetype */
  {
  	int order;
  	pg_data_t *pgdat = (pg_data_t *)arg;
  
  	/* Print header */
  	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
  	for (order = 0; order < MAX_ORDER; ++order)
  		seq_printf(m, "%6d ", order);
  	seq_putc(m, '\n');
727c080f0   Vinayak Menon   mm: avoid taking ...
1367
  	walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
467c996c1   Mel Gorman   Print out statist...
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
  
  	return 0;
  }
  
  static void pagetypeinfo_showblockcount_print(struct seq_file *m,
  					pg_data_t *pgdat, struct zone *zone)
  {
  	int mtype;
  	unsigned long pfn;
  	unsigned long start_pfn = zone->zone_start_pfn;
108bcc96e   Cody P Schafer   mm: add & use zon...
1378
  	unsigned long end_pfn = zone_end_pfn(zone);
467c996c1   Mel Gorman   Print out statist...
1379
1380
1381
1382
  	unsigned long count[MIGRATE_TYPES] = { 0, };
  
  	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
  		struct page *page;
d336e94e4   Michal Hocko   mm, vmstat: skip ...
1383
1384
  		page = pfn_to_online_page(pfn);
  		if (!page)
467c996c1   Mel Gorman   Print out statist...
1385
  			continue;
eb33575cf   Mel Gorman   [ARM] Double chec...
1386
1387
  		/* Watch for unexpected holes punched in the memmap */
  		if (!memmap_valid_within(pfn, page, zone))
e80d6a248   Mel Gorman   [ARM] Skip memory...
1388
  			continue;
eb33575cf   Mel Gorman   [ARM] Double chec...
1389

a91c43c73   Joonsoo Kim   mm/vmstat: add zo...
1390
1391
  		if (page_zone(page) != zone)
  			continue;
467c996c1   Mel Gorman   Print out statist...
1392
  		mtype = get_pageblock_migratetype(page);
e80d6a248   Mel Gorman   [ARM] Skip memory...
1393
1394
  		if (mtype < MIGRATE_TYPES)
  			count[mtype]++;
467c996c1   Mel Gorman   Print out statist...
1395
1396
1397
1398
1399
1400
1401
1402
1403
  	}
  
  	/* Print counts */
  	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
  	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
  		seq_printf(m, "%12lu ", count[mtype]);
  	seq_putc(m, '\n');
  }
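  
  /*
   * Worked example (illustrative, configuration dependent): the loop above
   * samples one page per pageblock, stepping pfn by pageblock_nr_pages
   * (1 << pageblock_order).  With 4KiB pages and pageblock_order == 9, a
   * pageblock is 512 pages == 2MiB, so a zone spanning 1GiB contributes
   * 1GiB / 2MiB = 512 pageblocks to the per-migratetype counts printed by
   * this function.
   */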
f113e6412   SeongJae Park   mm/vmstat.c: fix ...
1404
  /* Print out the number of pageblocks for each migratetype */
467c996c1   Mel Gorman   Print out statist...
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
  static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
  {
  	int mtype;
  	pg_data_t *pgdat = (pg_data_t *)arg;
  
  	seq_printf(m, "
  %-23s", "Number of blocks type ");
  	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
  		seq_printf(m, "%12s ", migratetype_names[mtype]);
  	seq_putc(m, '\n');
727c080f0   Vinayak Menon   mm: avoid taking ...
1416
1417
  	walk_zones_in_node(m, pgdat, true, false,
  		pagetypeinfo_showblockcount_print);
467c996c1   Mel Gorman   Print out statist...
1418
1419
1420
  
  	return 0;
  }
48c96a368   Joonsoo Kim   mm/page_owner: ke...
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
  /*
   * Print out the number of pageblocks for each migratetype that contain pages
   * of other types. This gives an indication of how well fallbacks are being
   * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
   * to determine what is going on
   */
  static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
  {
  #ifdef CONFIG_PAGE_OWNER
  	int mtype;
7dd80b8af   Vlastimil Babka   mm, page_owner: c...
1431
  	if (!static_branch_unlikely(&page_owner_inited))
48c96a368   Joonsoo Kim   mm/page_owner: ke...
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
  		return;
  
  	drain_all_pages(NULL);
  
  	seq_printf(m, "
  %-23s", "Number of mixed blocks ");
  	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
  		seq_printf(m, "%12s ", migratetype_names[mtype]);
  	seq_putc(m, '\n');
727c080f0   Vinayak Menon   mm: avoid taking ...
1442
1443
  	walk_zones_in_node(m, pgdat, true, true,
  		pagetypeinfo_showmixedcount_print);
48c96a368   Joonsoo Kim   mm/page_owner: ke...
1444
1445
  #endif /* CONFIG_PAGE_OWNER */
  }
467c996c1   Mel Gorman   Print out statist...
1446
1447
1448
1449
1450
1451
1452
  /*
   * This prints out statistics in relation to grouping pages by mobility.
   * It is expensive to collect so do not constantly read the file.
   */
  static int pagetypeinfo_show(struct seq_file *m, void *arg)
  {
  	pg_data_t *pgdat = (pg_data_t *)arg;
41b25a378   KOSAKI Motohiro   /proc/pagetypeinf...
1453
  	/* check memoryless node */
a47b53c5f   Lai Jiangshan   vmstat: use N_MEM...
1454
  	if (!node_state(pgdat->node_id, N_MEMORY))
41b25a378   KOSAKI Motohiro   /proc/pagetypeinf...
1455
  		return 0;
467c996c1   Mel Gorman   Print out statist...
1456
1457
1458
1459
1460
1461
1462
1463
  	seq_printf(m, "Page block order: %d
  ", pageblock_order);
  	seq_printf(m, "Pages per block:  %lu
  ", pageblock_nr_pages);
  	seq_putc(m, '
  ');
  	pagetypeinfo_showfree(m, pgdat);
  	pagetypeinfo_showblockcount(m, pgdat);
48c96a368   Joonsoo Kim   mm/page_owner: ke...
1464
  	pagetypeinfo_showmixedcount(m, pgdat);
467c996c1   Mel Gorman   Print out statist...
1465

f6ac2354d   Christoph Lameter   [PATCH] zoned vm ...
1466
1467
  	return 0;
  }
8f32f7e5a   Alexey Dobriyan   proc: move /proc/...
1468
  static const struct seq_operations fragmentation_op = {
f6ac2354d   Christoph Lameter   [PATCH] zoned vm ...
1469
1470
1471
1472
1473
  	.start	= frag_start,
  	.next	= frag_next,
  	.stop	= frag_stop,
  	.show	= frag_show,
  };
74e2e8e8c   Alexey Dobriyan   proc: move /proc/...
1474
  static const struct seq_operations pagetypeinfo_op = {
467c996c1   Mel Gorman   Print out statist...
1475
1476
1477
1478
1479
  	.start	= frag_start,
  	.next	= frag_next,
  	.stop	= frag_stop,
  	.show	= pagetypeinfo_show,
  };
e2ecc8a79   Mel Gorman   mm, vmstat: print...
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
  static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
  {
  	int zid;
  
  	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
  		struct zone *compare = &pgdat->node_zones[zid];
  
  		if (populated_zone(compare))
  			return zone == compare;
  	}
e2ecc8a79   Mel Gorman   mm, vmstat: print...
1490
1491
  	return false;
  }
467c996c1   Mel Gorman   Print out statist...
1492
1493
  static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
  							struct zone *zone)
f6ac2354d   Christoph Lameter   [PATCH] zoned vm ...
1494
  {
467c996c1   Mel Gorman   Print out statist...
1495
1496
  	int i;
  	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
e2ecc8a79   Mel Gorman   mm, vmstat: print...
1497
1498
1499
1500
1501
1502
  	if (is_zone_first_populated(pgdat, zone)) {
  		seq_printf(m, "
    per-node stats");
  		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
  			seq_printf(m, "
        %-12s %lu",
3a321d2a3   Kemi Wang   mm: change the ca...
1503
1504
  				vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
  				NR_VM_NUMA_STAT_ITEMS],
e2ecc8a79   Mel Gorman   mm, vmstat: print...
1505
1506
1507
  				node_page_state(pgdat, i));
  		}
  	}
467c996c1   Mel Gorman   Print out statist...
1508
1509
1510
1511
1512
1513
1514
1515
1516
  	seq_printf(m,
  		   "\n  pages free     %lu"
  		   "\n        min      %lu"
  		   "\n        low      %lu"
  		   "\n        high     %lu"
467c996c1   Mel Gorman   Print out statist...
1517
1518
  		   "
          spanned  %lu"
9feedc9d8   Jiang Liu   mm: introduce new...
1519
1520
1521
1522
  		   "
          present  %lu"
  		   "
          managed  %lu",
88f5acf88   Mel Gorman   mm: page allocato...
1523
  		   zone_page_state(zone, NR_FREE_PAGES),
418589663   Mel Gorman   page allocator: u...
1524
1525
1526
  		   min_wmark_pages(zone),
  		   low_wmark_pages(zone),
  		   high_wmark_pages(zone),
467c996c1   Mel Gorman   Print out statist...
1527
  		   zone->spanned_pages,
9feedc9d8   Jiang Liu   mm: introduce new...
1528
1529
  		   zone->present_pages,
  		   zone->managed_pages);
467c996c1   Mel Gorman   Print out statist...
1530

467c996c1   Mel Gorman   Print out statist...
1531
  	seq_printf(m,
3484b2de9   Mel Gorman   mm: rearrange zon...
1532
1533
  		   "
          protection: (%ld",
467c996c1   Mel Gorman   Print out statist...
1534
1535
  		   zone->lowmem_reserve[0]);
  	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
3484b2de9   Mel Gorman   mm: rearrange zon...
1536
  		seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
7dfb8bf3b   David Rientjes   mm, vmstat: suppr...
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
  	seq_putc(m, ')');
  
  	/* If unpopulated, no other information is useful */
  	if (!populated_zone(zone)) {
  		seq_putc(m, '\n');
  		return;
  	}
  
  	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
  		seq_printf(m, "
        %-12s %lu", vmstat_text[i],
  				zone_page_state(zone, i));
3a321d2a3   Kemi Wang   mm: change the ca...
1550
1551
1552
1553
1554
  #ifdef CONFIG_NUMA
  	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
  		seq_printf(m, "
        %-12s %lu",
  				vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
638032224   Kemi Wang   mm: consider the ...
1555
  				zone_numa_state_snapshot(zone, i));
3a321d2a3   Kemi Wang   mm: change the ca...
1556
  #endif
7dfb8bf3b   David Rientjes   mm, vmstat: suppr...
1557
1558
  	seq_printf(m, "
    pagesets");
467c996c1   Mel Gorman   Print out statist...
1559
1560
  	for_each_online_cpu(i) {
  		struct per_cpu_pageset *pageset;
467c996c1   Mel Gorman   Print out statist...
1561

99dcc3e5a   Christoph Lameter   this_cpu: Page al...
1562
  		pageset = per_cpu_ptr(zone->pageset, i);
3dfa5721f   Christoph Lameter   Page allocator: g...
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
  		seq_printf(m,
  			   "\n    cpu: %i"
  			   "\n              count: %i"
  			   "\n              high:  %i"
  			   "\n              batch: %i",
  			   i,
  			   pageset->pcp.count,
  			   pageset->pcp.high,
  			   pageset->pcp.batch);
df9ecaba3   Christoph Lameter   [PATCH] ZVC: Scal...
1576
  #ifdef CONFIG_SMP
467c996c1   Mel Gorman   Print out statist...
1577
1578
1579
  		seq_printf(m, "
    vm stats threshold: %d",
  				pageset->stat_threshold);
df9ecaba3   Christoph Lameter   [PATCH] ZVC: Scal...
1580
  #endif
f6ac2354d   Christoph Lameter   [PATCH] zoned vm ...
1581
  	}
467c996c1   Mel Gorman   Print out statist...
1582
  	seq_printf(m,
599d0c954   Mel Gorman   mm, vmscan: move ...
1583
1584
  		   "
    node_unreclaimable:  %u"
3a50d14d0   Andrey Ryabinin   mm: remove unused...
1585
1586
  		   "
    start_pfn:           %lu",
c73322d09   Johannes Weiner   mm: fix 100% CPU ...
1587
  		   pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
3a50d14d0   Andrey Ryabinin   mm: remove unused...
1588
  		   zone->zone_start_pfn);
467c996c1   Mel Gorman   Print out statist...
1589
1590
1591
1592
1593
  	seq_putc(m, '\n');
  }
  
  /*
b2bd85981   David Rientjes   mm, vmstat: print...
1594
1595
1596
1597
   * Output information about zones in @pgdat.  All zones are printed regardless
   * of whether they are populated or not: lowmem_reserve_ratio operates on the
   * set of all zones and userspace would not be aware of such zones if they are
   * suppressed here (zoneinfo displays the effect of lowmem_reserve_ratio).
467c996c1   Mel Gorman   Print out statist...
1598
1599
1600
1601
   */
  static int zoneinfo_show(struct seq_file *m, void *arg)
  {
  	pg_data_t *pgdat = (pg_data_t *)arg;
727c080f0   Vinayak Menon   mm: avoid taking ...
1602
  	walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
f6ac2354d   Christoph Lameter   [PATCH] zoned vm ...
1603
1604
  	return 0;
  }
5c9fe6281   Alexey Dobriyan   proc: move /proc/...
1605
  static const struct seq_operations zoneinfo_op = {
f6ac2354d   Christoph Lameter   [PATCH] zoned vm ...
1606
1607
1608
1609
1610
1611
  	.start	= frag_start, /* iterate over all zones. The same as in
  			       * fragmentation. */
  	.next	= frag_next,
  	.stop	= frag_stop,
  	.show	= zoneinfo_show,
  };
79da826ae   Michael Rubin   writeback: report...
1612
1613
1614
1615
1616
  enum writeback_stat_item {
  	NR_DIRTY_THRESHOLD,
  	NR_DIRTY_BG_THRESHOLD,
  	NR_VM_WRITEBACK_STAT_ITEMS,
  };
f6ac2354d   Christoph Lameter   [PATCH] zoned vm ...
1617
1618
  static void *vmstat_start(struct seq_file *m, loff_t *pos)
  {
2244b95a7   Christoph Lameter   [PATCH] zoned vm ...
1619
  	unsigned long *v;
79da826ae   Michael Rubin   writeback: report...
1620
  	int i, stat_items_size;
f6ac2354d   Christoph Lameter   [PATCH] zoned vm ...
1621
1622
1623
  
  	if (*pos >= ARRAY_SIZE(vmstat_text))
  		return NULL;
79da826ae   Michael Rubin   writeback: report...
1624
  	stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
3a321d2a3   Kemi Wang   mm: change the ca...
1625
  			  NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) +
75ef71840   Mel Gorman   mm, vmstat: add i...
1626
  			  NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) +
79da826ae   Michael Rubin   writeback: report...
1627
  			  NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
f6ac2354d   Christoph Lameter   [PATCH] zoned vm ...
1628

f8891e5e1   Christoph Lameter   [PATCH] Light wei...
1629
  #ifdef CONFIG_VM_EVENT_COUNTERS
79da826ae   Michael Rubin   writeback: report...
1630
  	stat_items_size += sizeof(struct vm_event_state);
f8891e5e1   Christoph Lameter   [PATCH] Light wei...
1631
  #endif
79da826ae   Michael Rubin   writeback: report...
1632
1633
  
  	v = kmalloc(stat_items_size, GFP_KERNEL);
2244b95a7   Christoph Lameter   [PATCH] zoned vm ...
1634
1635
  	m->private = v;
  	if (!v)
f6ac2354d   Christoph Lameter   [PATCH] zoned vm ...
1636
  		return ERR_PTR(-ENOMEM);
2244b95a7   Christoph Lameter   [PATCH] zoned vm ...
1637
  	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
c41f012ad   Michal Hocko   mm: rename global...
1638
  		v[i] = global_zone_page_state(i);
79da826ae   Michael Rubin   writeback: report...
1639
  	v += NR_VM_ZONE_STAT_ITEMS;
3a321d2a3   Kemi Wang   mm: change the ca...
1640
1641
1642
1643
1644
  #ifdef CONFIG_NUMA
  	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
  		v[i] = global_numa_state(i);
  	v += NR_VM_NUMA_STAT_ITEMS;
  #endif
75ef71840   Mel Gorman   mm, vmstat: add i...
1645
1646
1647
  	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
  		v[i] = global_node_page_state(i);
  	v += NR_VM_NODE_STAT_ITEMS;
79da826ae   Michael Rubin   writeback: report...
1648
1649
1650
  	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
  			    v + NR_DIRTY_THRESHOLD);
  	v += NR_VM_WRITEBACK_STAT_ITEMS;
f8891e5e1   Christoph Lameter   [PATCH] Light wei...
1651
  #ifdef CONFIG_VM_EVENT_COUNTERS
79da826ae   Michael Rubin   writeback: report...
1652
1653
1654
  	all_vm_events(v);
  	v[PGPGIN] /= 2;		/* sectors -> kbytes */
  	v[PGPGOUT] /= 2;
f8891e5e1   Christoph Lameter   [PATCH] Light wei...
1655
  #endif
ff8b16d7e   Wu Fengguang   vmstat: fix offse...
1656
  	return (unsigned long *)m->private + *pos;
f6ac2354d   Christoph Lameter   [PATCH] zoned vm ...
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
  }
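  
  /*
   * Sketch of the snapshot buffer built above (entry counts depend on the
   * configuration); it mirrors the order of vmstat_text[], which is why
   * vmstat_show() can index that array with the same offset:
   *
   *	zone counters       NR_VM_ZONE_STAT_ITEMS entries
   *	NUMA counters       NR_VM_NUMA_STAT_ITEMS entries (CONFIG_NUMA only)
   *	node counters       NR_VM_NODE_STAT_ITEMS entries
   *	dirty thresholds    NR_VM_WRITEBACK_STAT_ITEMS entries
   *	VM event counters   one struct vm_event_state (CONFIG_VM_EVENT_COUNTERS only)
   */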
  
  static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
  {
  	(*pos)++;
  	if (*pos >= ARRAY_SIZE(vmstat_text))
  		return NULL;
  	return (unsigned long *)m->private + *pos;
  }
  
  static int vmstat_show(struct seq_file *m, void *arg)
  {
  	unsigned long *l = arg;
  	unsigned long off = l - (unsigned long *)m->private;
68ba0326b   Alexey Dobriyan   proc: much faster...
1671

7aaf77272   Roman Gushchin   mm: don't show nr...
1672
1673
1674
  	/* Skip hidden vmstat items. */
  	if (*vmstat_text[off] == '\0')
  		return 0;
68ba0326b   Alexey Dobriyan   proc: much faster...
1675
  	seq_puts(m, vmstat_text[off]);
75ba1d07f   Joe Perches   seq/proc: modify ...
1676
  	seq_put_decimal_ull(m, " ", *l);
68ba0326b   Alexey Dobriyan   proc: much faster...
1677
1678
  	seq_putc(m, '\n');
f6ac2354d   Christoph Lameter   [PATCH] zoned vm ...
1679
1680
1681
1682
1683
1684
1685
1686
  	return 0;
  }
  
  static void vmstat_stop(struct seq_file *m, void *arg)
  {
  	kfree(m->private);
  	m->private = NULL;
  }
b6aa44ab6   Alexey Dobriyan   proc: move /proc/...
1687
  static const struct seq_operations vmstat_op = {
f6ac2354d   Christoph Lameter   [PATCH] zoned vm ...
1688
1689
1690
1691
1692
  	.start	= vmstat_start,
  	.next	= vmstat_next,
  	.stop	= vmstat_stop,
  	.show	= vmstat_show,
  };
f6ac2354d   Christoph Lameter   [PATCH] zoned vm ...
1693
  #endif /* CONFIG_PROC_FS */
df9ecaba3   Christoph Lameter   [PATCH] ZVC: Scal...
1694
  #ifdef CONFIG_SMP
d1187ed21   Christoph Lameter   vmstat: use our o...
1695
  static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
77461ab33   Christoph Lameter   Make vm statistic...
1696
  int sysctl_stat_interval __read_mostly = HZ;
d1187ed21   Christoph Lameter   vmstat: use our o...
1697

52b6f46bc   Hugh Dickins   mm: /proc/sys/vm/...
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
  #ifdef CONFIG_PROC_FS
  static void refresh_vm_stats(struct work_struct *work)
  {
  	refresh_cpu_vm_stats(true);
  }
  
  int vmstat_refresh(struct ctl_table *table, int write,
  		   void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	long val;
  	int err;
  	int i;
  
  	/*
  	 * The regular update, every sysctl_stat_interval, may come later
  	 * than expected: leaving a significant amount in per_cpu buckets.
  	 * This is particularly misleading when checking a quantity of HUGE
  	 * pages, immediately after running a test.  /proc/sys/vm/stat_refresh,
  	 * which can equally be echo'ed to or cat'ted from (by root),
  	 * can be used to update the stats just before reading them.
  	 *
c41f012ad   Michal Hocko   mm: rename global...
1719
  	 * Oh, and since global_zone_page_state() etc. are so careful to hide
52b6f46bc   Hugh Dickins   mm: /proc/sys/vm/...
1720
1721
1722
1723
1724
1725
1726
  	 * transiently negative values, report an error here if any of
  	 * the stats is negative, so we know to go looking for imbalance.
  	 */
  	err = schedule_on_each_cpu(refresh_vm_stats);
  	if (err)
  		return err;
  	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
75ef71840   Mel Gorman   mm, vmstat: add i...
1727
  		val = atomic_long_read(&vm_zone_stat[i]);
52b6f46bc   Hugh Dickins   mm: /proc/sys/vm/...
1728
  		if (val < 0) {
c822f6223   Johannes Weiner   mm: delete NR_PAG...
1729
1730
1731
1732
  			pr_warn("%s: %s %ld
  ",
  				__func__, vmstat_text[i], val);
  			err = -EINVAL;
52b6f46bc   Hugh Dickins   mm: /proc/sys/vm/...
1733
1734
  		}
  	}
3a321d2a3   Kemi Wang   mm: change the ca...
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
  #ifdef CONFIG_NUMA
  	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
  		val = atomic_long_read(&vm_numa_stat[i]);
  		if (val < 0) {
  			pr_warn("%s: %s %ld
  ",
  				__func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val);
  			err = -EINVAL;
  		}
  	}
  #endif
52b6f46bc   Hugh Dickins   mm: /proc/sys/vm/...
1746
1747
1748
1749
1750
1751
1752
1753
1754
  	if (err)
  		return err;
  	if (write)
  		*ppos += *lenp;
  	else
  		*lenp = 0;
  	return 0;
  }
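  
  /*
   * Illustrative user-space sketch (not part of the kernel sources) of the
   * usage described in the comment above: a privileged read of
   * /proc/sys/vm/stat_refresh folds the per-cpu deltas, so a subsequent read
   * of /proc/vmstat sees up-to-date totals.
   *
   *	#include <stdio.h>
   *
   *	int main(void)
   *	{
   *		char name[128];
   *		unsigned long long val;
   *		FILE *f = fopen("/proc/sys/vm/stat_refresh", "r");
   *
   *		if (f) {		// needs root
   *			fgetc(f);	// the read itself triggers the refresh
   *			fclose(f);
   *		}
   *		f = fopen("/proc/vmstat", "r");
   *		if (!f)
   *			return 1;
   *		while (fscanf(f, "%127s %llu", name, &val) == 2)
   *			printf("%s = %llu\n", name, val);
   *		fclose(f);
   *		return 0;
   *	}
   */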
  #endif /* CONFIG_PROC_FS */
d1187ed21   Christoph Lameter   vmstat: use our o...
1755
1756
  static void vmstat_update(struct work_struct *w)
  {
0eb77e988   Christoph Lameter   vmstat: make vmst...
1757
  	if (refresh_cpu_vm_stats(true)) {
7cc36bbdd   Christoph Lameter   vmstat: on-demand...
1758
1759
1760
1761
1762
  		/*
  		 * Counters were updated so we expect more updates
  		 * to occur in the future. Keep on running the
  		 * update worker thread.
  		 */
ce612879d   Michal Hocko   mm: move pcp and ...
1763
  		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
f01f17d37   Michal Hocko   mm, vmstat: make ...
1764
1765
  				this_cpu_ptr(&vmstat_work),
  				round_jiffies_relative(sysctl_stat_interval));
7cc36bbdd   Christoph Lameter   vmstat: on-demand...
1766
1767
1768
1769
  	}
  }
  
  /*
0eb77e988   Christoph Lameter   vmstat: make vmst...
1770
1771
1772
1773
   * Switch off vmstat processing and then fold all the remaining differentials
   * until the diffs stay at zero. The function is used by NOHZ and can only be
   * invoked when tick processing is not active.
   */
0eb77e988   Christoph Lameter   vmstat: make vmst...
1774
  /*
7cc36bbdd   Christoph Lameter   vmstat: on-demand...
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
   * Check if the diffs for a certain cpu indicate that
   * an update is needed.
   */
  static bool need_update(int cpu)
  {
  	struct zone *zone;
  
  	for_each_populated_zone(zone) {
  		struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
  
  		BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
3a321d2a3   Kemi Wang   mm: change the ca...
1786
  #ifdef CONFIG_NUMA
1d90ca897   Kemi Wang   mm: update NUMA c...
1787
  		BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 2);
3a321d2a3   Kemi Wang   mm: change the ca...
1788
  #endif
638032224   Kemi Wang   mm: consider the ...
1789

7cc36bbdd   Christoph Lameter   vmstat: on-demand...
1790
1791
1792
1793
1794
1795
  		/*
  		 * The fast way of checking if there are any vmstat diffs.
  		 * This works because the diffs are byte sized items.
  		 */
  		if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
  			return true;
3a321d2a3   Kemi Wang   mm: change the ca...
1796
1797
1798
1799
  #ifdef CONFIG_NUMA
  		if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS))
  			return true;
  #endif
7cc36bbdd   Christoph Lameter   vmstat: on-demand...
1800
1801
1802
  	}
  	return false;
  }
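  
  /*
   * Illustrative user-space sketch (not from the kernel tree) of the check
   * above: because each diff is a single byte, "is anything pending?" is one
   * scan over the array rather than a per-counter comparison.  The kernel's
   * memchr_inv() does this word-at-a-time; a plain stand-in is shown here.
   *
   *	#include <stdbool.h>
   *	#include <stddef.h>
   *
   *	static const void *first_byte_not(const void *s, int c, size_t n)
   *	{
   *		const unsigned char *p = s;
   *		size_t i;
   *
   *		for (i = 0; i < n; i++)
   *			if (p[i] != (unsigned char)c)
   *				return p + i;
   *		return NULL;
   *	}
   *
   *	static bool any_diff_pending(const signed char *diff, size_t nr_items)
   *	{
   *		return first_byte_not(diff, 0, nr_items) != NULL;
   *	}
   */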
7b8da4c7f   Christoph Lameter   vmstat: get rid o...
1803
1804
1805
1806
1807
  /*
   * Switch off vmstat processing and then fold all the remaining differentials
   * until the diffs stay at zero. The function is used by NOHZ and can only be
   * invoked when tick processing is not active.
   */
f01f17d37   Michal Hocko   mm, vmstat: make ...
1808
1809
1810
1811
  void quiet_vmstat(void)
  {
  	if (system_state != SYSTEM_RUNNING)
  		return;
7b8da4c7f   Christoph Lameter   vmstat: get rid o...
1812
  	if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
f01f17d37   Michal Hocko   mm, vmstat: make ...
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
  		return;
  
  	if (!need_update(smp_processor_id()))
  		return;
  
  	/*
  	 * Just refresh counters and do not care about the pending delayed
  	 * vmstat_update. It doesn't fire that often to matter and canceling
  	 * it would be too expensive from this path.
  	 * vmstat_shepherd will take care about that for us.
  	 */
  	refresh_cpu_vm_stats(false);
  }
7cc36bbdd   Christoph Lameter   vmstat: on-demand...
1826
1827
1828
1829
1830
1831
1832
  /*
   * Shepherd worker thread that checks the
   * differentials of processors that have their worker
   * threads for vm statistics updates disabled because of
   * inactivity.
   */
  static void vmstat_shepherd(struct work_struct *w);
0eb77e988   Christoph Lameter   vmstat: make vmst...
1833
  static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);
7cc36bbdd   Christoph Lameter   vmstat: on-demand...
1834
1835
1836
1837
1838
1839
1840
  
  static void vmstat_shepherd(struct work_struct *w)
  {
  	int cpu;
  
  	get_online_cpus();
  	/* Check processors whose vmstat worker threads have been disabled */
7b8da4c7f   Christoph Lameter   vmstat: get rid o...
1841
  	for_each_online_cpu(cpu) {
f01f17d37   Michal Hocko   mm, vmstat: make ...
1842
  		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
7cc36bbdd   Christoph Lameter   vmstat: on-demand...
1843

7b8da4c7f   Christoph Lameter   vmstat: get rid o...
1844
  		if (!delayed_work_pending(dw) && need_update(cpu))
ce612879d   Michal Hocko   mm: move pcp and ...
1845
  			queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
f01f17d37   Michal Hocko   mm, vmstat: make ...
1846
  	}
7cc36bbdd   Christoph Lameter   vmstat: on-demand...
1847
1848
1849
  	put_online_cpus();
  
  	schedule_delayed_work(&shepherd,
98f4ebb29   Anton Blanchard   mm: align vmstat_...
1850
  		round_jiffies_relative(sysctl_stat_interval));
d1187ed21   Christoph Lameter   vmstat: use our o...
1851
  }
7cc36bbdd   Christoph Lameter   vmstat: on-demand...
1852
  static void __init start_shepherd_timer(void)
d1187ed21   Christoph Lameter   vmstat: use our o...
1853
  {
7cc36bbdd   Christoph Lameter   vmstat: on-demand...
1854
1855
1856
  	int cpu;
  
  	for_each_possible_cpu(cpu)
ccde8bd40   Michal Hocko   vmstat: make vmst...
1857
  		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
7cc36bbdd   Christoph Lameter   vmstat: on-demand...
1858
  			vmstat_update);
7cc36bbdd   Christoph Lameter   vmstat: on-demand...
1859
1860
  	schedule_delayed_work(&shepherd,
  		round_jiffies_relative(sysctl_stat_interval));
d1187ed21   Christoph Lameter   vmstat: use our o...
1861
  }
03e86dba5   Tim Chen   cpu: fix node sta...
1862
1863
  static void __init init_cpu_node_state(void)
  {
4c501327b   Sebastian Andrzej Siewior   mm/vmstat: Avoid ...
1864
  	int node;
03e86dba5   Tim Chen   cpu: fix node sta...
1865

4c501327b   Sebastian Andrzej Siewior   mm/vmstat: Avoid ...
1866
1867
1868
1869
  	for_each_online_node(node) {
  		if (cpumask_weight(cpumask_of_node(node)) > 0)
  			node_set_state(node, N_CPU);
  	}
03e86dba5   Tim Chen   cpu: fix node sta...
1870
  }
5438da977   Sebastian Andrzej Siewior   mm/vmstat: Conver...
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
  static int vmstat_cpu_online(unsigned int cpu)
  {
  	refresh_zone_stat_thresholds();
  	node_set_state(cpu_to_node(cpu), N_CPU);
  	return 0;
  }
  
  static int vmstat_cpu_down_prep(unsigned int cpu)
  {
  	cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
  	return 0;
  }
  
  static int vmstat_cpu_dead(unsigned int cpu)
807a1bd2b   Toshi Kani   mm: clear N_CPU f...
1885
  {
4c501327b   Sebastian Andrzej Siewior   mm/vmstat: Avoid ...
1886
  	const struct cpumask *node_cpus;
5438da977   Sebastian Andrzej Siewior   mm/vmstat: Conver...
1887
  	int node;
807a1bd2b   Toshi Kani   mm: clear N_CPU f...
1888

5438da977   Sebastian Andrzej Siewior   mm/vmstat: Conver...
1889
1890
1891
  	node = cpu_to_node(cpu);
  
  	refresh_zone_stat_thresholds();
4c501327b   Sebastian Andrzej Siewior   mm/vmstat: Avoid ...
1892
1893
  	node_cpus = cpumask_of_node(node);
  	if (cpumask_weight(node_cpus) > 0)
5438da977   Sebastian Andrzej Siewior   mm/vmstat: Conver...
1894
  		return 0;
807a1bd2b   Toshi Kani   mm: clear N_CPU f...
1895
1896
  
  	node_clear_state(node, N_CPU);
5438da977   Sebastian Andrzej Siewior   mm/vmstat: Conver...
1897
  	return 0;
807a1bd2b   Toshi Kani   mm: clear N_CPU f...
1898
  }
8f32f7e5a   Alexey Dobriyan   proc: move /proc/...
1899
  #endif
df9ecaba3   Christoph Lameter   [PATCH] ZVC: Scal...
1900

ce612879d   Michal Hocko   mm: move pcp and ...
1901
  struct workqueue_struct *mm_percpu_wq;
597b7305d   Michal Hocko   mm: move mm_percp...
1902
  void __init init_mm_internals(void)
df9ecaba3   Christoph Lameter   [PATCH] ZVC: Scal...
1903
  {
ce612879d   Michal Hocko   mm: move pcp and ...
1904
  	int ret __maybe_unused;
5438da977   Sebastian Andrzej Siewior   mm/vmstat: Conver...
1905

80d136e13   Michal Hocko   mm: make mm_percp...
1906
  	mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
ce612879d   Michal Hocko   mm: move pcp and ...
1907
1908
  
  #ifdef CONFIG_SMP
5438da977   Sebastian Andrzej Siewior   mm/vmstat: Conver...
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
  	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
  					NULL, vmstat_cpu_dead);
  	if (ret < 0)
  		pr_err("vmstat: failed to register 'dead' hotplug state
  ");
  
  	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
  					vmstat_cpu_online,
  					vmstat_cpu_down_prep);
  	if (ret < 0)
  		pr_err("vmstat: failed to register 'online' hotplug state
  ");
  
  	get_online_cpus();
03e86dba5   Tim Chen   cpu: fix node sta...
1923
  	init_cpu_node_state();
5438da977   Sebastian Andrzej Siewior   mm/vmstat: Conver...
1924
  	put_online_cpus();
d1187ed21   Christoph Lameter   vmstat: use our o...
1925

7cc36bbdd   Christoph Lameter   vmstat: on-demand...
1926
  	start_shepherd_timer();
8f32f7e5a   Alexey Dobriyan   proc: move /proc/...
1927
1928
  #endif
  #ifdef CONFIG_PROC_FS
fddda2b7b   Christoph Hellwig   proc: introduce p...
1929
1930
1931
1932
  	proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
  	proc_create_seq("pagetypeinfo", 0444, NULL, &pagetypeinfo_op);
  	proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
  	proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
8f32f7e5a   Alexey Dobriyan   proc: move /proc/...
1933
  #endif
df9ecaba3   Christoph Lameter   [PATCH] ZVC: Scal...
1934
  }
d7a5752c0   Mel Gorman   mm: export unusab...
1935
1936
  
  #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
d7a5752c0   Mel Gorman   mm: export unusab...
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
  
  /*
   * Return an index indicating how much of the available free memory is
   * unusable for an allocation of the requested size.
   */
  static int unusable_free_index(unsigned int order,
  				struct contig_page_info *info)
  {
  	/* No free memory is interpreted as all free memory is unusable */
  	if (info->free_pages == 0)
  		return 1000;
  
  	/*
  	 * Index should be a value between 0 and 1. Return a value to 3
  	 * decimal places.
  	 *
  	 * 0 => no fragmentation
  	 * 1 => high fragmentation
  	 */
  	return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
  
  }
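  
  /*
   * Worked example with illustrative numbers: free_pages = 1000 and
   * free_blocks_suitable = 100 at order = 2.  The suitable blocks cover
   * 100 << 2 = 400 pages, so
   *
   *	index = (1000 - 400) * 1000 / 1000 = 600
   *
   * which unusable_show_print() below renders as "0.600": 60% of the free
   * memory is unusable for an order-2 allocation.
   */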
  
  static void unusable_show_print(struct seq_file *m,
  					pg_data_t *pgdat, struct zone *zone)
  {
  	unsigned int order;
  	int index;
  	struct contig_page_info info;
  
  	seq_printf(m, "Node %d, zone %8s ",
  				pgdat->node_id,
  				zone->name);
  	for (order = 0; order < MAX_ORDER; ++order) {
  		fill_contig_page_info(zone, order, &info);
  		index = unusable_free_index(order, &info);
  		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
  	}
  
  	seq_putc(m, '\n');
  }
  
  /*
   * Display unusable free space index
   *
   * The unusable free space index measures how much of the available free
   * memory cannot be used to satisfy an allocation of a given size and is a
   * value between 0 and 1. The higher the value, the more of free memory is
   * unusable and by implication, the worse the external fragmentation is. This
   * can be expressed as a percentage by multiplying by 100.
   */
  static int unusable_show(struct seq_file *m, void *arg)
  {
  	pg_data_t *pgdat = (pg_data_t *)arg;
  
  	/* check memoryless node */
a47b53c5f   Lai Jiangshan   vmstat: use N_MEM...
1994
  	if (!node_state(pgdat->node_id, N_MEMORY))
d7a5752c0   Mel Gorman   mm: export unusab...
1995
  		return 0;
727c080f0   Vinayak Menon   mm: avoid taking ...
1996
  	walk_zones_in_node(m, pgdat, true, false, unusable_show_print);
d7a5752c0   Mel Gorman   mm: export unusab...
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
  
  	return 0;
  }
  
  static const struct seq_operations unusable_op = {
  	.start	= frag_start,
  	.next	= frag_next,
  	.stop	= frag_stop,
  	.show	= unusable_show,
  };
  
  static int unusable_open(struct inode *inode, struct file *file)
  {
  	return seq_open(file, &unusable_op);
  }
  
  static const struct file_operations unusable_file_ops = {
  	.open		= unusable_open,
  	.read		= seq_read,
  	.llseek		= seq_lseek,
  	.release	= seq_release,
  };
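  
  /*
   * With CONFIG_DEBUG_FS and CONFIG_COMPACTION, extfrag_debug_init() below
   * exposes this file as <debugfs>/extfrag/unusable_index (debugfs is
   * commonly mounted at /sys/kernel/debug), one line per zone.  A minimal
   * reader, assuming that mount point and sufficient privileges:
   *
   *	#include <stdio.h>
   *
   *	int main(void)
   *	{
   *		char line[256];
   *		FILE *f = fopen("/sys/kernel/debug/extfrag/unusable_index", "r");
   *
   *		if (!f)
   *			return 1;
   *		while (fgets(line, sizeof(line), f))
   *			fputs(line, stdout);
   *		fclose(f);
   *		return 0;
   *	}
   */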
f1a5ab121   Mel Gorman   mm: export fragme...
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
  static void extfrag_show_print(struct seq_file *m,
  					pg_data_t *pgdat, struct zone *zone)
  {
  	unsigned int order;
  	int index;
  
  	/* Alloc on stack as interrupts are disabled for zone walk */
  	struct contig_page_info info;
  
  	seq_printf(m, "Node %d, zone %8s ",
  				pgdat->node_id,
  				zone->name);
  	for (order = 0; order < MAX_ORDER; ++order) {
  		fill_contig_page_info(zone, order, &info);
56de7263f   Mel Gorman   mm: compaction: d...
2033
  		index = __fragmentation_index(order, &info);
f1a5ab121   Mel Gorman   mm: export fragme...
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
  		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
  	}
  
  	seq_putc(m, '\n');
  }
  
  /*
   * Display fragmentation index for orders that allocations would fail for
   */
  static int extfrag_show(struct seq_file *m, void *arg)
  {
  	pg_data_t *pgdat = (pg_data_t *)arg;
727c080f0   Vinayak Menon   mm: avoid taking ...
2047
  	walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);
f1a5ab121   Mel Gorman   mm: export fragme...
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
  
  	return 0;
  }
  
  static const struct seq_operations extfrag_op = {
  	.start	= frag_start,
  	.next	= frag_next,
  	.stop	= frag_stop,
  	.show	= extfrag_show,
  };
  
  static int extfrag_open(struct inode *inode, struct file *file)
  {
  	return seq_open(file, &extfrag_op);
  }
  
  static const struct file_operations extfrag_file_ops = {
  	.open		= extfrag_open,
  	.read		= seq_read,
  	.llseek		= seq_lseek,
  	.release	= seq_release,
  };
d7a5752c0   Mel Gorman   mm: export unusab...
2070
2071
  static int __init extfrag_debug_init(void)
  {
bde8bd8a1   Sasikantha babu   mm/vmstat.c: remo...
2072
  	struct dentry *extfrag_debug_root;
d7a5752c0   Mel Gorman   mm: export unusab...
2073
2074
2075
2076
2077
2078
  	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
  	if (!extfrag_debug_root)
  		return -ENOMEM;
  
  	if (!debugfs_create_file("unusable_index", 0444,
  			extfrag_debug_root, NULL, &unusable_file_ops))
bde8bd8a1   Sasikantha babu   mm/vmstat.c: remo...
2079
  		goto fail;
d7a5752c0   Mel Gorman   mm: export unusab...
2080

f1a5ab121   Mel Gorman   mm: export fragme...
2081
2082
  	if (!debugfs_create_file("extfrag_index", 0444,
  			extfrag_debug_root, NULL, &extfrag_file_ops))
bde8bd8a1   Sasikantha babu   mm/vmstat.c: remo...
2083
  		goto fail;
f1a5ab121   Mel Gorman   mm: export fragme...
2084

d7a5752c0   Mel Gorman   mm: export unusab...
2085
  	return 0;
bde8bd8a1   Sasikantha babu   mm/vmstat.c: remo...
2086
2087
2088
  fail:
  	debugfs_remove_recursive(extfrag_debug_root);
  	return -ENOMEM;
d7a5752c0   Mel Gorman   mm: export unusab...
2089
2090
2091
2092
  }
  
  module_init(extfrag_debug_init);
  #endif