Blame view

mm/page_idle.c 5.46 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  // SPDX-License-Identifier: GPL-2.0
33c3fc71c   Vladimir Davydov   mm: introduce idl...
2
  #include <linux/init.h>
57c8a661d   Mike Rapoport   mm: remove includ...
3
  #include <linux/memblock.h>
33c3fc71c   Vladimir Davydov   mm: introduce idl...
4
5
6
  #include <linux/fs.h>
  #include <linux/sysfs.h>
  #include <linux/kobject.h>
92fb1db26   SeongJae Park   mm/page_idle.c: s...
7
  #include <linux/memory_hotplug.h>
33c3fc71c   Vladimir Davydov   mm: introduce idl...
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
  #include <linux/mm.h>
  #include <linux/mmzone.h>
  #include <linux/pagemap.h>
  #include <linux/rmap.h>
  #include <linux/mmu_notifier.h>
  #include <linux/page_ext.h>
  #include <linux/page_idle.h>
  
  #define BITMAP_CHUNK_SIZE	sizeof(u64)
  #define BITMAP_CHUNK_BITS	(BITMAP_CHUNK_SIZE * BITS_PER_BYTE)
  
  /*
   * Idle page tracking only considers user memory pages, for other types of
   * pages the idle flag is always unset and an attempt to set it is silently
   * ignored.
   *
   * We treat a page as a user memory page if it is on an LRU list, because it is
   * always safe to pass such a page to rmap_walk(), which is essential for idle
   * page tracking. With such an indicator of user pages we can skip isolated
   * pages, but since there are not usually many of them, it will hardly affect
   * the overall result.
   *
   * This function tries to get a user memory page by pfn as described above.
   */
  static struct page *page_idle_get_page(unsigned long pfn)
  {
92fb1db26   SeongJae Park   mm/page_idle.c: s...
34
  	struct page *page = pfn_to_online_page(pfn);
f4b7e272b   Andrey Ryabinin   mm: remove zone_l...
35
  	pg_data_t *pgdat;
33c3fc71c   Vladimir Davydov   mm: introduce idl...
36

33c3fc71c   Vladimir Davydov   mm: introduce idl...
37
38
39
  	if (!page || !PageLRU(page) ||
  	    !get_page_unless_zero(page))
  		return NULL;
f4b7e272b   Andrey Ryabinin   mm: remove zone_l...
40
41
  	pgdat = page_pgdat(page);
  	spin_lock_irq(&pgdat->lru_lock);
33c3fc71c   Vladimir Davydov   mm: introduce idl...
42
43
44
45
  	if (unlikely(!PageLRU(page))) {
  		put_page(page);
  		page = NULL;
  	}
f4b7e272b   Andrey Ryabinin   mm: remove zone_l...
46
  	spin_unlock_irq(&pgdat->lru_lock);
33c3fc71c   Vladimir Davydov   mm: introduce idl...
47
48
  	return page;
  }
e4b822227   Minchan Kim   mm: make rmap_one...
49
  static bool page_idle_clear_pte_refs_one(struct page *page,
33c3fc71c   Vladimir Davydov   mm: introduce idl...
50
51
52
  					struct vm_area_struct *vma,
  					unsigned long addr, void *arg)
  {
699fa2168   Kirill A. Shutemov   mm: fix handling ...
53
54
55
56
57
  	struct page_vma_mapped_walk pvmw = {
  		.page = page,
  		.vma = vma,
  		.address = addr,
  	};
33c3fc71c   Vladimir Davydov   mm: introduce idl...
58
  	bool referenced = false;
699fa2168   Kirill A. Shutemov   mm: fix handling ...
59
60
61
  	while (page_vma_mapped_walk(&pvmw)) {
  		addr = pvmw.address;
  		if (pvmw.pte) {
f0849ac0b   Yang Shi   mm: thp: fix pote...
62
63
64
65
66
67
  			/*
  			 * For PTE-mapped THP, one sub page is referenced,
  			 * the whole THP is referenced.
  			 */
  			if (ptep_clear_young_notify(vma, addr, pvmw.pte))
  				referenced = true;
699fa2168   Kirill A. Shutemov   mm: fix handling ...
68
  		} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
f0849ac0b   Yang Shi   mm: thp: fix pote...
69
70
  			if (pmdp_clear_young_notify(vma, addr, pvmw.pmd))
  				referenced = true;
699fa2168   Kirill A. Shutemov   mm: fix handling ...
71
72
73
74
  		} else {
  			/* unexpected pmd-mapped page? */
  			WARN_ON_ONCE(1);
  		}
33c3fc71c   Vladimir Davydov   mm: introduce idl...
75
  	}
b20ce5e03   Kirill A. Shutemov   mm: prepare page_...
76

33c3fc71c   Vladimir Davydov   mm: introduce idl...
77
78
79
80
81
82
83
84
85
  	if (referenced) {
  		clear_page_idle(page);
  		/*
  		 * We cleared the referenced bit in a mapping to this page. To
  		 * avoid interference with page reclaim, mark it young so that
  		 * page_referenced() will return > 0.
  		 */
  		set_page_young(page);
  	}
e4b822227   Minchan Kim   mm: make rmap_one...
86
  	return true;
33c3fc71c   Vladimir Davydov   mm: introduce idl...
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
  }
  
  static void page_idle_clear_pte_refs(struct page *page)
  {
  	/*
  	 * Since rwc.arg is unused, rwc is effectively immutable, so we
  	 * can make it static const to save some cycles and stack.
  	 */
  	static const struct rmap_walk_control rwc = {
  		.rmap_one = page_idle_clear_pte_refs_one,
  		.anon_lock = page_lock_anon_vma_read,
  	};
  	bool need_lock;
  
  	if (!page_mapped(page) ||
  	    !page_rmapping(page))
  		return;
  
  	need_lock = !PageAnon(page) || PageKsm(page);
  	if (need_lock && !trylock_page(page))
  		return;
  
  	rmap_walk(page, (struct rmap_walk_control *)&rwc);
  
  	if (need_lock)
  		unlock_page(page);
  }
  
  /*
   * sysfs read handler for the idle page bitmap.
   *
   * The bitmap is an array of u64 chunks in which bit (pfn % 64) of chunk
   * (pfn / 64) is set when the page at that pfn is idle. @pos and @count are
   * byte values and must both be multiples of BITMAP_CHUNK_SIZE, otherwise
   * -EINVAL is returned. An offset at or beyond max_pfn reads as end of file
   * (returns 0).
   *
   * Returns the number of bytes of @buf covered by complete chunks. The range
   * is clamped to max_pfn, so a final partial chunk is filled in but not
   * counted in the return value.
   */
  static ssize_t page_idle_bitmap_read(struct file *file, struct kobject *kobj,
  				     struct bin_attribute *attr, char *buf,
  				     loff_t pos, size_t count)
  {
  	u64 *out = (u64 *)buf;
  	struct page *page;
  	unsigned long pfn, end_pfn;
  	int bit;
  
  	if (pos % BITMAP_CHUNK_SIZE || count % BITMAP_CHUNK_SIZE)
  		return -EINVAL;
  
  	/*
  	 * pos is a 64-bit file offset. Bound it before scaling so that
  	 * pos * BITS_PER_BYTE cannot wrap around and alias a low, valid pfn;
  	 * any such offset is necessarily past the end of the bitmap.
  	 */
  	if (pos > (loff_t)(~0UL / BITS_PER_BYTE))
  		return 0;
  
  	pfn = (unsigned long)pos * BITS_PER_BYTE;
  	if (pfn >= max_pfn)
  		return 0;
  
  	end_pfn = pfn + count * BITS_PER_BYTE;
  	if (end_pfn > max_pfn)
  		end_pfn = max_pfn;
  
  	for (; pfn < end_pfn; pfn++) {
  		bit = pfn % BITMAP_CHUNK_BITS;
  		/*
  		 * Start every chunk from zero. The first pfn is chunk-aligned
  		 * because pos is a multiple of BITMAP_CHUNK_SIZE.
  		 */
  		if (!bit)
  			*out = 0ULL;
  		page = page_idle_get_page(pfn);
  		if (page) {
  			if (page_is_idle(page)) {
  				/*
  				 * The page might have been referenced via a
  				 * pte, in which case it is not idle. Clear
  				 * refs and recheck.
  				 */
  				page_idle_clear_pte_refs(page);
  				if (page_is_idle(page))
  					*out |= 1ULL << bit;
  			}
  			put_page(page);
  		}
  		/* Advance only once the current chunk is complete. */
  		if (bit == BITMAP_CHUNK_BITS - 1)
  			out++;
  		cond_resched();
  	}
  	return (char *)out - buf;
  }
  
  /*
   * sysfs write handler for the idle page bitmap.
   *
   * For every bit set in the written u64 chunks, the page at the matching pfn
   * (if page_idle_get_page() accepts it) has its mapping references cleared
   * and its idle flag set. Bits that are clear are ignored, so a write can
   * only mark pages idle, never un-mark them.
   *
   * @pos and @count are byte values and must both be multiples of
   * BITMAP_CHUNK_SIZE, otherwise -EINVAL is returned. An offset at or beyond
   * max_pfn yields -ENXIO.
   *
   * Returns the number of bytes of @buf covered by complete chunks. The range
   * is clamped to max_pfn, so a final partial chunk is processed but not
   * counted in the return value.
   */
  static ssize_t page_idle_bitmap_write(struct file *file, struct kobject *kobj,
  				      struct bin_attribute *attr, char *buf,
  				      loff_t pos, size_t count)
  {
  	const u64 *in = (u64 *)buf;
  	struct page *page;
  	unsigned long pfn, end_pfn;
  	int bit;
  
  	if (pos % BITMAP_CHUNK_SIZE || count % BITMAP_CHUNK_SIZE)
  		return -EINVAL;
  
  	/*
  	 * pos is a 64-bit file offset. Bound it before scaling so that
  	 * pos * BITS_PER_BYTE cannot wrap around and make the write land on
  	 * unrelated low pfns; any such offset is out of range.
  	 */
  	if (pos > (loff_t)(~0UL / BITS_PER_BYTE))
  		return -ENXIO;
  
  	pfn = (unsigned long)pos * BITS_PER_BYTE;
  	if (pfn >= max_pfn)
  		return -ENXIO;
  
  	end_pfn = pfn + count * BITS_PER_BYTE;
  	if (end_pfn > max_pfn)
  		end_pfn = max_pfn;
  
  	for (; pfn < end_pfn; pfn++) {
  		bit = pfn % BITMAP_CHUNK_BITS;
  		if ((*in >> bit) & 1) {
  			page = page_idle_get_page(pfn);
  			if (page) {
  				/*
  				 * Clear the references first so that accesses
  				 * made before this write do not count against
  				 * the new idle state.
  				 */
  				page_idle_clear_pte_refs(page);
  				set_page_idle(page);
  				put_page(page);
  			}
  		}
  		/* Advance only once the current chunk is consumed. */
  		if (bit == BITMAP_CHUNK_BITS - 1)
  			in++;
  		cond_resched();
  	}
  	return (char *)in - buf;
  }
  
  static struct bin_attribute page_idle_bitmap_attr =
0825a6f98   Joe Perches   mm: use octal not...
198
  		__BIN_ATTR(bitmap, 0600,
33c3fc71c   Vladimir Davydov   mm: introduce idl...
199
200
201
202
203
204
  			   page_idle_bitmap_read, page_idle_bitmap_write, 0);
  
  static struct bin_attribute *page_idle_bin_attrs[] = {
  	&page_idle_bitmap_attr,
  	NULL,
  };
fd147cbb6   Arvind Yadav   mm/page_idle.c: c...
205
  static const struct attribute_group page_idle_attr_group = {
33c3fc71c   Vladimir Davydov   mm: introduce idl...
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
  	.bin_attrs = page_idle_bin_attrs,
  	.name = "page_idle",
  };
  
  /*
   * Only built when CONFIG_64BIT is not set.
   * NOTE(review): presumably the idle/young state lives in page_ext on such
   * builds, which is why a page_ext_operations instance is provided here -
   * confirm against <linux/page_idle.h>.
   */
  #ifndef CONFIG_64BIT
  /* .need callback for page_idle_ops: unconditionally reports true. */
  static bool need_page_idle(void)
  {
  	return true;
  }
  /* Non-static: meant to be referenced from outside this file. */
  struct page_ext_operations page_idle_ops = {
  	.need = need_page_idle,
  };
  #endif
  
  /*
   * Boot-time initialisation: register the "page_idle" attribute group on
   * mm_kobj. Returns 0 on success, or the error from sysfs_create_group()
   * after logging it.
   */
  static int __init page_idle_init(void)
  {
  	int err;
  
  	err = sysfs_create_group(mm_kobj, &page_idle_attr_group);
  	if (err) {
  		pr_err("page_idle: register sysfs failed\n");
  		return err;
  	}
  	return 0;
  }
  subsys_initcall(page_idle_init);