mm/page_cgroup.c

  #include <linux/mm.h>
  #include <linux/mmzone.h>
  #include <linux/bootmem.h>
  #include <linux/bit_spinlock.h>
  #include <linux/page_cgroup.h>
  #include <linux/hash.h>
  #include <linux/slab.h>
  #include <linux/memory.h>
  #include <linux/vmalloc.h>
  #include <linux/cgroup.h>
  #include <linux/swapops.h>
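
/*
 * page_cgroup: one descriptor per physical page frame, used by the
 * memory cgroup controller to track per-page state.  Two layouts
 * follow: a flat per-node array when !CONFIG_SPARSEMEM, and
 * per-section tables under CONFIG_SPARSEMEM (with memory hotplug
 * support).
 */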
  
  static void __meminit
  __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
  {
  	pc->flags = 0;
  	pc->mem_cgroup = NULL;
  	pc->page = pfn_to_page(pfn);
  	INIT_LIST_HEAD(&pc->lru);
  }
  static unsigned long total_usage;
  
  #if !defined(CONFIG_SPARSEMEM)
  void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
  {
  	pgdat->node_page_cgroup = NULL;
  }
  
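/*
 * Flat model lookup: the node's table is indexed by pfn offset from
 * node_start_pfn.  E.g. (hypothetical numbers) a node starting at pfn
 * 0x10000 stores the page_cgroup for pfn 0x10005 at base[5].
 */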
  struct page_cgroup *lookup_page_cgroup(struct page *page)
  {
  	unsigned long pfn = page_to_pfn(page);
  	unsigned long offset;
  	struct page_cgroup *base;
  
  	base = NODE_DATA(page_to_nid(page))->node_page_cgroup;
  	if (unlikely(!base))
  		return NULL;
  
  	offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn;
  	return base + offset;
  }
  
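/*
 * Allocate one page_cgroup per page spanned by the node, from bootmem
 * (this runs before the slab allocator is available).
 */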
  static int __init alloc_node_page_cgroup(int nid)
  {
  	struct page_cgroup *base, *pc;
  	unsigned long table_size;
  	unsigned long start_pfn, nr_pages, index;
  
  	start_pfn = NODE_DATA(nid)->node_start_pfn;
  	nr_pages = NODE_DATA(nid)->node_spanned_pages;
  	if (!nr_pages)
  		return 0;
  	table_size = sizeof(struct page_cgroup) * nr_pages;
  
  	base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
  			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
  	if (!base)
  		return -ENOMEM;
  	for (index = 0; index < nr_pages; index++) {
  		pc = base + index;
  		__init_page_cgroup(pc, start_pfn + index);
  	}
  	NODE_DATA(nid)->node_page_cgroup = base;
  	total_usage += table_size;
  	return 0;
  }
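/*
 * Boot-time initialization for the !CONFIG_SPARSEMEM layout; walks all
 * online nodes and panics if any per-node table cannot be allocated.
 */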
  void __init page_cgroup_init_flatmem(void)
{
	int nid, fail;
  	if (mem_cgroup_disabled())
  		return;
	for_each_online_node(nid) {
  		fail = alloc_node_page_cgroup(nid);
  		if (fail)
  			goto fail;
  	}
	printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
	printk(KERN_INFO "please try 'cgroup_disable=memory' option if you"
	" don't want memory cgroups\n");
  	return;
  fail:
	printk(KERN_CRIT "allocation of page_cgroup failed.\n");
	printk(KERN_CRIT "please try 'cgroup_disable=memory' boot option\n");
  	panic("Out of memory");
  }
  
#else /* CONFIG_SPARSEMEM */
  
  struct page_cgroup *lookup_page_cgroup(struct page *page)
  {
  	unsigned long pfn = page_to_pfn(page);
  	struct mem_section *section = __pfn_to_section(pfn);
  	if (!section->page_cgroup)
  		return NULL;
  	return section->page_cgroup + pfn;
  }
  /* __alloc_bootmem...() is protected by !slab_available() */
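/*
 * Allocate and initialize one section's page_cgroup table.  kmalloc_node()
 * is tried first; vmalloc_node() is the fallback when the table is too
 * large or memory is fragmented.  If the section already has a table
 * (e.g. on memory re-online), only reinitialization may be needed.
 */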
  static int __init_refok init_section_page_cgroup(unsigned long pfn)
  {
  	struct mem_section *section = __pfn_to_section(pfn);
  	struct page_cgroup *base, *pc;
  	unsigned long table_size;
  	int nid, index;
  	if (!section->page_cgroup) {
  		nid = page_to_nid(pfn_to_page(pfn));
  		table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
  		VM_BUG_ON(!slab_is_available());
  		base = kmalloc_node(table_size,
  				GFP_KERNEL | __GFP_NOWARN, nid);
  		if (!base)
  			base = vmalloc_node(table_size, nid);
  	} else {
		/*
		 * We don't have to allocate the page_cgroup table again,
		 * but the address of memmap may have changed, so we must
		 * initialize it again.
		 */
  		base = section->page_cgroup + pfn;
  		table_size = 0;
		/* check whether the address of memmap has changed */
  		if (base->page == pfn_to_page(pfn))
  			return 0;
  	}
  
  	if (!base) {
		printk(KERN_ERR "page cgroup allocation failure\n");
  		return -ENOMEM;
  	}
  
  	for (index = 0; index < PAGES_PER_SECTION; index++) {
  		pc = base + index;
  		__init_page_cgroup(pc, pfn + index);
  	}
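	/*
	 * Store the pointer biased by this section's start pfn, so that
	 * lookup_page_cgroup() can index with the raw pfn directly.
	 */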
  	section->page_cgroup = base - pfn;
  	total_usage += table_size;
  	return 0;
  }
  #ifdef CONFIG_MEMORY_HOTPLUG
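/*
 * Free one section's table, handling all three allocation origins:
 * vmalloc, kmalloc, or bootmem (bootmem pages are PageReserved and
 * are deliberately left in place).
 */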
  void __free_page_cgroup(unsigned long pfn)
  {
  	struct mem_section *ms;
  	struct page_cgroup *base;
  
  	ms = __pfn_to_section(pfn);
  	if (!ms || !ms->page_cgroup)
  		return;
  	base = ms->page_cgroup + pfn;
  	if (is_vmalloc_addr(base)) {
  		vfree(base);
  		ms->page_cgroup = NULL;
  	} else {
  		struct page *page = virt_to_page(base);
  		if (!PageReserved(page)) { /* Is bootmem ? */
  			kfree(base);
  			ms->page_cgroup = NULL;
  		}
  	}
  }
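/*
 * Called when a memory block is about to go online: allocate tables for
 * every section in [start_pfn, start_pfn + nr_pages), rounded out to
 * section boundaries, and roll back on failure.
 */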
  int __meminit online_page_cgroup(unsigned long start_pfn,
  			unsigned long nr_pages,
  			int nid)
  {
  	unsigned long start, end, pfn;
  	int fail = 0;
  	start = start_pfn & ~(PAGES_PER_SECTION - 1);
  	end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
  
  	for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) {
  		if (!pfn_present(pfn))
  			continue;
  		fail = init_section_page_cgroup(pfn);
  	}
  	if (!fail)
  		return 0;
  
  	/* rollback */
  	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
  		__free_page_cgroup(pfn);
  
  	return -ENOMEM;
  }
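/* Called after a memory block goes offline: free the sections' tables. */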
  int __meminit offline_page_cgroup(unsigned long start_pfn,
  		unsigned long nr_pages, int nid)
  {
  	unsigned long start, end, pfn;
  	start = start_pfn & ~(PAGES_PER_SECTION - 1);
  	end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
  
  	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
  		__free_page_cgroup(pfn);
	return 0;
}
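/*
 * Memory hotplug notifier: tables are allocated at MEM_GOING_ONLINE (so
 * a failure can still cancel the online transition) and freed at
 * MEM_OFFLINE; the remaining events need no action.
 */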
  static int __meminit page_cgroup_callback(struct notifier_block *self,
  			       unsigned long action, void *arg)
  {
  	struct memory_notify *mn = arg;
  	int ret = 0;
  	switch (action) {
  	case MEM_GOING_ONLINE:
  		ret = online_page_cgroup(mn->start_pfn,
  				   mn->nr_pages, mn->status_change_nid);
  		break;
  	case MEM_OFFLINE:
  		offline_page_cgroup(mn->start_pfn,
  				mn->nr_pages, mn->status_change_nid);
  		break;
  	case MEM_CANCEL_ONLINE:
  	case MEM_GOING_OFFLINE:
  		break;
  	case MEM_ONLINE:
  	case MEM_CANCEL_OFFLINE:
  		break;
  	}
  
  	if (ret)
  		ret = notifier_from_errno(ret);
  	else
  		ret = NOTIFY_OK;
  	return ret;
  }
  
  #endif
  
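/*
 * Boot-time initialization for the CONFIG_SPARSEMEM layout: allocate a
 * table for every present section up to max_pfn, then register the
 * hotplug notifier.  Failure at boot is fatal, as for the flat model.
 */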
  void __init page_cgroup_init(void)
  {
  	unsigned long pfn;
  	int fail = 0;
  	if (mem_cgroup_disabled())
  		return;
  	for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) {
  		if (!pfn_present(pfn))
  			continue;
  		fail = init_section_page_cgroup(pfn);
  	}
  	if (fail) {
		printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n");
  		panic("Out of memory");
  	} else {
  		hotplug_memory_notifier(page_cgroup_callback, 0);
  	}
	printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
	printk(KERN_INFO "please try 'cgroup_disable=memory' option if you don't"
	" want memory cgroups\n");
  }
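/* Nothing to do per node under CONFIG_SPARSEMEM; tables are per section. */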
  void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
  {
  	return;
  }
  
  #endif
  
  
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
  
  static DEFINE_MUTEX(swap_cgroup_mutex);
  struct swap_cgroup_ctrl {
  	struct page **map;
  	unsigned long length;
  };
  
  struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
  struct swap_cgroup {
  	unsigned short		id;
  };
  #define SC_PER_PAGE	(PAGE_SIZE/sizeof(struct swap_cgroup))
  #define SC_POS_MASK	(SC_PER_PAGE - 1)
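
/*
 * Worked example (assuming PAGE_SIZE == 4096 and sizeof(struct
 * swap_cgroup) == 2): SC_PER_PAGE == 2048, so swap offset 5000 lives in
 * map[5000 / 2048] == map[2], at position 5000 & 2047 == 904.
 */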
  
/*
 * SwapCgroup implements "lookup" and "exchange" operations.
 * In typical usage, swap_cgroup is accessed via memcg's charge/uncharge
 * against SwapCache.  At swap_free(), it is accessed directly from swap.
 *
 * This means:
 *  - there is no race in "exchange" when accessed via SwapCache, because
 *    SwapCache (and its swp_entry) is under lock.
 *  - when called via swap_free(), there is no user of the entry and hence
 *    no race.
 * Therefore, no lock is needed around "exchange".
 *
 * TODO: we can push these buffers out to HIGHMEM.
 */
  
  /*
   * allocate buffer for swap_cgroup.
   */
  static int swap_cgroup_prepare(int type)
  {
  	struct page *page;
  	struct swap_cgroup_ctrl *ctrl;
  	unsigned long idx, max;
  	ctrl = &swap_cgroup_ctrl[type];
  
  	for (idx = 0; idx < ctrl->length; idx++) {
  		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
  		if (!page)
  			goto not_enough_page;
  		ctrl->map[idx] = page;
  	}
  	return 0;
  not_enough_page:
  	max = idx;
  	for (idx = 0; idx < max; idx++)
  		__free_page(ctrl->map[idx]);
  
  	return -ENOMEM;
  }
  
/**
 * swap_cgroup_record - record mem_cgroup for this swp_entry.
 * @ent: swap entry to be recorded into
 * @id: css ID of the mem_cgroup to be recorded
 *
 * Returns the old value on success, 0 on failure.
 * (The old value may, of course, itself be 0.)
 */
  unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
  {
  	int type = swp_type(ent);
  	unsigned long offset = swp_offset(ent);
  	unsigned long idx = offset / SC_PER_PAGE;
  	unsigned long pos = offset & SC_POS_MASK;
  	struct swap_cgroup_ctrl *ctrl;
  	struct page *mappage;
  	struct swap_cgroup *sc;
  	unsigned short old;

  	ctrl = &swap_cgroup_ctrl[type];
  
  	mappage = ctrl->map[idx];
  	sc = page_address(mappage);
  	sc += pos;
  	old = sc->id;
  	sc->id = id;
  
  	return old;
  }
  
/**
 * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
 * @ent: swap entry to be looked up.
 *
 * Returns the CSS ID of the mem_cgroup on success, 0 on failure
 * (0 is an invalid css ID).
 */
  unsigned short lookup_swap_cgroup(swp_entry_t ent)
  {
  	int type = swp_type(ent);
  	unsigned long offset = swp_offset(ent);
  	unsigned long idx = offset / SC_PER_PAGE;
  	unsigned long pos = offset & SC_POS_MASK;
  	struct swap_cgroup_ctrl *ctrl;
  	struct page *mappage;
  	struct swap_cgroup *sc;
  	unsigned short ret;

  	ctrl = &swap_cgroup_ctrl[type];
  	mappage = ctrl->map[idx];
  	sc = page_address(mappage);
  	sc += pos;
  	ret = sc->id;
  	return ret;
  }
  
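/*
 * Called at swapon time: allocate the page-pointer array and its backing
 * pages for this swap type.  Note that length is max_pages/SC_PER_PAGE + 1,
 * which can over-allocate one page when max_pages divides evenly.
 */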
  int swap_cgroup_swapon(int type, unsigned long max_pages)
  {
  	void *array;
  	unsigned long array_size;
  	unsigned long length;
  	struct swap_cgroup_ctrl *ctrl;
  
  	if (!do_swap_account)
  		return 0;
  
  	length = ((max_pages/SC_PER_PAGE) + 1);
  	array_size = length * sizeof(void *);
  
  	array = vmalloc(array_size);
  	if (!array)
  		goto nomem;
  
  	memset(array, 0, array_size);
  	ctrl = &swap_cgroup_ctrl[type];
  	mutex_lock(&swap_cgroup_mutex);
  	ctrl->length = length;
  	ctrl->map = array;
  	if (swap_cgroup_prepare(type)) {
  		/* memory shortage */
  		ctrl->map = NULL;
  		ctrl->length = 0;
  		vfree(array);
  		mutex_unlock(&swap_cgroup_mutex);
  		goto nomem;
  	}
  	mutex_unlock(&swap_cgroup_mutex);
  	return 0;
  nomem:
	printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.\n");
	printk(KERN_INFO
		"swap_cgroup can be disabled by noswapaccount boot option\n");
  	return -ENOMEM;
  }
  
  void swap_cgroup_swapoff(int type)
  {
  	int i;
  	struct swap_cgroup_ctrl *ctrl;
  
  	if (!do_swap_account)
  		return;
  
  	mutex_lock(&swap_cgroup_mutex);
  	ctrl = &swap_cgroup_ctrl[type];
  	if (ctrl->map) {
  		for (i = 0; i < ctrl->length; i++) {
  			struct page *page = ctrl->map[i];
  			if (page)
  				__free_page(page);
  		}
  		vfree(ctrl->map);
  		ctrl->map = NULL;
  		ctrl->length = 0;
  	}
  	mutex_unlock(&swap_cgroup_mutex);
  }
  
  #endif