Blame view

mm/page_cgroup.c 12.9 KB
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
1
2
3
4
5
6
  #include <linux/mm.h>
  #include <linux/mmzone.h>
  #include <linux/bootmem.h>
  #include <linux/bit_spinlock.h>
  #include <linux/page_cgroup.h>
  #include <linux/hash.h>
94b6da5ab   KAMEZAWA Hiroyuki   memcg: fix page_c...
7
  #include <linux/slab.h>
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
8
  #include <linux/memory.h>
4c8210427   Paul Mundt   mm: page_cgroup n...
9
  #include <linux/vmalloc.h>
94b6da5ab   KAMEZAWA Hiroyuki   memcg: fix page_c...
10
  #include <linux/cgroup.h>
27a7faa07   KAMEZAWA Hiroyuki   memcg: swap cgrou...
11
  #include <linux/swapops.h>
7952f9881   Catalin Marinas   kmemleak: Annotat...
12
  #include <linux/kmemleak.h>
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
13

6b3ae58ef   Johannes Weiner   memcg: remove dir...
14
  /*
   * Put a freshly allocated page_cgroup descriptor into its initial state:
   * no owning mem_cgroup, an empty LRU link, cleared flags and the id of the
   * array it belongs to (callers in this file pass a node id or a section
   * number).
   */
  static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
  {
  	pc->mem_cgroup = NULL;
  	INIT_LIST_HEAD(&pc->lru);

  	/*
  	 * Flags are zeroed before the array id is recorded.
  	 * NOTE(review): set_page_cgroup_array_id() presumably stores the id
  	 * inside pc->flags, so this ordering must be kept - confirm.
  	 */
  	pc->flags = 0;
  	set_page_cgroup_array_id(pc, id);
  }
  /* Running total, in bytes, of the page_cgroup tables allocated so far. */
  static unsigned long total_usage;
  
  #if !defined(CONFIG_SPARSEMEM)
31168481c   Al Viro   meminit section w...
24
  /*
   * Non-SPARSEMEM variant: a node starts out without a page_cgroup table.
   * lookup_page_cgroup() returns NULL for the node's pages while the
   * pointer is NULL.
   */
  void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
  {
  	pgdat->node_page_cgroup = NULL;
  }
  
  struct page_cgroup *lookup_page_cgroup(struct page *page)
  {
  	unsigned long pfn = page_to_pfn(page);
  	unsigned long offset;
  	struct page_cgroup *base;
  
  	base = NODE_DATA(page_to_nid(page))->node_page_cgroup;
  	if (unlikely(!base))
  		return NULL;
  
  	offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn;
  	return base + offset;
  }
6b3ae58ef   Johannes Weiner   memcg: remove dir...
42
43
44
45
46
47
48
49
50
51
52
53
  /*
   * Inverse of lookup_page_cgroup() (non-SPARSEMEM variant): recover the
   * struct page a descriptor belongs to.  The array id stored in the
   * descriptor is used as the node id here, and the descriptor's index in
   * that node's table is the page's offset from the node's first pfn.
   */
  struct page *lookup_cgroup_page(struct page_cgroup *pc)
  {
  	pg_data_t *pgdat = NODE_DATA(page_cgroup_array_id(pc));
  	unsigned long pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn;
  	struct page *page = pfn_to_page(pfn);

  	/* The forward lookup must lead back to the same descriptor. */
  	VM_BUG_ON(pc != lookup_page_cgroup(page));
  	return page;
  }
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
54
55
56
57
58
59
60
61
  /*
   * Allocate and initialise the page_cgroup table of node @nid from bootmem.
   *
   * One descriptor is allocated for every page spanned by the node.  A node
   * that spans no pages needs no table.  On success the table is published
   * in the node's pglist_data and its size is added to total_usage.
   *
   * Returns 0 on success (or when there is nothing to do), -ENOMEM when the
   * bootmem allocation fails.
   */
  static int __init alloc_node_page_cgroup(int nid)
  {
  	struct page_cgroup *base;
  	unsigned long table_size;
  	unsigned long nr_pages, index;

  	nr_pages = NODE_DATA(nid)->node_spanned_pages;
  	if (!nr_pages)
  		return 0;

  	table_size = sizeof(struct page_cgroup) * nr_pages;

  	base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
  			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
  	if (!base)
  		return -ENOMEM;

  	/* Every descriptor records the node id as its array id. */
  	for (index = 0; index < nr_pages; index++)
  		init_page_cgroup(base + index, nid);

  	NODE_DATA(nid)->node_page_cgroup = base;
  	total_usage += table_size;
  	return 0;
  }
ca371c0d7   KAMEZAWA Hiroyuki   memcg: fix page_c...
78
  /*
   * Boot-time setup for the non-SPARSEMEM case: allocate a page_cgroup table
   * for every online node.  Does nothing when the memory cgroup controller
   * is disabled.  Failure to allocate any table is fatal (panic).
   */
  void __init page_cgroup_init_flatmem(void)
  {
  	int nid;

  	if (mem_cgroup_disabled())
  		return;

  	for_each_online_node(nid) {
  		if (alloc_node_page_cgroup(nid))
  			goto fail;
  	}

  	/* total_usage is unsigned long, hence %lu. */
  	printk(KERN_INFO "allocated %lu bytes of page_cgroup\n", total_usage);
  	printk(KERN_INFO "please try 'cgroup_disable=memory' option if you"
  	" don't want memory cgroups\n");
  	return;
  fail:
  	printk(KERN_CRIT "allocation of page_cgroup failed.\n");
  	printk(KERN_CRIT "please try 'cgroup_disable=memory' boot option\n");
  	panic("Out of memory");
  }
  
  #else /* CONFIG_FLAT_NODE_MEM_MAP */
  
  struct page_cgroup *lookup_page_cgroup(struct page *page)
  {
  	unsigned long pfn = page_to_pfn(page);
  	struct mem_section *section = __pfn_to_section(pfn);
d69b042f3   Balbir Singh   memcg: add file-b...
109
110
  	if (!section->page_cgroup)
  		return NULL;
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
111
112
  	return section->page_cgroup + pfn;
  }
6b3ae58ef   Johannes Weiner   memcg: remove dir...
113
114
115
116
117
118
119
120
121
122
123
124
  /*
   * Inverse of lookup_page_cgroup() (SPARSEMEM variant): recover the struct
   * page a descriptor belongs to.  The array id stored in the descriptor is
   * used as the section number; the descriptor's distance from that
   * section's (pfn-biased) table pointer is the pfn.
   */
  struct page *lookup_cgroup_page(struct page_cgroup *pc)
  {
  	unsigned long section_nr = page_cgroup_array_id(pc);
  	struct mem_section *section = __nr_to_section(section_nr);
  	struct page *page = pfn_to_page(pc - section->page_cgroup);

  	/* The forward lookup must lead back to the same descriptor. */
  	VM_BUG_ON(pc != lookup_page_cgroup(page));
  	return page;
  }
268433b8e   Namhyung Kim   memcg: mark init_...
125
  /*
   * Allocate @size bytes for a page_cgroup table, preferring node @nid.
   *
   * Physically contiguous pages are tried first (without an allocation
   * failure warning).  If that fails the table comes from vmalloc space:
   * node-local when the node is in the N_HIGH_MEMORY state, anywhere
   * otherwise.  Returns NULL when every attempt fails.
   */
  static void *__meminit alloc_page_cgroup(size_t size, int nid)
  {
  	void *table;

  	table = alloc_pages_exact_nid(nid, size, GFP_KERNEL | __GFP_NOWARN);
  	if (table)
  		return table;

  	return node_state(nid, N_HIGH_MEMORY) ? vmalloc_node(size, nid)
  					      : vmalloc(size);
  }
  
  #ifdef CONFIG_MEMORY_HOTPLUG
  static void free_page_cgroup(void *addr)
  {
  	if (is_vmalloc_addr(addr)) {
  		vfree(addr);
  	} else {
  		struct page *page = virt_to_page(addr);
6cfddb261   Michal Hocko   memcg: page_cgrou...
147
148
149
150
151
  		size_t table_size =
  			sizeof(struct page_cgroup) * PAGES_PER_SECTION;
  
  		BUG_ON(PageReserved(page));
  		free_pages_exact(addr, table_size);
dde79e005   Michal Hocko   page_cgroup: redu...
152
153
154
  	}
  }
  #endif
37573e8c7   KAMEZAWA Hiroyuki   memcg: fix init_p...
155
  /*
   * Allocate and initialise the page_cgroup table for the memory section
   * that contains @pfn, preferring memory from node @nid.
   *
   * Nothing is done when the section already has a table.  On success the
   * section pointer is set to (table base - first pfn of the section) so
   * that it can be indexed by pfn, and total_usage grows by the table size.
   *
   * Returns 0 on success, -ENOMEM when the table cannot be allocated.
   */
  static int __meminit init_section_page_cgroup(unsigned long pfn, int nid)
  {
  	struct page_cgroup *base;
  	struct mem_section *section;
  	unsigned long table_size;
  	unsigned long nr;
  	unsigned long index;

  	nr = pfn_to_section_nr(pfn);
  	section = __nr_to_section(nr);

  	if (section->page_cgroup)
  		return 0;

  	table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
  	base = alloc_page_cgroup(table_size, nid);

  	/*
  	 * The value stored in section->page_cgroup is (base - pfn)
  	 * and it does not point to the memory block allocated above,
  	 * causing kmemleak false positives.
  	 */
  	kmemleak_not_leak(base);

  	if (!base) {
  		printk(KERN_ERR "page cgroup allocation failure\n");
  		return -ENOMEM;
  	}

  	/* Every descriptor records the section number as its array id. */
  	for (index = 0; index < PAGES_PER_SECTION; index++)
  		init_page_cgroup(base + index, nr);

  	/*
  	 * The passed "pfn" may not be aligned to SECTION.  For the calculation
  	 * we need to apply a mask.
  	 */
  	pfn &= PAGE_SECTION_MASK;
  	section->page_cgroup = base - pfn;
  	total_usage += table_size;
  	return 0;
  }
  #ifdef CONFIG_MEMORY_HOTPLUG
  /*
   * Free the page_cgroup table of the section containing @pfn, if any, and
   * clear the section's pointer.
   *
   * The stored pointer is biased by the section's first pfn, so adding @pfn
   * yields the address handed to free_page_cgroup().  Callers in this file
   * pass section-aligned pfns.
   */
  void __free_page_cgroup(unsigned long pfn)
  {
  	struct mem_section *ms = __pfn_to_section(pfn);

  	if (ms && ms->page_cgroup) {
  		free_page_cgroup(ms->page_cgroup + pfn);
  		ms->page_cgroup = NULL;
  	}
  }
31168481c   Al Viro   meminit section w...
209
  /*
   * Make sure every present section overlapping
   * [start_pfn, start_pfn + nr_pages) has a page_cgroup table.
   *
   * @nid may be -1, in which case the node is derived from @start_pfn.
   *
   * Returns 0 on success.  If any section fails, the tables of all sections
   * in the range are freed again and -ENOMEM is returned.
   * NOTE(review): the rollback also frees tables of sections that already
   * had one before this call - confirm that this is intended.
   */
  int __meminit online_page_cgroup(unsigned long start_pfn,
  			unsigned long nr_pages,
  			int nid)
  {
  	unsigned long first = SECTION_ALIGN_DOWN(start_pfn);
  	unsigned long last = SECTION_ALIGN_UP(start_pfn + nr_pages);
  	unsigned long pfn;

  	if (nid == -1) {
  		/*
  		 * No node id was supplied, so the node already exists and
  		 * contains valid memory.  "start_pfn" is the pfn handed to
  		 * online_pages() and is expected to exist, so its node can
  		 * be looked up.
  		 */
  		nid = pfn_to_nid(start_pfn);
  		VM_BUG_ON(!node_state(nid, N_ONLINE));
  	}

  	for (pfn = first; pfn < last; pfn += PAGES_PER_SECTION) {
  		if (!pfn_present(pfn))
  			continue;
  		if (init_section_page_cgroup(pfn, nid))
  			goto rollback;
  	}
  	return 0;

  rollback:
  	for (pfn = first; pfn < last; pfn += PAGES_PER_SECTION)
  		__free_page_cgroup(pfn);

  	return -ENOMEM;
  }
31168481c   Al Viro   meminit section w...
241
  /*
   * Free the page_cgroup tables of every section overlapping
   * [start_pfn, start_pfn + nr_pages).  @nid is not used.  Always returns 0.
   */
  int __meminit offline_page_cgroup(unsigned long start_pfn,
  		unsigned long nr_pages, int nid)
  {
  	unsigned long pfn = SECTION_ALIGN_DOWN(start_pfn);
  	unsigned long last = SECTION_ALIGN_UP(start_pfn + nr_pages);

  	while (pfn < last) {
  		__free_page_cgroup(pfn);
  		pfn += PAGES_PER_SECTION;
  	}
  	return 0;
  }
31168481c   Al Viro   meminit section w...
253
  /*
   * Memory hotplug notifier: allocate page_cgroup tables when memory is
   * about to go online and free them once memory has gone offline.  Every
   * other action is ignored.
   *
   * NOTE(review): MEM_CANCEL_ONLINE releases nothing, so tables allocated
   * at MEM_GOING_ONLINE stay in place when onlining is cancelled - confirm
   * that this is intended.
   *
   * Returns the notifier code for the errno produced by the online path
   * (0 for every other action).
   */
  static int __meminit page_cgroup_callback(struct notifier_block *self,
  			       unsigned long action, void *arg)
  {
  	struct memory_notify *mn = arg;
  	int err = 0;

  	if (action == MEM_GOING_ONLINE)
  		err = online_page_cgroup(mn->start_pfn,
  				   mn->nr_pages, mn->status_change_nid);
  	else if (action == MEM_OFFLINE)
  		offline_page_cgroup(mn->start_pfn,
  				mn->nr_pages, mn->status_change_nid);
  	/*
  	 * MEM_CANCEL_ONLINE, MEM_GOING_OFFLINE, MEM_ONLINE and
  	 * MEM_CANCEL_OFFLINE need no work.
  	 */

  	return notifier_from_errno(err);
  }
  
  #endif
  
  /*
   * Boot-time setup for the SPARSEMEM case: allocate a page_cgroup table for
   * every section of every node in the N_HIGH_MEMORY state, then register
   * the memory hotplug notifier.  Does nothing when the memory cgroup
   * controller is disabled.  Failure to allocate any table is fatal (panic).
   */
  void __init page_cgroup_init(void)
  {
  	unsigned long pfn;
  	int nid;

  	if (mem_cgroup_disabled())
  		return;

  	for_each_node_state(nid, N_HIGH_MEMORY) {
  		unsigned long start_pfn, end_pfn;

  		start_pfn = node_start_pfn(nid);
  		end_pfn = node_end_pfn(nid);
  		/*
  		 * start_pfn and end_pfn may not be aligned to SECTION and the
  		 * page->flags of out of node pages are not initialized.  So we
  		 * scan [start_pfn, the biggest section's pfn < end_pfn) here.
  		 */
  		for (pfn = start_pfn;
  		     pfn < end_pfn;
  		     pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {

  			if (!pfn_valid(pfn))
  				continue;
  			/*
  			 * Nodes' pfns can be overlapping.
  			 * We know some arch can have a nodes layout such as
  			 * -------------pfn-------------->
  			 * N0 | N1 | N2 | N0 | N1 | N2|....
  			 */
  			if (pfn_to_nid(pfn) != nid)
  				continue;
  			if (init_section_page_cgroup(pfn, nid))
  				goto oom;
  		}
  	}
  	hotplug_memory_notifier(page_cgroup_callback, 0);

  	/* total_usage is unsigned long, hence %lu. */
  	printk(KERN_INFO "allocated %lu bytes of page_cgroup\n", total_usage);
  	printk(KERN_INFO "please try 'cgroup_disable=memory' option if you "
  			 "don't want memory cgroups\n");
  	return;
  oom:
  	printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n");
  	panic("Out of memory");
  }
31168481c   Al Viro   meminit section w...
327
  /*
   * SPARSEMEM variant: page_cgroup tables hang off the memory sections, so
   * there is no per-node state to set up.
   */
  void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
  {
  }
  
  #endif
27a7faa07   KAMEZAWA Hiroyuki   memcg: swap cgrou...
333
334
335
336
337
338
339
340
  
  
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
  
  static DEFINE_MUTEX(swap_cgroup_mutex);
  struct swap_cgroup_ctrl {
  	struct page **map;
  	unsigned long length;
e9e58a4ec   KAMEZAWA Hiroyuki   memcg: avoid use ...
341
  	spinlock_t	lock;
27a7faa07   KAMEZAWA Hiroyuki   memcg: swap cgrou...
342
343
344
  };
  
  struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
27a7faa07   KAMEZAWA Hiroyuki   memcg: swap cgrou...
345
  /* Record kept per swap entry: the id of the mem_cgroup it is charged to. */
  struct swap_cgroup {
  	unsigned short		id;	/* 0 is not a valid mem_cgroup id */
  };
  /* Records per page, and the mask extracting a record's slot in its page. */
  #define SC_PER_PAGE	(PAGE_SIZE/sizeof(struct swap_cgroup))
  #define SC_POS_MASK	(SC_PER_PAGE - 1)
  
  /*
   * SwapCgroup implements "lookup" and "exchange" operations.
   * In typical usage, a swap_cgroup is accessed via memcg's charge/uncharge
   * against SwapCache. At swap_free(), it is accessed directly from swap.
   *
   * This means:
   *  - there is no race in "exchange" when we are accessed via SwapCache,
   *    because SwapCache (and its swp_entry) is under lock.
   *  - when called via swap_free(), there is no other user of this entry
   *    and therefore no race.
   * Hence callers need no additional locking around "exchange".
   * (The id update itself is still performed under the per-type ctrl->lock.)
   *
   * TODO: we can push these buffers out to HIGHMEM.
   */
  
  /*
   * allocate buffer for swap_cgroup.
   */
  static int swap_cgroup_prepare(int type)
  {
  	struct page *page;
  	struct swap_cgroup_ctrl *ctrl;
  	unsigned long idx, max;
27a7faa07   KAMEZAWA Hiroyuki   memcg: swap cgrou...
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
  	ctrl = &swap_cgroup_ctrl[type];
  
  	for (idx = 0; idx < ctrl->length; idx++) {
  		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
  		if (!page)
  			goto not_enough_page;
  		ctrl->map[idx] = page;
  	}
  	return 0;
  not_enough_page:
  	max = idx;
  	for (idx = 0; idx < max; idx++)
  		__free_page(ctrl->map[idx]);
  
  	return -ENOMEM;
  }
  
  /**
024914477   Daisuke Nishimura   memcg: move charg...
391
392
393
394
395
396
   * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
   * @ent: swap entry to be cmpxchged
   * @old: old id
   * @new: new id
   *
   * Returns old id at success, 0 at failure.
25985edce   Lucas De Marchi   Fix common misspe...
397
   * (There is no mem_cgroup using 0 as its id)
024914477   Daisuke Nishimura   memcg: move charg...
398
399
400
401
402
403
404
405
406
407
408
   */
  unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
  					unsigned short old, unsigned short new)
  {
  	struct swap_cgroup_ctrl *ctrl = &swap_cgroup_ctrl[swp_type(ent)];
  	unsigned long offset = swp_offset(ent);
  	struct swap_cgroup *slot;
  	unsigned short seen;
  	unsigned long flags;

  	/* Find the page holding the record, then the record inside it. */
  	slot = page_address(ctrl->map[offset / SC_PER_PAGE]);
  	slot += offset & SC_POS_MASK;

  	/* Compare and swap under the per-type lock. */
  	spin_lock_irqsave(&ctrl->lock, flags);
  	seen = slot->id;
  	if (seen != old)
  		seen = 0;	/* mismatch: report failure, leave id alone */
  	else
  		slot->id = new;
  	spin_unlock_irqrestore(&ctrl->lock, flags);

  	return seen;
  }
  
  /**
27a7faa07   KAMEZAWA Hiroyuki   memcg: swap cgrou...
428
429
430
431
   * swap_cgroup_record - record mem_cgroup for this swp_entry.
   * @ent: swap entry to be recorded into
   * @id: id of the mem_cgroup to be recorded
   *
a3b2d6926   KAMEZAWA Hiroyuki   cgroups: use css ...
432
433
   * Returns old value at success, 0 at failure.
   * (Of course, old value can be 0.)
27a7faa07   KAMEZAWA Hiroyuki   memcg: swap cgrou...
434
   */
a3b2d6926   KAMEZAWA Hiroyuki   cgroups: use css ...
435
  unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
  {
  	struct swap_cgroup_ctrl *ctrl = &swap_cgroup_ctrl[swp_type(ent)];
  	unsigned long offset = swp_offset(ent);
  	struct swap_cgroup *slot;
  	unsigned short previous;
  	unsigned long flags;

  	/* Find the page holding the record, then the record inside it. */
  	slot = page_address(ctrl->map[offset / SC_PER_PAGE]);
  	slot += offset & SC_POS_MASK;

  	/* Exchange the stored id under the per-type lock. */
  	spin_lock_irqsave(&ctrl->lock, flags);
  	previous = slot->id;
  	slot->id = id;
  	spin_unlock_irqrestore(&ctrl->lock, flags);

  	return previous;
  }
  
  /**
   * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
   * @ent: swap entry to be looked up.
   *
a3b2d6926   KAMEZAWA Hiroyuki   cgroups: use css ...
464
   * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
27a7faa07   KAMEZAWA Hiroyuki   memcg: swap cgrou...
465
   */
a3b2d6926   KAMEZAWA Hiroyuki   cgroups: use css ...
466
  unsigned short lookup_swap_cgroup(swp_entry_t ent)
  {
  	struct swap_cgroup_ctrl *ctrl = &swap_cgroup_ctrl[swp_type(ent)];
  	unsigned long offset = swp_offset(ent);
  	struct swap_cgroup *slot;

  	/* Find the page holding the record, then the record inside it. */
  	slot = page_address(ctrl->map[offset / SC_PER_PAGE]);
  	slot += offset & SC_POS_MASK;

  	/* The id is read without taking ctrl->lock. */
  	return slot->id;
  }
  
  int swap_cgroup_swapon(int type, unsigned long max_pages)
  {
  	void *array;
  	unsigned long array_size;
  	unsigned long length;
  	struct swap_cgroup_ctrl *ctrl;
  
  	if (!do_swap_account)
  		return 0;
33278f7f0   Namhyung Kim   memcg: fix off-by...
494
  	length = DIV_ROUND_UP(max_pages, SC_PER_PAGE);
27a7faa07   KAMEZAWA Hiroyuki   memcg: swap cgrou...
495
496
497
498
499
500
501
502
503
504
505
  	array_size = length * sizeof(void *);
  
  	array = vmalloc(array_size);
  	if (!array)
  		goto nomem;
  
  	memset(array, 0, array_size);
  	ctrl = &swap_cgroup_ctrl[type];
  	mutex_lock(&swap_cgroup_mutex);
  	ctrl->length = length;
  	ctrl->map = array;
e9e58a4ec   KAMEZAWA Hiroyuki   memcg: avoid use ...
506
  	spin_lock_init(&ctrl->lock);
27a7faa07   KAMEZAWA Hiroyuki   memcg: swap cgrou...
507
508
509
510
  	if (swap_cgroup_prepare(type)) {
  		/* memory shortage */
  		ctrl->map = NULL;
  		ctrl->length = 0;
27a7faa07   KAMEZAWA Hiroyuki   memcg: swap cgrou...
511
  		mutex_unlock(&swap_cgroup_mutex);
6a5b18d2b   Namhyung Kim   memcg: move page-...
512
  		vfree(array);
27a7faa07   KAMEZAWA Hiroyuki   memcg: swap cgrou...
513
514
515
  		goto nomem;
  	}
  	mutex_unlock(&swap_cgroup_mutex);
27a7faa07   KAMEZAWA Hiroyuki   memcg: swap cgrou...
516
517
518
519
520
  	return 0;
  nomem:
  	printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.
  ");
  	printk(KERN_INFO
00a66d297   WANG Cong   mm: remove the le...
521
522
  		"swap_cgroup can be disabled by swapaccount=0 boot option
  ");
27a7faa07   KAMEZAWA Hiroyuki   memcg: swap cgrou...
523
524
525
526
527
  	return -ENOMEM;
  }
  
  void swap_cgroup_swapoff(int type)
  {
6a5b18d2b   Namhyung Kim   memcg: move page-...
528
529
  	struct page **map;
  	unsigned long i, length;
27a7faa07   KAMEZAWA Hiroyuki   memcg: swap cgrou...
530
531
532
533
534
535
536
  	struct swap_cgroup_ctrl *ctrl;
  
  	if (!do_swap_account)
  		return;
  
  	mutex_lock(&swap_cgroup_mutex);
  	ctrl = &swap_cgroup_ctrl[type];
6a5b18d2b   Namhyung Kim   memcg: move page-...
537
538
539
540
541
542
543
544
545
  	map = ctrl->map;
  	length = ctrl->length;
  	ctrl->map = NULL;
  	ctrl->length = 0;
  	mutex_unlock(&swap_cgroup_mutex);
  
  	if (map) {
  		for (i = 0; i < length; i++) {
  			struct page *page = map[i];
27a7faa07   KAMEZAWA Hiroyuki   memcg: swap cgrou...
546
547
548
  			if (page)
  				__free_page(page);
  		}
6a5b18d2b   Namhyung Kim   memcg: move page-...
549
  		vfree(map);
27a7faa07   KAMEZAWA Hiroyuki   memcg: swap cgrou...
550
  	}
27a7faa07   KAMEZAWA Hiroyuki   memcg: swap cgrou...
551
552
553
  }
  
  #endif