  /*
   * zsmalloc memory allocator
   *
   * Copyright (C) 2011  Nitin Gupta
   * Copyright (C) 2012, 2013 Minchan Kim
   *
   * This code is released using a dual license strategy: BSD/GPL
   * You can choose the license that better fits your requirements.
   *
   * Released under the terms of 3-clause BSD License
   * Released under the terms of GNU General Public License Version 2.0
   */
  /*
   * Following is how we use various fields and flags of underlying
   * struct page(s) to form a zspage.
   *
   * Usage of struct page fields:
   *	page->private: points to zspage
   *	page->freelist(index): links together all component pages of a zspage
   *		For the huge page, this is always 0, so we use this field
   *		to store the handle.
   *	page->units: first object offset in a subpage of zspage
   *
   * Usage of struct page flags:
   *	PG_private: identifies the first component page
   *	PG_owner_priv_1: identifies the huge component page
   *
   */
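  /*
   * Illustrative sketch (not authoritative): for a hypothetical zspage built
   * from two 0-order pages P0 and P1, the fields above end up wired roughly as
   *
   *	P0->private  = (unsigned long)zspage;	P0 has PG_private set
   *	P0->freelist = P1;			P0->units = 0
   *	P1->private  = (unsigned long)zspage;	P1->freelist = NULL
   *	P1->units    = offset of the first object that starts in P1
   *
   * A "huge" zspage is a single page with PG_owner_priv_1 set, and its
   * freelist/index field stores the object's handle instead.
   */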
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  #include <linux/module.h>
  #include <linux/kernel.h>
  #include <linux/sched.h>
  #include <linux/magic.h>
  #include <linux/bitops.h>
  #include <linux/errno.h>
  #include <linux/highmem.h>
  #include <linux/string.h>
  #include <linux/slab.h>
  #include <asm/tlbflush.h>
  #include <asm/pgtable.h>
  #include <linux/cpumask.h>
  #include <linux/cpu.h>
  #include <linux/vmalloc.h>
  #include <linux/preempt.h>
  #include <linux/spinlock.h>
  #include <linux/shrinker.h>
  #include <linux/types.h>
  #include <linux/debugfs.h>
  #include <linux/zsmalloc.h>
  #include <linux/zpool.h>
  #include <linux/mount.h>
  #include <linux/pseudo_fs.h>
  #include <linux/migrate.h>
  #include <linux/wait.h>
  #include <linux/pagemap.h>
  #include <linux/fs.h>
  
  #define ZSPAGE_MAGIC	0x58
  
  /*
   * This must be a power of 2 and greater than or equal to sizeof(link_free).
   * These two conditions ensure that any 'struct link_free' itself doesn't
   * span more than 1 page which avoids complex case of mapping 2 pages simply
   * to restore link_free pointer values.
   */
  #define ZS_ALIGN		8
  
  /*
   * A single 'zspage' is composed of up to 2^N discontiguous 0-order (single)
   * pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N.
   */
  #define ZS_MAX_ZSPAGE_ORDER 2
  #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
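  /*
   * Worked example (a sketch, using the values above): ZS_MAX_ZSPAGE_ORDER == 2
   * gives ZS_MAX_PAGES_PER_ZSPAGE == 1UL << 2 == 4, i.e. a single zspage chains
   * together at most four 0-order pages.
   */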
  #define ZS_HANDLE_SIZE (sizeof(unsigned long))
  /*
   * Object location (<PFN>, <obj_idx>) is encoded as
   * a single (unsigned long) handle value.
   *
   * Note that object index <obj_idx> starts from 0.
   *
   * This is made more complicated by various memory models and PAE.
   */
  #ifndef MAX_POSSIBLE_PHYSMEM_BITS
  #ifdef MAX_PHYSMEM_BITS
  #define MAX_POSSIBLE_PHYSMEM_BITS MAX_PHYSMEM_BITS
  #else
  /*
   * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
   * be PAGE_SHIFT
   */
  #define MAX_POSSIBLE_PHYSMEM_BITS BITS_PER_LONG
  #endif
  #endif
  
  #define _PFN_BITS		(MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)
  
  /*
   * The memory allocated for a handle stores the object position by
   * encoding <page, obj_idx>, and the encoded value has spare room in
   * its least significant bit (ie, look at obj_to_location).
   * We use that bit to synchronize object access between the user and
   * migration.
   */
  #define HANDLE_PIN_BIT	0
  
  /*
   * The head of an allocated object should have OBJ_ALLOCATED_TAG
   * to identify whether the object was allocated or not.
   * It's okay to add the status bit in the least significant bit because
   * the header keeps the handle, which is a 4-byte-aligned address, so we
   * have room for at least two bits.
   */
  #define OBJ_ALLOCATED_TAG 1
  #define OBJ_TAG_BITS 1
  #define OBJ_INDEX_BITS	(BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
  #define OBJ_INDEX_MASK	((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
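  /*
   * Worked example, a sketch only: on a 64-bit build with 4K pages that falls
   * back to MAX_POSSIBLE_PHYSMEM_BITS == BITS_PER_LONG == 64,
   *
   *	_PFN_BITS      = 64 - 12 = 52
   *	OBJ_INDEX_BITS = 64 - 52 - 1 = 11,	OBJ_INDEX_MASK = 0x7ff
   *
   * so an encoded object value is laid out as <PFN:52 | obj_idx:11 | tag:1>,
   * the least significant bit being the room used for OBJ_ALLOCATED_TAG and
   * the HANDLE_PIN_BIT lock (see location_to_obj()/obj_to_location() below).
   */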
  #define FULLNESS_BITS	2
  #define CLASS_BITS	8
  #define ISOLATED_BITS	3
  #define MAGIC_VAL_BITS	8
  #define MAX(a, b) ((a) >= (b) ? (a) : (b))
  /* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
  #define ZS_MIN_ALLOC_SIZE \
  	MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
  /* each chunk includes extra space to keep the handle */
  #define ZS_MAX_ALLOC_SIZE	PAGE_SIZE
  
  /*
   * On systems with 4K page size, this gives 255 size classes! There is a
   * trade-off here:
   *  - Large number of size classes is potentially wasteful as free pages are
   *    spread across these classes
   *  - Small number of size classes causes large internal fragmentation
   *  - Probably it's better to use specific size classes (empirically
   *    determined). NOTE: all those class sizes must be set as multiples of
   *    ZS_ALIGN to make sure link_free itself never has to span 2 pages.
   *
   *  ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiples of ZS_ALIGN
   *  (reason above)
   */
  #define ZS_SIZE_CLASS_DELTA	(PAGE_SIZE >> CLASS_BITS)
  #define ZS_SIZE_CLASSES	(DIV_ROUND_UP(ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE, \
  				      ZS_SIZE_CLASS_DELTA) + 1)
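  /*
   * Worked example (a sketch, assuming 4K pages and the 64-bit layout sketched
   * above, where OBJ_INDEX_BITS == 11):
   *
   *	ZS_MIN_ALLOC_SIZE   = max(32, (4 * 4096) >> 11) = max(32, 8)  = 32
   *	ZS_SIZE_CLASS_DELTA = 4096 >> 8                               = 16
   *	ZS_SIZE_CLASSES     = DIV_ROUND_UP(4096 - 32, 16) + 1         = 255
   *
   * i.e. class sizes run 32, 48, 64, ... up to PAGE_SIZE in 16-byte steps,
   * matching the "255 size classes" figure in the comment above.
   */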

  enum fullness_group {
  	ZS_EMPTY,
  	ZS_ALMOST_EMPTY,
  	ZS_ALMOST_FULL,
  	ZS_FULL,
  	NR_ZS_FULLNESS,
  };
  enum zs_stat_type {
  	CLASS_EMPTY,
  	CLASS_ALMOST_EMPTY,
  	CLASS_ALMOST_FULL,
  	CLASS_FULL,
  	OBJ_ALLOCATED,
  	OBJ_USED,
  	NR_ZS_STAT_TYPE,
  };
  struct zs_size_stat {
  	unsigned long objs[NR_ZS_STAT_TYPE];
  };
  #ifdef CONFIG_ZSMALLOC_STAT
  static struct dentry *zs_stat_root;
  #endif
  #ifdef CONFIG_COMPACTION
  static struct vfsmount *zsmalloc_mnt;
  #endif
  /*
   * We assign a page to ZS_ALMOST_EMPTY fullness group when:
   *	n <= N / f, where
   * n = number of allocated objects
   * N = total number of objects zspage can store
   * f = fullness_threshold_frac
   *
   * Similarly, we assign zspage to:
   *	ZS_ALMOST_FULL	when n > N / f
   *	ZS_EMPTY	when n == 0
   *	ZS_FULL		when n == N
   *
   * (see: fix_fullness_group())
   */
  static const int fullness_threshold_frac = 4;
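  /*
   * Worked example (a sketch): with fullness_threshold_frac == 4 and a class
   * whose zspage stores N == 128 objects, get_fullness_group() below yields
   *
   *	n == 0		-> ZS_EMPTY
   *	1 <= n <= 96	-> ZS_ALMOST_EMPTY	(n <= 3 * 128 / 4)
   *	97 <= n <= 127	-> ZS_ALMOST_FULL
   *	n == 128	-> ZS_FULL
   *
   * (note the code uses 3 * N / f, not N / f, as the almost-empty boundary).
   */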
  static size_t huge_class_size;
  
  struct size_class {
  	spinlock_t lock;
  	struct list_head fullness_list[NR_ZS_FULLNESS];
  	/*
  	 * Size of objects stored in this class. Must be multiple
  	 * of ZS_ALIGN.
  	 */
  	int size;
  	int objs_per_zspage;
  	/* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */
  	int pages_per_zspage;
  
  	unsigned int index;
  	struct zs_size_stat stats;
  };
  /* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */
  static void SetPageHugeObject(struct page *page)
  {
  	SetPageOwnerPriv1(page);
  }
  
  static void ClearPageHugeObject(struct page *page)
  {
  	ClearPageOwnerPriv1(page);
  }
  
  static int PageHugeObject(struct page *page)
  {
  	return PageOwnerPriv1(page);
  }
  /*
   * Placed within free objects to form a singly linked list.
   * For every zspage, zspage->freeobj gives the head of this list.
   *
   * This must be a power of 2 and less than or equal to ZS_ALIGN
   */
  struct link_free {
  	union {
  		/*
  		 * Free object index;
  		 * It's valid for a non-allocated object
  		 */
  		unsigned long next;
  		/*
  		 * Handle of allocated object.
  		 */
  		unsigned long handle;
  	};
  };
  
  struct zs_pool {
  	const char *name;

  	struct size_class *size_class[ZS_SIZE_CLASSES];
  	struct kmem_cache *handle_cachep;
  	struct kmem_cache *zspage_cachep;

  	atomic_long_t pages_allocated;

  	struct zs_pool_stats stats;
  
  	/* Compact classes */
  	struct shrinker shrinker;

  #ifdef CONFIG_ZSMALLOC_STAT
  	struct dentry *stat_dentry;
  #endif
  #ifdef CONFIG_COMPACTION
  	struct inode *inode;
  	struct work_struct free_work;
  	/* A wait queue for when migration races with async_free_zspage() */
  	struct wait_queue_head migration_wait;
  	atomic_long_t isolated_pages;
  	bool destroying;
  #endif
  };

  struct zspage {
  	struct {
  		unsigned int fullness:FULLNESS_BITS;
  		unsigned int class:CLASS_BITS + 1;
  		unsigned int isolated:ISOLATED_BITS;
  		unsigned int magic:MAGIC_VAL_BITS;
  	};
  	unsigned int inuse;
  	unsigned int freeobj;
  	struct page *first_page;
  	struct list_head list; /* fullness list */
  #ifdef CONFIG_COMPACTION
  	rwlock_t lock;
  #endif
  };
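  /*
   * Illustrative note (a sketch): with the constants above, the flags word
   * packs fullness:2 + class:9 + isolated:3 + magic:8 == 22 bits. For example,
   * an almost-full, non-isolated zspage of size class 42 would carry
   * {fullness = ZS_ALMOST_FULL, class = 42, isolated = 0, magic = ZSPAGE_MAGIC},
   * with ->inuse counting live objects and ->freeobj indexing the next free one.
   */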

  struct mapping_area {
  #ifdef CONFIG_PGTABLE_MAPPING
  	struct vm_struct *vm; /* vm area for mapping object that span pages */
  #else
  	char *vm_buf; /* copy buffer for objects that span pages */
  #endif
  	char *vm_addr; /* address of kmap_atomic()'ed pages */
  	enum zs_mapmode vm_mm; /* mapping mode */
  };
  #ifdef CONFIG_COMPACTION
  static int zs_register_migration(struct zs_pool *pool);
  static void zs_unregister_migration(struct zs_pool *pool);
  static void migrate_lock_init(struct zspage *zspage);
  static void migrate_read_lock(struct zspage *zspage);
  static void migrate_read_unlock(struct zspage *zspage);
  static void kick_deferred_free(struct zs_pool *pool);
  static void init_deferred_free(struct zs_pool *pool);
  static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage);
  #else
  static int zsmalloc_mount(void) { return 0; }
  static void zsmalloc_unmount(void) {}
  static int zs_register_migration(struct zs_pool *pool) { return 0; }
  static void zs_unregister_migration(struct zs_pool *pool) {}
  static void migrate_lock_init(struct zspage *zspage) {}
  static void migrate_read_lock(struct zspage *zspage) {}
  static void migrate_read_unlock(struct zspage *zspage) {}
  static void kick_deferred_free(struct zs_pool *pool) {}
  static void init_deferred_free(struct zs_pool *pool) {}
  static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
  #endif
  static int create_cache(struct zs_pool *pool)
  {
  	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
  					0, 0, NULL);
  	if (!pool->handle_cachep)
  		return 1;
  
  	pool->zspage_cachep = kmem_cache_create("zspage", sizeof(struct zspage),
  					0, 0, NULL);
  	if (!pool->zspage_cachep) {
  		kmem_cache_destroy(pool->handle_cachep);
  		pool->handle_cachep = NULL;
  		return 1;
  	}
  
  	return 0;
  }
  static void destroy_cache(struct zs_pool *pool)
  {
  	kmem_cache_destroy(pool->handle_cachep);
  	kmem_cache_destroy(pool->zspage_cachep);
  }
  static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
  {
  	return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
  			gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
  }
  static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
  {
  	kmem_cache_free(pool->handle_cachep, (void *)handle);
  }
  static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
  {
  	return kmem_cache_alloc(pool->zspage_cachep,
  			flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
  }
  
  static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
  {
  	kmem_cache_free(pool->zspage_cachep, zspage);
  }
  static void record_obj(unsigned long handle, unsigned long obj)
  {
  	/*
  	 * The lsb of @obj represents the handle lock while the other bits
  	 * carry the object value the handle is pointing to, so the
  	 * update shouldn't do store tearing.
  	 */
  	WRITE_ONCE(*(unsigned long *)handle, obj);
  }
  /* zpool driver */
  
  #ifdef CONFIG_ZPOOL
  static void *zs_zpool_create(const char *name, gfp_t gfp,
  			     const struct zpool_ops *zpool_ops,
  			     struct zpool *zpool)
  {
  	/*
  	 * Ignore global gfp flags: zs_malloc() may be invoked from
  	 * different contexts and its caller must provide a valid
  	 * gfp mask.
  	 */
  	return zs_create_pool(name);
  }
  
  static void zs_zpool_destroy(void *pool)
  {
  	zs_destroy_pool(pool);
  }
  
  static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp,
  			unsigned long *handle)
  {
  	*handle = zs_malloc(pool, size, gfp);
  	return *handle ? 0 : -1;
  }
  static void zs_zpool_free(void *pool, unsigned long handle)
  {
  	zs_free(pool, handle);
  }
  static void *zs_zpool_map(void *pool, unsigned long handle,
  			enum zpool_mapmode mm)
  {
  	enum zs_mapmode zs_mm;
  
  	switch (mm) {
  	case ZPOOL_MM_RO:
  		zs_mm = ZS_MM_RO;
  		break;
  	case ZPOOL_MM_WO:
  		zs_mm = ZS_MM_WO;
  		break;
  	case ZPOOL_MM_RW: /* fall through */
  	default:
  		zs_mm = ZS_MM_RW;
  		break;
  	}
  
  	return zs_map_object(pool, handle, zs_mm);
  }
  static void zs_zpool_unmap(void *pool, unsigned long handle)
  {
  	zs_unmap_object(pool, handle);
  }
  
  static u64 zs_zpool_total_size(void *pool)
  {
  	return zs_get_total_pages(pool) << PAGE_SHIFT;
  }
  
  static struct zpool_driver zs_zpool_driver = {
  	.type =			  "zsmalloc",
  	.owner =		  THIS_MODULE,
  	.create =		  zs_zpool_create,
  	.destroy =		  zs_zpool_destroy,
  	.malloc_support_movable = true,
  	.malloc =		  zs_zpool_malloc,
  	.free =			  zs_zpool_free,
  	.map =			  zs_zpool_map,
  	.unmap =		  zs_zpool_unmap,
  	.total_size =		  zs_zpool_total_size,
  };
  MODULE_ALIAS("zpool-zsmalloc");
  #endif /* CONFIG_ZPOOL */
  /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
  static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
  static bool is_zspage_isolated(struct zspage *zspage)
  {
  	return zspage->isolated;
  }
  static __maybe_unused int is_first_page(struct page *page)
  {
  	return PagePrivate(page);
  }
  /* Protected by class->lock */
  static inline int get_zspage_inuse(struct zspage *zspage)
  {
  	return zspage->inuse;
  }

  static inline void mod_zspage_inuse(struct zspage *zspage, int val)
  {
  	zspage->inuse += val;
  }
  static inline struct page *get_first_page(struct zspage *zspage)
  {
  	struct page *first_page = zspage->first_page;

  	VM_BUG_ON_PAGE(!is_first_page(first_page), first_page);
  	return first_page;
  }
  static inline int get_first_obj_offset(struct page *page)
  {
  	return page->units;
  }

  static inline void set_first_obj_offset(struct page *page, int offset)
  {
  	page->units = offset;
  }
  static inline unsigned int get_freeobj(struct zspage *zspage)
  {
  	return zspage->freeobj;
  }
  static inline void set_freeobj(struct zspage *zspage, unsigned int obj)
  {
  	zspage->freeobj = obj;
  }
  static void get_zspage_mapping(struct zspage *zspage,
  				unsigned int *class_idx,
  				enum fullness_group *fullness)
  {
  	BUG_ON(zspage->magic != ZSPAGE_MAGIC);
  	*fullness = zspage->fullness;
  	*class_idx = zspage->class;
  }
  static void set_zspage_mapping(struct zspage *zspage,
  				unsigned int class_idx,
  				enum fullness_group fullness)
  {
  	zspage->class = class_idx;
  	zspage->fullness = fullness;
  }
  /*
   * zsmalloc divides the pool into various size classes where each
   * class maintains a list of zspages and each zspage is divided
   * into equal sized chunks. Each allocation falls into one of these
   * classes depending on its size. This function returns the index of the
   * size class which has a chunk size big enough to hold the given size.
   */
  static int get_size_class_index(int size)
  {
  	int idx = 0;
  
  	if (likely(size > ZS_MIN_ALLOC_SIZE))
  		idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE,
  				ZS_SIZE_CLASS_DELTA);
  	return min_t(int, ZS_SIZE_CLASSES - 1, idx);
  }
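  /*
   * Worked example (a sketch, assuming 4K pages and the default 16-byte
   * ZS_SIZE_CLASS_DELTA): for size == 100,
   *
   *	idx = DIV_ROUND_UP(100 - 32, 16) = DIV_ROUND_UP(68, 16) = 5
   *
   * so the allocation is served from the class whose chunk size is
   * 32 + 5 * 16 == 112 bytes, the smallest class that fits 100 bytes.
   */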
  /* type can be of enum type zs_stat_type or fullness_group */
  static inline void zs_stat_inc(struct size_class *class,
  				int type, unsigned long cnt)
  {
  	class->stats.objs[type] += cnt;
  }
  /* type can be of enum type zs_stat_type or fullness_group */
  static inline void zs_stat_dec(struct size_class *class,
  				int type, unsigned long cnt)
  {
  	class->stats.objs[type] -= cnt;
  }
  /* type can be of enum type zs_stat_type or fullness_group */
  static inline unsigned long zs_stat_get(struct size_class *class,
  				int type)
  {
  	return class->stats.objs[type];
  }
  #ifdef CONFIG_ZSMALLOC_STAT
  static void __init zs_stat_init(void)
  {
  	if (!debugfs_initialized()) {
  		pr_warn("debugfs not available, stat dir not created\n");
  		return;
  	}
  
  	zs_stat_root = debugfs_create_dir("zsmalloc", NULL);
  }
  
  static void __exit zs_stat_exit(void)
  {
  	debugfs_remove_recursive(zs_stat_root);
  }
  static unsigned long zs_can_compact(struct size_class *class);
  static int zs_stats_size_show(struct seq_file *s, void *v)
  {
  	int i;
  	struct zs_pool *pool = s->private;
  	struct size_class *class;
  	int objs_per_zspage;
  	unsigned long class_almost_full, class_almost_empty;
  	unsigned long obj_allocated, obj_used, pages_used, freeable;
  	unsigned long total_class_almost_full = 0, total_class_almost_empty = 0;
  	unsigned long total_objs = 0, total_used_objs = 0, total_pages = 0;
  	unsigned long total_freeable = 0;

  	seq_printf(s, " %5s %5s %11s %12s %13s %10s %10s %16s %8s\n",
  			"class", "size", "almost_full", "almost_empty",
  			"obj_allocated", "obj_used", "pages_used",
  			"pages_per_zspage", "freeable");

  	for (i = 0; i < ZS_SIZE_CLASSES; i++) {
  		class = pool->size_class[i];
  
  		if (class->index != i)
  			continue;
  
  		spin_lock(&class->lock);
  		class_almost_full = zs_stat_get(class, CLASS_ALMOST_FULL);
  		class_almost_empty = zs_stat_get(class, CLASS_ALMOST_EMPTY);
  		obj_allocated = zs_stat_get(class, OBJ_ALLOCATED);
  		obj_used = zs_stat_get(class, OBJ_USED);
  		freeable = zs_can_compact(class);
  		spin_unlock(&class->lock);
  		objs_per_zspage = class->objs_per_zspage;
  		pages_used = obj_allocated / objs_per_zspage *
  				class->pages_per_zspage;
  		seq_printf(s, " %5u %5u %11lu %12lu %13lu"
  				" %10lu %10lu %16d %8lu\n",
  			i, class->size, class_almost_full, class_almost_empty,
  			obj_allocated, obj_used, pages_used,
  			class->pages_per_zspage, freeable);
  
  		total_class_almost_full += class_almost_full;
  		total_class_almost_empty += class_almost_empty;
  		total_objs += obj_allocated;
  		total_used_objs += obj_used;
  		total_pages += pages_used;
  		total_freeable += freeable;
  	}
  
  	seq_puts(s, "\n");
  	seq_printf(s, " %5s %5s %11lu %12lu %13lu %10lu %10lu %16s %8lu\n",
  			"Total", "", total_class_almost_full,
  			total_class_almost_empty, total_objs,
  			total_used_objs, total_pages, "", total_freeable);
  
  	return 0;
  }
  DEFINE_SHOW_ATTRIBUTE(zs_stats_size);

  static void zs_pool_stat_create(struct zs_pool *pool, const char *name)
  {
  	if (!zs_stat_root) {
  		pr_warn("no root stat dir, not creating <%s> stat dir\n", name);
  		return;
  	}

  	pool->stat_dentry = debugfs_create_dir(name, zs_stat_root);
  
  	debugfs_create_file("classes", S_IFREG | 0444, pool->stat_dentry, pool,
  			    &zs_stats_size_fops);
  }
  
  static void zs_pool_stat_destroy(struct zs_pool *pool)
  {
  	debugfs_remove_recursive(pool->stat_dentry);
  }
  
  #else /* CONFIG_ZSMALLOC_STAT */
  static void __init zs_stat_init(void)
  {
  }
  
  static void __exit zs_stat_exit(void)
  {
  }
  static inline void zs_pool_stat_create(struct zs_pool *pool, const char *name)
  {
  }
  
  static inline void zs_pool_stat_destroy(struct zs_pool *pool)
  {
  }
  #endif

  /*
   * For each size class, zspages are divided into different groups
   * depending on how "full" they are. This was done so that we could
   * easily find empty or nearly empty zspages when we try to shrink
   * the pool (not yet implemented). This function returns the fullness
   * status of the given zspage.
   */
  static enum fullness_group get_fullness_group(struct size_class *class,
  						struct zspage *zspage)
  {
  	int inuse, objs_per_zspage;
  	enum fullness_group fg;

  	inuse = get_zspage_inuse(zspage);
  	objs_per_zspage = class->objs_per_zspage;
  
  	if (inuse == 0)
  		fg = ZS_EMPTY;
  	else if (inuse == objs_per_zspage)
  		fg = ZS_FULL;
  	else if (inuse <= 3 * objs_per_zspage / fullness_threshold_frac)
  		fg = ZS_ALMOST_EMPTY;
  	else
  		fg = ZS_ALMOST_FULL;
  
  	return fg;
  }
  /*
   * Each size class maintains various freelists and zspages are assigned
   * to one of these freelists based on the number of live objects they
   * have. This function inserts the given zspage into the freelist
   * identified by <class, fullness_group>.
   */
  static void insert_zspage(struct size_class *class,
  				struct zspage *zspage,
  				enum fullness_group fullness)
  {
  	struct zspage *head;

  	zs_stat_inc(class, fullness, 1);
  	head = list_first_entry_or_null(&class->fullness_list[fullness],
  					struct zspage, list);
  	/*
  	 * We want to see more ZS_FULL pages and fewer almost-empty/full ones.
  	 * Put pages with higher ->inuse first.
  	 */
  	if (head) {
  		if (get_zspage_inuse(zspage) < get_zspage_inuse(head)) {
  			list_add(&zspage->list, &head->list);
  			return;
  		}
  	}
  	list_add(&zspage->list, &class->fullness_list[fullness]);
  }
  /*
   * This function removes the given zspage from the freelist identified
   * by <class, fullness_group>.
   */
  static void remove_zspage(struct size_class *class,
  				struct zspage *zspage,
  				enum fullness_group fullness)
  {
  	VM_BUG_ON(list_empty(&class->fullness_list[fullness]));
  	VM_BUG_ON(is_zspage_isolated(zspage));

  	list_del_init(&zspage->list);
  	zs_stat_dec(class, fullness, 1);
  }
  /*
   * Each size class maintains zspages in different fullness groups depending
   * on the number of live objects they contain. When allocating or freeing
   * objects, the fullness status of the page can change, say, from ALMOST_FULL
   * to ALMOST_EMPTY when freeing an object. This function checks if such
   * a status change has occurred for the given page and accordingly moves the
   * page from the freelist of the old fullness group to that of the new
   * fullness group.
   */
  static enum fullness_group fix_fullness_group(struct size_class *class,
  						struct zspage *zspage)
  {
  	int class_idx;
  	enum fullness_group currfg, newfg;
  	get_zspage_mapping(zspage, &class_idx, &currfg);
  	newfg = get_fullness_group(class, zspage);
  	if (newfg == currfg)
  		goto out;
  	if (!is_zspage_isolated(zspage)) {
  		remove_zspage(class, zspage, currfg);
  		insert_zspage(class, zspage, newfg);
  	}
  	set_zspage_mapping(zspage, class_idx, newfg);
  
  out:
  	return newfg;
  }
  
  /*
   * We have to decide on how many pages to link together
   * to form a zspage for each size class. This is important
   * to reduce wastage due to unusable space left at the end of
   * each zspage, which is given as:
   *     wastage = Zp % class_size
   *     usage = Zp - wastage
   * where Zp = zspage size = k * PAGE_SIZE where k = 1, 2, ...
   *
   * For example, for size class of 3/8 * PAGE_SIZE, we should
   * link together 3 PAGE_SIZE sized pages to form a zspage
   * since then we can perfectly fit in 8 such objects.
   */
  static int get_pages_per_zspage(int class_size)
  {
  	int i, max_usedpc = 0;
  	/* zspage order which gives maximum used size per KB */
  	int max_usedpc_order = 1;
  	for (i = 1; i <= ZS_MAX_PAGES_PER_ZSPAGE; i++) {
  		int zspage_size;
  		int waste, usedpc;
  
  		zspage_size = i * PAGE_SIZE;
  		waste = zspage_size % class_size;
  		usedpc = (zspage_size - waste) * 100 / zspage_size;
  
  		if (usedpc > max_usedpc) {
  			max_usedpc = usedpc;
  			max_usedpc_order = i;
  		}
  	}
  
  	return max_usedpc_order;
  }
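  /*
   * Worked example (a sketch, 4K pages): for class_size == 1536 (the
   * 3/8 * PAGE_SIZE case mentioned above),
   *
   *	i = 1: waste = 4096 % 1536  = 1024, usedpc = 75
   *	i = 2: waste = 8192 % 1536  =  512, usedpc = 93
   *	i = 3: waste = 12288 % 1536 =    0, usedpc = 100
   *
   * so get_pages_per_zspage(1536) returns 3: three pages hold exactly eight
   * 1536-byte objects with no wasted tail space.
   */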
  static struct zspage *get_zspage(struct page *page)
  {
  	struct zspage *zspage = (struct zspage *)page->private;
  
  	BUG_ON(zspage->magic != ZSPAGE_MAGIC);
  	return zspage;
  }
  
  static struct page *get_next_page(struct page *page)
  {
  	if (unlikely(PageHugeObject(page)))
  		return NULL;
  
  	return page->freelist;
  }
  /**
   * obj_to_location - get (<page>, <obj_idx>) from encoded object value
   * @obj: the encoded object value
   * @page: page the object resides in
   * @obj_idx: object index
   */
  static void obj_to_location(unsigned long obj, struct page **page,
  				unsigned int *obj_idx)
  {
  	obj >>= OBJ_TAG_BITS;
  	*page = pfn_to_page(obj >> OBJ_INDEX_BITS);
  	*obj_idx = (obj & OBJ_INDEX_MASK);
  }

  /**
   * location_to_obj - get obj value encoded from (<page>, <obj_idx>)
   * @page: page object resides in zspage
   * @obj_idx: object index
   */
  static unsigned long location_to_obj(struct page *page, unsigned int obj_idx)
  {
  	unsigned long obj;

  	obj = page_to_pfn(page) << OBJ_INDEX_BITS;
  	obj |= obj_idx & OBJ_INDEX_MASK;
  	obj <<= OBJ_TAG_BITS;

  	return obj;
  }
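  /*
   * Worked example (a sketch, using the 64-bit layout assumed earlier with
   * OBJ_INDEX_BITS == 11 and OBJ_INDEX_MASK == 0x7ff): for PFN 0x1234 and
   * obj_idx 5,
   *
   *	location_to_obj(): ((0x1234 << 11) | 5) << 1	= 0x123400a
   *	obj_to_location(): 0x123400a >> 1		= 0x91a005,
   *			   0x91a005 & 0x7ff = 5,	0x91a005 >> 11 = 0x1234
   *
   * i.e. the two helpers are exact inverses, with bit 0 left clear as the
   * tag/pin room described near OBJ_ALLOCATED_TAG above.
   */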
  static unsigned long handle_to_obj(unsigned long handle)
  {
  	return *(unsigned long *)handle;
  }
  static unsigned long obj_to_head(struct page *page, void *obj)
  {
  	if (unlikely(PageHugeObject(page))) {
  		VM_BUG_ON_PAGE(!is_first_page(page), page);
  		return page->index;
  	} else
  		return *(unsigned long *)obj;
  }
  static inline int testpin_tag(unsigned long handle)
  {
  	return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
  }
  static inline int trypin_tag(unsigned long handle)
  {
  	return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
  }
  
  static void pin_tag(unsigned long handle)
  {
  	bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
  }
  
  static void unpin_tag(unsigned long handle)
  {
  	bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
  }
  static void reset_page(struct page *page)
  {
  	__ClearPageMovable(page);
  	ClearPagePrivate(page);
  	set_page_private(page, 0);
  	page_mapcount_reset(page);
  	ClearPageHugeObject(page);
  	page->freelist = NULL;
  }
  static int trylock_zspage(struct zspage *zspage)
  {
  	struct page *cursor, *fail;
  
  	for (cursor = get_first_page(zspage); cursor != NULL; cursor =
  					get_next_page(cursor)) {
  		if (!trylock_page(cursor)) {
  			fail = cursor;
  			goto unlock;
  		}
  	}
  
  	return 1;
  unlock:
  	for (cursor = get_first_page(zspage); cursor != fail; cursor =
  					get_next_page(cursor))
  		unlock_page(cursor);
  
  	return 0;
  }
  static void __free_zspage(struct zs_pool *pool, struct size_class *class,
  				struct zspage *zspage)
  {
  	struct page *page, *next;
  	enum fullness_group fg;
  	unsigned int class_idx;
  
  	get_zspage_mapping(zspage, &class_idx, &fg);
  
  	assert_spin_locked(&class->lock);

  	VM_BUG_ON(get_zspage_inuse(zspage));
  	VM_BUG_ON(fg != ZS_EMPTY);

  	next = page = get_first_page(zspage);
  	do {
  		VM_BUG_ON_PAGE(!PageLocked(page), page);
  		next = get_next_page(page);
  		reset_page(page);
  		unlock_page(page);
  		dec_zone_page_state(page, NR_ZSPAGES);
  		put_page(page);
  		page = next;
  	} while (page != NULL);

  	cache_free_zspage(pool, zspage);

  	zs_stat_dec(class, OBJ_ALLOCATED, class->objs_per_zspage);
  	atomic_long_sub(class->pages_per_zspage,
  					&pool->pages_allocated);
  }
  
  static void free_zspage(struct zs_pool *pool, struct size_class *class,
  				struct zspage *zspage)
  {
  	VM_BUG_ON(get_zspage_inuse(zspage));
  	VM_BUG_ON(list_empty(&zspage->list));
  
  	if (!trylock_zspage(zspage)) {
  		kick_deferred_free(pool);
  		return;
  	}
  
  	remove_zspage(class, zspage, ZS_EMPTY);
  	__free_zspage(pool, class, zspage);
61989a80f   Nitin Gupta   staging: zsmalloc...
919
920
921
  }
  
  /* Initialize a newly allocated zspage */
  static void init_zspage(struct size_class *class, struct zspage *zspage)
  {
  	unsigned int freeobj = 1;
  	unsigned long off = 0;
  	struct page *page = get_first_page(zspage);

  	while (page) {
  		struct page *next_page;
  		struct link_free *link;
  		void *vaddr;

  		set_first_obj_offset(page, off);

  		vaddr = kmap_atomic(page);
  		link = (struct link_free *)vaddr + off / sizeof(*link);
  
  		while ((off += class->size) < PAGE_SIZE) {
  			link->next = freeobj++ << OBJ_TAG_BITS;
  			link += class->size / sizeof(*link);
  		}
  
  		/*
  		 * We now come to the last (full or partial) object on this
  		 * page, which must point to the first object on the next
  		 * page (if present)
  		 */
  		next_page = get_next_page(page);
  		if (next_page) {
  			link->next = freeobj++ << OBJ_TAG_BITS;
  		} else {
  			/*
  			 * Reset OBJ_TAG_BITS bit in the last link to tell
  			 * whether it's an allocated object or not.
  			 */
  			link->next = -1UL << OBJ_TAG_BITS;
  		}
  		kunmap_atomic(vaddr);
  		page = next_page;
  		off %= PAGE_SIZE;
  	}

  	set_freeobj(zspage, 0);
  }
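  /*
   * Illustrative sketch (not authoritative): for a hypothetical class with
   * class->size == 1024 on a single 4K page, the loop above writes
   *
   *	object 0 (offset    0): link->next = 1 << OBJ_TAG_BITS
   *	object 1 (offset 1024): link->next = 2 << OBJ_TAG_BITS
   *	object 2 (offset 2048): link->next = 3 << OBJ_TAG_BITS
   *	object 3 (offset 3072): link->next = -1UL << OBJ_TAG_BITS (end marker)
   *
   * and then set_freeobj(zspage, 0), so allocation starts at object 0 and
   * follows the embedded indices until the -1 terminator.
   */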
  static void create_page_chain(struct size_class *class, struct zspage *zspage,
  				struct page *pages[])
  {
  	int i;
  	struct page *page;
  	struct page *prev_page = NULL;
  	int nr_pages = class->pages_per_zspage;
  
  	/*
  	 * Allocate individual pages and link them together as:
  	 * 1. all pages are linked together using page->freelist
  	 * 2. each sub-page points to zspage using page->private
  	 *
  	 * we set PG_private to identify the first page (i.e. no other sub-page
  	 * has this flag set).
  	 */
  	for (i = 0; i < nr_pages; i++) {
  		page = pages[i];
  		set_page_private(page, (unsigned long)zspage);
  		page->freelist = NULL;
  		if (i == 0) {
  			zspage->first_page = page;
  			SetPagePrivate(page);
  			if (unlikely(class->objs_per_zspage == 1 &&
  					class->pages_per_zspage == 1))
  				SetPageHugeObject(page);
  		} else {
  			prev_page->freelist = page;
  		}
  		prev_page = page;
  	}
  }

  /*
   * Allocate a zspage for the given size class
   */
  static struct zspage *alloc_zspage(struct zs_pool *pool,
  					struct size_class *class,
  					gfp_t gfp)
  {
  	int i;
  	struct page *pages[ZS_MAX_PAGES_PER_ZSPAGE];
  	struct zspage *zspage = cache_alloc_zspage(pool, gfp);
  
  	if (!zspage)
  		return NULL;
  
  	memset(zspage, 0, sizeof(struct zspage));
  	zspage->magic = ZSPAGE_MAGIC;
  	migrate_lock_init(zspage);

  	for (i = 0; i < class->pages_per_zspage; i++) {
  		struct page *page;

  		page = alloc_page(gfp);
  		if (!page) {
  			while (--i >= 0) {
  				dec_zone_page_state(pages[i], NR_ZSPAGES);
  				__free_page(pages[i]);
  			}
  			cache_free_zspage(pool, zspage);
  			return NULL;
  		}
  
  		inc_zone_page_state(page, NR_ZSPAGES);
  		pages[i] = page;
  	}
  	create_page_chain(class, zspage, pages);
  	init_zspage(class, zspage);

  	return zspage;
  }
  static struct zspage *find_get_zspage(struct size_class *class)
61989a80f   Nitin Gupta   staging: zsmalloc...
1038
1039
  {
  	int i;
3783689a1   Minchan Kim   zsmalloc: introdu...
1040
  	struct zspage *zspage;
61989a80f   Nitin Gupta   staging: zsmalloc...
1041

48b4800a1   Minchan Kim   zsmalloc: page mi...
1042
  	for (i = ZS_ALMOST_FULL; i >= ZS_EMPTY; i--) {
3783689a1   Minchan Kim   zsmalloc: introdu...
1043
1044
1045
  		zspage = list_first_entry_or_null(&class->fullness_list[i],
  				struct zspage, list);
  		if (zspage)
61989a80f   Nitin Gupta   staging: zsmalloc...
1046
1047
  			break;
  	}
3783689a1   Minchan Kim   zsmalloc: introdu...
1048
  	return zspage;
61989a80f   Nitin Gupta   staging: zsmalloc...
1049
  }
1b945aeef   Minchan Kim   zsmalloc: add Kco...
1050
  #ifdef CONFIG_PGTABLE_MAPPING
f553646a6   Seth Jennings   staging: zsmalloc...
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
  static inline int __zs_cpu_up(struct mapping_area *area)
  {
  	/*
  	 * Make sure we don't leak memory if a cpu UP notification
  	 * and zs_init() race and both call zs_cpu_up() on the same cpu
  	 */
  	if (area->vm)
  		return 0;
  	area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
  	if (!area->vm)
  		return -ENOMEM;
  	return 0;
  }
  
  static inline void __zs_cpu_down(struct mapping_area *area)
  {
  	if (area->vm)
  		free_vm_area(area->vm);
  	area->vm = NULL;
  }
  
  static inline void *__zs_map_object(struct mapping_area *area,
  				struct page *pages[2], int off, int size)
  {
f6f8ed473   WANG Chao   mm/vmalloc.c: cle...
1075
  	BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages));
f553646a6   Seth Jennings   staging: zsmalloc...
1076
1077
1078
1079
1080
1081
1082
1083
  	area->vm_addr = area->vm->addr;
  	return area->vm_addr + off;
  }
  
  static inline void __zs_unmap_object(struct mapping_area *area,
  				struct page *pages[2], int off, int size)
  {
  	unsigned long addr = (unsigned long)area->vm_addr;
f553646a6   Seth Jennings   staging: zsmalloc...
1084

d95abbbb2   Joerg Roedel   staging: zsmalloc...
1085
  	unmap_kernel_range(addr, PAGE_SIZE * 2);
f553646a6   Seth Jennings   staging: zsmalloc...
1086
  }
1b945aeef   Minchan Kim   zsmalloc: add Kco...
1087
  #else /* CONFIG_PGTABLE_MAPPING */
f553646a6   Seth Jennings   staging: zsmalloc...
1088
1089
1090
1091
1092
1093
1094
1095
1096
  
  static inline int __zs_cpu_up(struct mapping_area *area)
  {
  	/*
  	 * Make sure we don't leak memory if a cpu UP notification
  	 * and zs_init() race and both call zs_cpu_up() on the same cpu
  	 */
  	if (area->vm_buf)
  		return 0;
40f9fb8cf   Mahendran Ganesh   mm/zsmalloc: supp...
1097
  	area->vm_buf = kmalloc(ZS_MAX_ALLOC_SIZE, GFP_KERNEL);
f553646a6   Seth Jennings   staging: zsmalloc...
1098
1099
1100
1101
1102
1103
1104
  	if (!area->vm_buf)
  		return -ENOMEM;
  	return 0;
  }
  
  static inline void __zs_cpu_down(struct mapping_area *area)
  {
40f9fb8cf   Mahendran Ganesh   mm/zsmalloc: supp...
1105
  	kfree(area->vm_buf);
f553646a6   Seth Jennings   staging: zsmalloc...
1106
1107
1108
1109
1110
  	area->vm_buf = NULL;
  }
  
  static void *__zs_map_object(struct mapping_area *area,
  			struct page *pages[2], int off, int size)
5f601902c   Seth Jennings   staging: zsmalloc...
1111
  {
5f601902c   Seth Jennings   staging: zsmalloc...
1112
1113
  	int sizes[2];
  	void *addr;
f553646a6   Seth Jennings   staging: zsmalloc...
1114
  	char *buf = area->vm_buf;
5f601902c   Seth Jennings   staging: zsmalloc...
1115

f553646a6   Seth Jennings   staging: zsmalloc...
1116
1117
1118
1119
1120
1121
  	/* disable page faults to match kmap_atomic() return conditions */
  	pagefault_disable();
  
  	/* no read fastpath */
  	if (area->vm_mm == ZS_MM_WO)
  		goto out;
5f601902c   Seth Jennings   staging: zsmalloc...
1122
1123
1124
  
  	sizes[0] = PAGE_SIZE - off;
  	sizes[1] = size - sizes[0];
5f601902c   Seth Jennings   staging: zsmalloc...
1125
1126
1127
1128
1129
1130
1131
  	/* copy object to per-cpu buffer */
  	addr = kmap_atomic(pages[0]);
  	memcpy(buf, addr + off, sizes[0]);
  	kunmap_atomic(addr);
  	addr = kmap_atomic(pages[1]);
  	memcpy(buf + sizes[0], addr, sizes[1]);
  	kunmap_atomic(addr);
f553646a6   Seth Jennings   staging: zsmalloc...
1132
1133
  out:
  	return area->vm_buf;
5f601902c   Seth Jennings   staging: zsmalloc...
1134
  }
f553646a6   Seth Jennings   staging: zsmalloc...
1135
1136
  static void __zs_unmap_object(struct mapping_area *area,
  			struct page *pages[2], int off, int size)
5f601902c   Seth Jennings   staging: zsmalloc...
1137
  {
5f601902c   Seth Jennings   staging: zsmalloc...
1138
1139
  	int sizes[2];
  	void *addr;
2e40e163a   Minchan Kim   zsmalloc: decoupl...
1140
  	char *buf;
5f601902c   Seth Jennings   staging: zsmalloc...
1141

f553646a6   Seth Jennings   staging: zsmalloc...
1142
1143
1144
  	/* no write fastpath */
  	if (area->vm_mm == ZS_MM_RO)
  		goto out;
5f601902c   Seth Jennings   staging: zsmalloc...
1145

7b60a6852   Minchan Kim   zsmalloc: record ...
1146
  	buf = area->vm_buf;
a82cbf071   YiPing Xu   zsmalloc: drop un...
1147
1148
1149
  	buf = buf + ZS_HANDLE_SIZE;
  	size -= ZS_HANDLE_SIZE;
  	off += ZS_HANDLE_SIZE;
2e40e163a   Minchan Kim   zsmalloc: decoupl...
1150

5f601902c   Seth Jennings   staging: zsmalloc...
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
  	sizes[0] = PAGE_SIZE - off;
  	sizes[1] = size - sizes[0];
  
  	/* copy per-cpu buffer to object */
  	addr = kmap_atomic(pages[0]);
  	memcpy(addr + off, buf, sizes[0]);
  	kunmap_atomic(addr);
  	addr = kmap_atomic(pages[1]);
  	memcpy(addr, buf + sizes[0], sizes[1]);
  	kunmap_atomic(addr);
f553646a6   Seth Jennings   staging: zsmalloc...
1161
1162
1163
1164
  
  out:
  	/* enable page faults to match kunmap_atomic() return conditions */
  	pagefault_enable();
5f601902c   Seth Jennings   staging: zsmalloc...
1165
  }
61989a80f   Nitin Gupta   staging: zsmalloc...
1166

1b945aeef   Minchan Kim   zsmalloc: add Kco...
1167
  #endif /* CONFIG_PGTABLE_MAPPING */
f553646a6   Seth Jennings   staging: zsmalloc...
1168

215c89d05   Sebastian Andrzej Siewior   mm/zsmalloc: Conv...
1169
  static int zs_cpu_prepare(unsigned int cpu)
61989a80f   Nitin Gupta   staging: zsmalloc...
1170
  {
61989a80f   Nitin Gupta   staging: zsmalloc...
1171
  	struct mapping_area *area;
215c89d05   Sebastian Andrzej Siewior   mm/zsmalloc: Conv...
1172
1173
  	area = &per_cpu(zs_map_area, cpu);
  	return __zs_cpu_up(area);
61989a80f   Nitin Gupta   staging: zsmalloc...
1174
  }
215c89d05   Sebastian Andrzej Siewior   mm/zsmalloc: Conv...
1175
  static int zs_cpu_dead(unsigned int cpu)
61989a80f   Nitin Gupta   staging: zsmalloc...
1176
  {
215c89d05   Sebastian Andrzej Siewior   mm/zsmalloc: Conv...
1177
  	struct mapping_area *area;
40f9fb8cf   Mahendran Ganesh   mm/zsmalloc: supp...
1178

215c89d05   Sebastian Andrzej Siewior   mm/zsmalloc: Conv...
1179
1180
1181
  	area = &per_cpu(zs_map_area, cpu);
  	__zs_cpu_down(area);
  	return 0;
b1b00a5b8   Sergey Senozhatsky   zsmalloc: fix zs_...
1182
  }
64d90465f   Ganesh Mahendran   mm/zsmalloc: avoi...
1183
1184
  static bool can_merge(struct size_class *prev, int pages_per_zspage,
  					int objs_per_zspage)
9eec4cd53   Joonsoo Kim   zsmalloc: merge s...
1185
  {
64d90465f   Ganesh Mahendran   mm/zsmalloc: avoi...
1186
1187
1188
  	if (prev->pages_per_zspage == pages_per_zspage &&
  		prev->objs_per_zspage == objs_per_zspage)
  		return true;
9eec4cd53   Joonsoo Kim   zsmalloc: merge s...
1189

64d90465f   Ganesh Mahendran   mm/zsmalloc: avoi...
1190
  	return false;
9eec4cd53   Joonsoo Kim   zsmalloc: merge s...
1191
  }
3783689a1   Minchan Kim   zsmalloc: introdu...
1192
  static bool zspage_full(struct size_class *class, struct zspage *zspage)
312fcae22   Minchan Kim   zsmalloc: support...
1193
  {
3783689a1   Minchan Kim   zsmalloc: introdu...
1194
  	return get_zspage_inuse(zspage) == class->objs_per_zspage;
312fcae22   Minchan Kim   zsmalloc: support...
1195
  }
66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1196
1197
1198
1199
1200
  unsigned long zs_get_total_pages(struct zs_pool *pool)
  {
  	return atomic_long_read(&pool->pages_allocated);
  }
  EXPORT_SYMBOL_GPL(zs_get_total_pages);
4bbc0bc06   Davidlohr Bueso   staging: zsmalloc...
1201
  /**
66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1202
1203
1204
   * zs_map_object - get address of allocated object from handle.
   * @pool: pool from which the object was allocated
   * @handle: handle returned from zs_malloc
e8b098fc5   Mike Rapoport   mm: kernel-doc: a...
1205
   * @mm: mapping mode to use
4bbc0bc06   Davidlohr Bueso   staging: zsmalloc...
1206
   *
66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1207
1208
1209
   * Before using an object allocated from zs_malloc, it must be mapped using
   * this function. When done with the object, it must be unmapped using
   * zs_unmap_object.
4bbc0bc06   Davidlohr Bueso   staging: zsmalloc...
1210
   *
66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1211
1212
1213
1214
   * Only one object can be mapped per cpu at a time. There is no protection
   * against nested mappings.
   *
   * This function returns with preemption and page faults disabled.
4bbc0bc06   Davidlohr Bueso   staging: zsmalloc...
1215
   */
66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1216
1217
  void *zs_map_object(struct zs_pool *pool, unsigned long handle,
  			enum zs_mapmode mm)
61989a80f   Nitin Gupta   staging: zsmalloc...
1218
  {
3783689a1   Minchan Kim   zsmalloc: introdu...
1219
  	struct zspage *zspage;
66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1220
  	struct page *page;
bfd093f5e   Minchan Kim   zsmalloc: use fre...
1221
1222
  	unsigned long obj, off;
  	unsigned int obj_idx;
61989a80f   Nitin Gupta   staging: zsmalloc...
1223

66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1224
1225
1226
1227
1228
  	unsigned int class_idx;
  	enum fullness_group fg;
  	struct size_class *class;
  	struct mapping_area *area;
  	struct page *pages[2];
2e40e163a   Minchan Kim   zsmalloc: decoupl...
1229
  	void *ret;
61989a80f   Nitin Gupta   staging: zsmalloc...
1230

9eec4cd53   Joonsoo Kim   zsmalloc: merge s...
1231
  	/*
66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1232
1233
1234
  	 * Because we use per-cpu mapping areas shared among the
  	 * pools/users, we can't allow mapping in interrupt context
  	 * because it can corrupt another user's mappings.
9eec4cd53   Joonsoo Kim   zsmalloc: merge s...
1235
  	 */
1aedcafbf   Sergey Senozhatsky   zsmalloc: calling...
1236
  	BUG_ON(in_interrupt());
61989a80f   Nitin Gupta   staging: zsmalloc...
1237

312fcae22   Minchan Kim   zsmalloc: support...
1238
1239
  	/* From now on, migration cannot move the object */
  	pin_tag(handle);
2e40e163a   Minchan Kim   zsmalloc: decoupl...
1240
1241
  	obj = handle_to_obj(handle);
  	obj_to_location(obj, &page, &obj_idx);
3783689a1   Minchan Kim   zsmalloc: introdu...
1242
  	zspage = get_zspage(page);
48b4800a1   Minchan Kim   zsmalloc: page mi...
1243
1244
1245
  
  	/* migration cannot move any subpage in this zspage */
  	migrate_read_lock(zspage);
3783689a1   Minchan Kim   zsmalloc: introdu...
1246
  	get_zspage_mapping(zspage, &class_idx, &fg);
66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1247
  	class = pool->size_class[class_idx];
bfd093f5e   Minchan Kim   zsmalloc: use fre...
1248
  	off = (class->size * obj_idx) & ~PAGE_MASK;
df8b5bb99   Ganesh Mahendran   mm/zsmalloc: avoi...
1249

66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1250
1251
1252
1253
1254
  	area = &get_cpu_var(zs_map_area);
  	area->vm_mm = mm;
  	if (off + class->size <= PAGE_SIZE) {
  		/* this object is contained entirely within a page */
  		area->vm_addr = kmap_atomic(page);
2e40e163a   Minchan Kim   zsmalloc: decoupl...
1255
1256
  		ret = area->vm_addr + off;
  		goto out;
61989a80f   Nitin Gupta   staging: zsmalloc...
1257
  	}
66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1258
1259
1260
1261
  	/* this object spans two pages */
  	pages[0] = page;
  	pages[1] = get_next_page(page);
  	BUG_ON(!pages[1]);
9eec4cd53   Joonsoo Kim   zsmalloc: merge s...
1262

2e40e163a   Minchan Kim   zsmalloc: decoupl...
1263
1264
  	ret = __zs_map_object(area, pages, off, class->size);
  out:
48b4800a1   Minchan Kim   zsmalloc: page mi...
1265
  	if (likely(!PageHugeObject(page)))
7b60a6852   Minchan Kim   zsmalloc: record ...
1266
1267
1268
  		ret += ZS_HANDLE_SIZE;
  
  	return ret;
61989a80f   Nitin Gupta   staging: zsmalloc...
1269
  }
66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1270
  EXPORT_SYMBOL_GPL(zs_map_object);
61989a80f   Nitin Gupta   staging: zsmalloc...
1271

66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1272
  void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
61989a80f   Nitin Gupta   staging: zsmalloc...
1273
  {
3783689a1   Minchan Kim   zsmalloc: introdu...
1274
  	struct zspage *zspage;
66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1275
  	struct page *page;
bfd093f5e   Minchan Kim   zsmalloc: use fre...
1276
1277
  	unsigned long obj, off;
  	unsigned int obj_idx;
61989a80f   Nitin Gupta   staging: zsmalloc...
1278

66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1279
1280
1281
1282
  	unsigned int class_idx;
  	enum fullness_group fg;
  	struct size_class *class;
  	struct mapping_area *area;
9eec4cd53   Joonsoo Kim   zsmalloc: merge s...
1283

2e40e163a   Minchan Kim   zsmalloc: decoupl...
1284
1285
  	obj = handle_to_obj(handle);
  	obj_to_location(obj, &page, &obj_idx);
3783689a1   Minchan Kim   zsmalloc: introdu...
1286
1287
  	zspage = get_zspage(page);
  	get_zspage_mapping(zspage, &class_idx, &fg);
66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1288
  	class = pool->size_class[class_idx];
bfd093f5e   Minchan Kim   zsmalloc: use fre...
1289
  	off = (class->size * obj_idx) & ~PAGE_MASK;
61989a80f   Nitin Gupta   staging: zsmalloc...
1290

66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1291
1292
1293
1294
1295
  	area = this_cpu_ptr(&zs_map_area);
  	if (off + class->size <= PAGE_SIZE)
  		kunmap_atomic(area->vm_addr);
  	else {
  		struct page *pages[2];
40f9fb8cf   Mahendran Ganesh   mm/zsmalloc: supp...
1296

66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1297
1298
1299
1300
1301
1302
1303
  		pages[0] = page;
  		pages[1] = get_next_page(page);
  		BUG_ON(!pages[1]);
  
  		__zs_unmap_object(area, pages, off, class->size);
  	}
  	put_cpu_var(zs_map_area);
48b4800a1   Minchan Kim   zsmalloc: page mi...
1304
1305
  
  	migrate_read_unlock(zspage);
312fcae22   Minchan Kim   zsmalloc: support...
1306
  	unpin_tag(handle);
61989a80f   Nitin Gupta   staging: zsmalloc...
1307
  }
66cdef663   Ganesh Mahendran   mm/zsmalloc: adju...
1308
  EXPORT_SYMBOL_GPL(zs_unmap_object);
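For orientation, the allocate/map/copy/unmap/free discipline described in the zs_map_object() documentation above looks roughly like the sketch below. This is a minimal, illustrative example, not part of zsmalloc.c: zs_usage_sketch(), the pool name and the payload are made up, and error handling is reduced to the essentials; the zs_* calls and ZS_MM_WO are the real exported API.

  /* Illustrative usage sketch only -- not part of zsmalloc.c. */
  #include <linux/errno.h>
  #include <linux/gfp.h>
  #include <linux/string.h>
  #include <linux/zsmalloc.h>

  static int zs_usage_sketch(void)
  {
  	struct zs_pool *pool;
  	unsigned long handle;
  	char payload[64] = "example payload";	/* hypothetical data */
  	void *dst;

  	pool = zs_create_pool("example");	/* hypothetical pool name */
  	if (!pool)
  		return -ENOMEM;

  	handle = zs_malloc(pool, sizeof(payload), GFP_KERNEL);
  	if (!handle) {
  		zs_destroy_pool(pool);
  		return -ENOMEM;
  	}

  	/*
  	 * Map write-only, copy the data in, and unmap promptly: the mapping
  	 * disables preemption and page faults, and only one object may be
  	 * mapped per cpu at a time.
  	 */
  	dst = zs_map_object(pool, handle, ZS_MM_WO);
  	memcpy(dst, payload, sizeof(payload));
  	zs_unmap_object(pool, handle);

  	zs_free(pool, handle);
  	zs_destroy_pool(pool);
  	return 0;
  }

Reading the object back would use ZS_MM_RO (or ZS_MM_RW) in the same map/unmap pattern.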
61989a80f   Nitin Gupta   staging: zsmalloc...
1309

010b495e2   Sergey Senozhatsky   zsmalloc: introdu...
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
  /**
   * zs_huge_class_size() - Returns the size (in bytes) of the first huge
   *                        zsmalloc &size_class.
   * @pool: zsmalloc pool to use
   *
   * The function returns the size of the first huge class - any object of equal
   * or bigger size will be stored in a zspage consisting of a single physical
   * page.
   *
   * Context: Any context.
   *
   * Return: the size (in bytes) of the first huge zsmalloc &size_class.
   */
  size_t zs_huge_class_size(struct zs_pool *pool)
  {
  	return huge_class_size;
  }
  EXPORT_SYMBOL_GPL(zs_huge_class_size);
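As a sketch of how a caller might use this value (zram makes a decision along these lines), consider the hypothetical helper below; worth_compressing() is not part of this file and is shown only for illustration.

  /* Hypothetical helper, for illustration only. */
  static bool worth_compressing(struct zs_pool *pool, size_t comp_len)
  {
  	/*
  	 * Anything of huge-class size or larger lands in a zspage made of a
  	 * single physical page, so compressing down to that size saves nothing.
  	 */
  	return comp_len < zs_huge_class_size(pool);
  }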
251cbb951   Minchan Kim   zsmalloc: reorder...
1328
  static unsigned long obj_malloc(struct size_class *class,
3783689a1   Minchan Kim   zsmalloc: introdu...
1329
  				struct zspage *zspage, unsigned long handle)
c78062612   Minchan Kim   zsmalloc: factor ...
1330
  {
bfd093f5e   Minchan Kim   zsmalloc: use fre...
1331
  	int i, nr_page, offset;
c78062612   Minchan Kim   zsmalloc: factor ...
1332
1333
1334
1335
  	unsigned long obj;
  	struct link_free *link;
  
  	struct page *m_page;
bfd093f5e   Minchan Kim   zsmalloc: use fre...
1336
  	unsigned long m_offset;
c78062612   Minchan Kim   zsmalloc: factor ...
1337
  	void *vaddr;
312fcae22   Minchan Kim   zsmalloc: support...
1338
  	handle |= OBJ_ALLOCATED_TAG;
3783689a1   Minchan Kim   zsmalloc: introdu...
1339
  	obj = get_freeobj(zspage);
bfd093f5e   Minchan Kim   zsmalloc: use fre...
1340
1341
1342
1343
1344
1345
1346
1347
  
  	offset = obj * class->size;
  	nr_page = offset >> PAGE_SHIFT;
  	m_offset = offset & ~PAGE_MASK;
  	m_page = get_first_page(zspage);
  
  	for (i = 0; i < nr_page; i++)
  		m_page = get_next_page(m_page);
c78062612   Minchan Kim   zsmalloc: factor ...
1348
1349
1350
  
  	vaddr = kmap_atomic(m_page);
  	link = (struct link_free *)vaddr + m_offset / sizeof(*link);
3b1d9ca65   Minchan Kim   zsmalloc: use OBJ...
1351
  	set_freeobj(zspage, link->next >> OBJ_TAG_BITS);
48b4800a1   Minchan Kim   zsmalloc: page mi...
1352
  	if (likely(!PageHugeObject(m_page)))
7b60a6852   Minchan Kim   zsmalloc: record ...
1353
1354
1355
  		/* record handle in the header of allocated chunk */
  		link->handle = handle;
  	else
3783689a1   Minchan Kim   zsmalloc: introdu...
1356
1357
  		/* record handle to page->index */
  		zspage->first_page->index = handle;
c78062612   Minchan Kim   zsmalloc: factor ...
1358
  	kunmap_atomic(vaddr);
3783689a1   Minchan Kim   zsmalloc: introdu...
1359
  	mod_zspage_inuse(zspage, 1);
c78062612   Minchan Kim   zsmalloc: factor ...
1360
  	zs_stat_inc(class, OBJ_USED, 1);
bfd093f5e   Minchan Kim   zsmalloc: use fre...
1361
  	obj = location_to_obj(m_page, obj);
c78062612   Minchan Kim   zsmalloc: factor ...
1362
1363
  	return obj;
  }
61989a80f   Nitin Gupta   staging: zsmalloc...
1364
1365
1366
1367
  /**
   * zs_malloc - Allocate block of given size from pool.
   * @pool: pool to allocate from
   * @size: size of block to allocate
fd8544639   Ganesh Mahendran   mm/zsmalloc: keep...
1368
   * @gfp: gfp flags when allocating object
61989a80f   Nitin Gupta   staging: zsmalloc...
1369
   *
00a61d861   Minchan Kim   staging: zsmalloc...
1370
   * On success, a handle to the allocated object is returned,
c23443483   Minchan Kim   staging: zsmalloc...
1371
   * otherwise 0.
61989a80f   Nitin Gupta   staging: zsmalloc...
1372
1373
   * Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail.
   */
d0d8da2dc   Sergey Senozhatsky   zsmalloc: require...
1374
  unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
61989a80f   Nitin Gupta   staging: zsmalloc...
1375
  {
2e40e163a   Minchan Kim   zsmalloc: decoupl...
1376
  	unsigned long handle, obj;
61989a80f   Nitin Gupta   staging: zsmalloc...
1377
  	struct size_class *class;
48b4800a1   Minchan Kim   zsmalloc: page mi...
1378
  	enum fullness_group newfg;
3783689a1   Minchan Kim   zsmalloc: introdu...
1379
  	struct zspage *zspage;
61989a80f   Nitin Gupta   staging: zsmalloc...
1380

7b60a6852   Minchan Kim   zsmalloc: record ...
1381
  	if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
2e40e163a   Minchan Kim   zsmalloc: decoupl...
1382
  		return 0;
3783689a1   Minchan Kim   zsmalloc: introdu...
1383
  	handle = cache_alloc_handle(pool, gfp);
2e40e163a   Minchan Kim   zsmalloc: decoupl...
1384
  	if (!handle)
c23443483   Minchan Kim   staging: zsmalloc...
1385
  		return 0;
61989a80f   Nitin Gupta   staging: zsmalloc...
1386

2e40e163a   Minchan Kim   zsmalloc: decoupl...
1387
1388
  	/* extra space in chunk to keep the handle */
  	size += ZS_HANDLE_SIZE;
9eec4cd53   Joonsoo Kim   zsmalloc: merge s...
1389
  	class = pool->size_class[get_size_class_index(size)];
61989a80f   Nitin Gupta   staging: zsmalloc...
1390
1391
  
  	spin_lock(&class->lock);
3783689a1   Minchan Kim   zsmalloc: introdu...
1392
  	zspage = find_get_zspage(class);
48b4800a1   Minchan Kim   zsmalloc: page mi...
1393
1394
1395
1396
1397
  	if (likely(zspage)) {
  		obj = obj_malloc(class, zspage, handle);
  		/* Now move the zspage to another fullness group, if required */
  		fix_fullness_group(class, zspage);
  		record_obj(handle, obj);
61989a80f   Nitin Gupta   staging: zsmalloc...
1398
  		spin_unlock(&class->lock);
61989a80f   Nitin Gupta   staging: zsmalloc...
1399

48b4800a1   Minchan Kim   zsmalloc: page mi...
1400
1401
  		return handle;
  	}
0f050d997   Ganesh Mahendran   mm/zsmalloc: add ...
1402

48b4800a1   Minchan Kim   zsmalloc: page mi...
1403
1404
1405
1406
1407
1408
  	spin_unlock(&class->lock);
  
  	zspage = alloc_zspage(pool, class, gfp);
  	if (!zspage) {
  		cache_free_handle(pool, handle);
  		return 0;
61989a80f   Nitin Gupta   staging: zsmalloc...
1409
  	}
48b4800a1   Minchan Kim   zsmalloc: page mi...
1410
  	spin_lock(&class->lock);
3783689a1   Minchan Kim   zsmalloc: introdu...
1411
  	obj = obj_malloc(class, zspage, handle);
48b4800a1   Minchan Kim   zsmalloc: page mi...
1412
1413
1414
  	newfg = get_fullness_group(class, zspage);
  	insert_zspage(class, zspage, newfg);
  	set_zspage_mapping(zspage, class->index, newfg);
2e40e163a   Minchan Kim   zsmalloc: decoupl...
1415
  	record_obj(handle, obj);
48b4800a1   Minchan Kim   zsmalloc: page mi...
1416
1417
  	atomic_long_add(class->pages_per_zspage,
  				&pool->pages_allocated);
b4fd07a08   Ganesh Mahendran   mm/zsmalloc: use ...
1418
  	zs_stat_inc(class, OBJ_ALLOCATED, class->objs_per_zspage);
48b4800a1   Minchan Kim   zsmalloc: page mi...
1419
1420
1421
  
  	/* We completely set up zspage so mark it as movable */
  	SetZsPageMovable(pool, zspage);
61989a80f   Nitin Gupta   staging: zsmalloc...
1422
  	spin_unlock(&class->lock);
2e40e163a   Minchan Kim   zsmalloc: decoupl...
1423
  	return handle;
61989a80f   Nitin Gupta   staging: zsmalloc...
1424
1425
  }
  EXPORT_SYMBOL_GPL(zs_malloc);
1ee471658   Minchan Kim   zsmalloc: remove ...
1426
  static void obj_free(struct size_class *class, unsigned long obj)
61989a80f   Nitin Gupta   staging: zsmalloc...
1427
1428
  {
  	struct link_free *link;
3783689a1   Minchan Kim   zsmalloc: introdu...
1429
1430
  	struct zspage *zspage;
  	struct page *f_page;
bfd093f5e   Minchan Kim   zsmalloc: use fre...
1431
1432
  	unsigned long f_offset;
  	unsigned int f_objidx;
af4ee5e97   Minchan Kim   zsmalloc: correct...
1433
  	void *vaddr;
61989a80f   Nitin Gupta   staging: zsmalloc...
1434

312fcae22   Minchan Kim   zsmalloc: support...
1435
  	obj &= ~OBJ_ALLOCATED_TAG;
2e40e163a   Minchan Kim   zsmalloc: decoupl...
1436
  	obj_to_location(obj, &f_page, &f_objidx);
bfd093f5e   Minchan Kim   zsmalloc: use fre...
1437
  	f_offset = (class->size * f_objidx) & ~PAGE_MASK;
3783689a1   Minchan Kim   zsmalloc: introdu...
1438
  	zspage = get_zspage(f_page);
61989a80f   Nitin Gupta   staging: zsmalloc...
1439

c78062612   Minchan Kim   zsmalloc: factor ...
1440
  	vaddr = kmap_atomic(f_page);
61989a80f   Nitin Gupta   staging: zsmalloc...
1441
1442
  
  	/* Insert this object in containing zspage's freelist */
af4ee5e97   Minchan Kim   zsmalloc: correct...
1443
  	link = (struct link_free *)(vaddr + f_offset);
3b1d9ca65   Minchan Kim   zsmalloc: use OBJ...
1444
  	link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
af4ee5e97   Minchan Kim   zsmalloc: correct...
1445
  	kunmap_atomic(vaddr);
bfd093f5e   Minchan Kim   zsmalloc: use fre...
1446
  	set_freeobj(zspage, f_objidx);
3783689a1   Minchan Kim   zsmalloc: introdu...
1447
  	mod_zspage_inuse(zspage, -1);
0f050d997   Ganesh Mahendran   mm/zsmalloc: add ...
1448
  	zs_stat_dec(class, OBJ_USED, 1);
c78062612   Minchan Kim   zsmalloc: factor ...
1449
1450
1451
1452
  }
  
  void zs_free(struct zs_pool *pool, unsigned long handle)
  {
3783689a1   Minchan Kim   zsmalloc: introdu...
1453
1454
  	struct zspage *zspage;
  	struct page *f_page;
bfd093f5e   Minchan Kim   zsmalloc: use fre...
1455
1456
  	unsigned long obj;
  	unsigned int f_objidx;
c78062612   Minchan Kim   zsmalloc: factor ...
1457
1458
1459
  	int class_idx;
  	struct size_class *class;
  	enum fullness_group fullness;
48b4800a1   Minchan Kim   zsmalloc: page mi...
1460
  	bool isolated;
c78062612   Minchan Kim   zsmalloc: factor ...
1461
1462
1463
  
  	if (unlikely(!handle))
  		return;
312fcae22   Minchan Kim   zsmalloc: support...
1464
  	pin_tag(handle);
c78062612   Minchan Kim   zsmalloc: factor ...
1465
  	obj = handle_to_obj(handle);
c78062612   Minchan Kim   zsmalloc: factor ...
1466
  	obj_to_location(obj, &f_page, &f_objidx);
3783689a1   Minchan Kim   zsmalloc: introdu...
1467
  	zspage = get_zspage(f_page);
c78062612   Minchan Kim   zsmalloc: factor ...
1468

48b4800a1   Minchan Kim   zsmalloc: page mi...
1469
  	migrate_read_lock(zspage);
3783689a1   Minchan Kim   zsmalloc: introdu...
1470
  	get_zspage_mapping(zspage, &class_idx, &fullness);
c78062612   Minchan Kim   zsmalloc: factor ...
1471
1472
1473
  	class = pool->size_class[class_idx];
  
  	spin_lock(&class->lock);
1ee471658   Minchan Kim   zsmalloc: remove ...
1474
  	obj_free(class, obj);
3783689a1   Minchan Kim   zsmalloc: introdu...
1475
  	fullness = fix_fullness_group(class, zspage);
48b4800a1   Minchan Kim   zsmalloc: page mi...
1476
1477
1478
  	if (fullness != ZS_EMPTY) {
  		migrate_read_unlock(zspage);
  		goto out;
312fcae22   Minchan Kim   zsmalloc: support...
1479
  	}
48b4800a1   Minchan Kim   zsmalloc: page mi...
1480
1481
1482
1483
1484
1485
1486
  
  	isolated = is_zspage_isolated(zspage);
  	migrate_read_unlock(zspage);
  	/* If zspage is isolated, zs_page_putback will free the zspage */
  	if (likely(!isolated))
  		free_zspage(pool, class, zspage);
  out:
61989a80f   Nitin Gupta   staging: zsmalloc...
1487
  	spin_unlock(&class->lock);
312fcae22   Minchan Kim   zsmalloc: support...
1488
  	unpin_tag(handle);
3783689a1   Minchan Kim   zsmalloc: introdu...
1489
  	cache_free_handle(pool, handle);
312fcae22   Minchan Kim   zsmalloc: support...
1490
1491
  }
  EXPORT_SYMBOL_GPL(zs_free);
251cbb951   Minchan Kim   zsmalloc: reorder...
1492
1493
  static void zs_object_copy(struct size_class *class, unsigned long dst,
  				unsigned long src)
312fcae22   Minchan Kim   zsmalloc: support...
1494
1495
  {
  	struct page *s_page, *d_page;
bfd093f5e   Minchan Kim   zsmalloc: use fre...
1496
  	unsigned int s_objidx, d_objidx;
312fcae22   Minchan Kim   zsmalloc: support...
1497
1498
1499
1500
1501
1502
1503
1504
1505
  	unsigned long s_off, d_off;
  	void *s_addr, *d_addr;
  	int s_size, d_size, size;
  	int written = 0;
  
  	s_size = d_size = class->size;
  
  	obj_to_location(src, &s_page, &s_objidx);
  	obj_to_location(dst, &d_page, &d_objidx);
bfd093f5e   Minchan Kim   zsmalloc: use fre...
1506
1507
  	s_off = (class->size * s_objidx) & ~PAGE_MASK;
  	d_off = (class->size * d_objidx) & ~PAGE_MASK;
312fcae22   Minchan Kim   zsmalloc: support...
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
  
  	if (s_off + class->size > PAGE_SIZE)
  		s_size = PAGE_SIZE - s_off;
  
  	if (d_off + class->size > PAGE_SIZE)
  		d_size = PAGE_SIZE - d_off;
  
  	s_addr = kmap_atomic(s_page);
  	d_addr = kmap_atomic(d_page);
  
  	while (1) {
  		size = min(s_size, d_size);
  		memcpy(d_addr + d_off, s_addr + s_off, size);
  		written += size;
  
  		if (written == class->size)
  			break;
495819ead   Sergey Senozhatsky   zsmalloc: micro-o...
1525
1526
1527
1528
1529
1530
  		s_off += size;
  		s_size -= size;
  		d_off += size;
  		d_size -= size;
  
  		if (s_off >= PAGE_SIZE) {
312fcae22   Minchan Kim   zsmalloc: support...
1531
1532
1533
  			kunmap_atomic(d_addr);
  			kunmap_atomic(s_addr);
  			s_page = get_next_page(s_page);
312fcae22   Minchan Kim   zsmalloc: support...
1534
1535
1536
1537
  			s_addr = kmap_atomic(s_page);
  			d_addr = kmap_atomic(d_page);
  			s_size = class->size - written;
  			s_off = 0;
312fcae22   Minchan Kim   zsmalloc: support...
1538
  		}
495819ead   Sergey Senozhatsky   zsmalloc: micro-o...
1539
  		if (d_off >= PAGE_SIZE) {
312fcae22   Minchan Kim   zsmalloc: support...
1540
1541
  			kunmap_atomic(d_addr);
  			d_page = get_next_page(d_page);
312fcae22   Minchan Kim   zsmalloc: support...
1542
1543
1544
  			d_addr = kmap_atomic(d_page);
  			d_size = class->size - written;
  			d_off = 0;
312fcae22   Minchan Kim   zsmalloc: support...
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
  		}
  	}
  
  	kunmap_atomic(d_addr);
  	kunmap_atomic(s_addr);
  }
  
  /*
   * Find an allocated object in the zspage, starting the scan from the
   * given object index, and return its handle.
   */
251cbb951   Minchan Kim   zsmalloc: reorder...
1556
  static unsigned long find_alloced_obj(struct size_class *class,
cf675acb7   Ganesh Mahendran   mm/zsmalloc: take...
1557
  					struct page *page, int *obj_idx)
312fcae22   Minchan Kim   zsmalloc: support...
1558
1559
1560
  {
  	unsigned long head;
  	int offset = 0;
cf675acb7   Ganesh Mahendran   mm/zsmalloc: take...
1561
  	int index = *obj_idx;
312fcae22   Minchan Kim   zsmalloc: support...
1562
1563
  	unsigned long handle = 0;
  	void *addr = kmap_atomic(page);
3783689a1   Minchan Kim   zsmalloc: introdu...
1564
  	offset = get_first_obj_offset(page);
312fcae22   Minchan Kim   zsmalloc: support...
1565
1566
1567
  	offset += class->size * index;
  
  	while (offset < PAGE_SIZE) {
48b4800a1   Minchan Kim   zsmalloc: page mi...
1568
  		head = obj_to_head(page, addr + offset);
312fcae22   Minchan Kim   zsmalloc: support...
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
  		if (head & OBJ_ALLOCATED_TAG) {
  			handle = head & ~OBJ_ALLOCATED_TAG;
  			if (trypin_tag(handle))
  				break;
  			handle = 0;
  		}
  
  		offset += class->size;
  		index++;
  	}
  
  	kunmap_atomic(addr);
cf675acb7   Ganesh Mahendran   mm/zsmalloc: take...
1581
1582
  
  	*obj_idx = index;
312fcae22   Minchan Kim   zsmalloc: support...
1583
1584
1585
1586
  	return handle;
  }
  
  struct zs_compact_control {
3783689a1   Minchan Kim   zsmalloc: introdu...
1587
  	/* Source page for migration which could be a subpage of zspage */
312fcae22   Minchan Kim   zsmalloc: support...
1588
1589
1590
1591
1592
1593
  	struct page *s_page;
  	/* Destination page for migration, which should be the first page
  	 * of a zspage. */
  	struct page *d_page;
  	/* Starting object index within @s_page, used to resume the scan
  	 * for live objects in the subpage. */
41b88e14c   Ganesh Mahendran   mm/zsmalloc: use ...
1594
  	int obj_idx;
312fcae22   Minchan Kim   zsmalloc: support...
1595
1596
1597
1598
1599
1600
1601
1602
1603
  };
  
  static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
  				struct zs_compact_control *cc)
  {
  	unsigned long used_obj, free_obj;
  	unsigned long handle;
  	struct page *s_page = cc->s_page;
  	struct page *d_page = cc->d_page;
41b88e14c   Ganesh Mahendran   mm/zsmalloc: use ...
1604
  	int obj_idx = cc->obj_idx;
312fcae22   Minchan Kim   zsmalloc: support...
1605
1606
1607
  	int ret = 0;
  
  	while (1) {
cf675acb7   Ganesh Mahendran   mm/zsmalloc: take...
1608
  		handle = find_alloced_obj(class, s_page, &obj_idx);
312fcae22   Minchan Kim   zsmalloc: support...
1609
1610
1611
1612
  		if (!handle) {
  			s_page = get_next_page(s_page);
  			if (!s_page)
  				break;
41b88e14c   Ganesh Mahendran   mm/zsmalloc: use ...
1613
  			obj_idx = 0;
312fcae22   Minchan Kim   zsmalloc: support...
1614
1615
1616
1617
  			continue;
  		}
  
  		/* Stop if there is no more space */
3783689a1   Minchan Kim   zsmalloc: introdu...
1618
  		if (zspage_full(class, get_zspage(d_page))) {
312fcae22   Minchan Kim   zsmalloc: support...
1619
1620
1621
1622
1623
1624
  			unpin_tag(handle);
  			ret = -ENOMEM;
  			break;
  		}
  
  		used_obj = handle_to_obj(handle);
3783689a1   Minchan Kim   zsmalloc: introdu...
1625
  		free_obj = obj_malloc(class, get_zspage(d_page), handle);
251cbb951   Minchan Kim   zsmalloc: reorder...
1626
  		zs_object_copy(class, free_obj, used_obj);
41b88e14c   Ganesh Mahendran   mm/zsmalloc: use ...
1627
  		obj_idx++;
c102f07ca   Junil Lee   zsmalloc: fix mig...
1628
1629
1630
1631
1632
1633
1634
  		/*
  		 * record_obj updates handle's value to free_obj and it will
  		 * invalidate the lock bit (i.e., HANDLE_PIN_BIT) of handle, which
  		 * breaks synchronization using pin_tag() (e.g., in zs_free), so
  		 * let's keep the lock bit set.
  		 */
  		free_obj |= BIT(HANDLE_PIN_BIT);
312fcae22   Minchan Kim   zsmalloc: support...
1635
1636
  		record_obj(handle, free_obj);
  		unpin_tag(handle);
1ee471658   Minchan Kim   zsmalloc: remove ...
1637
  		obj_free(class, used_obj);
312fcae22   Minchan Kim   zsmalloc: support...
1638
1639
1640
1641
  	}
  
  	/* Remember last position in this iteration */
  	cc->s_page = s_page;
41b88e14c   Ganesh Mahendran   mm/zsmalloc: use ...
1642
  	cc->obj_idx = obj_idx;
312fcae22   Minchan Kim   zsmalloc: support...
1643
1644
1645
  
  	return ret;
  }
3783689a1   Minchan Kim   zsmalloc: introdu...
1646
  static struct zspage *isolate_zspage(struct size_class *class, bool source)
312fcae22   Minchan Kim   zsmalloc: support...
1647
1648
  {
  	int i;
3783689a1   Minchan Kim   zsmalloc: introdu...
1649
1650
  	struct zspage *zspage;
  	enum fullness_group fg[2] = {ZS_ALMOST_EMPTY, ZS_ALMOST_FULL};
312fcae22   Minchan Kim   zsmalloc: support...
1651

3783689a1   Minchan Kim   zsmalloc: introdu...
1652
1653
1654
1655
1656
1657
1658
1659
1660
  	if (!source) {
  		fg[0] = ZS_ALMOST_FULL;
  		fg[1] = ZS_ALMOST_EMPTY;
  	}
  
  	for (i = 0; i < 2; i++) {
  		zspage = list_first_entry_or_null(&class->fullness_list[fg[i]],
  							struct zspage, list);
  		if (zspage) {
48b4800a1   Minchan Kim   zsmalloc: page mi...
1661
  			VM_BUG_ON(is_zspage_isolated(zspage));
3783689a1   Minchan Kim   zsmalloc: introdu...
1662
1663
  			remove_zspage(class, zspage, fg[i]);
  			return zspage;
312fcae22   Minchan Kim   zsmalloc: support...
1664
1665
  		}
  	}
3783689a1   Minchan Kim   zsmalloc: introdu...
1666
  	return zspage;
312fcae22   Minchan Kim   zsmalloc: support...
1667
  }
860c707dc   Sergey Senozhatsky   zsmalloc: account...
1668
  /*
3783689a1   Minchan Kim   zsmalloc: introdu...
1669
   * putback_zspage - add @zspage into the right class's fullness list
860c707dc   Sergey Senozhatsky   zsmalloc: account...
1670
   * @class: destination class
3783689a1   Minchan Kim   zsmalloc: introdu...
1671
   * @zspage: target page
860c707dc   Sergey Senozhatsky   zsmalloc: account...
1672
   *
3783689a1   Minchan Kim   zsmalloc: introdu...
1673
   * Return @zspage's fullness_group
860c707dc   Sergey Senozhatsky   zsmalloc: account...
1674
   */
4aa409cab   Minchan Kim   zsmalloc: separat...
1675
  static enum fullness_group putback_zspage(struct size_class *class,
3783689a1   Minchan Kim   zsmalloc: introdu...
1676
  			struct zspage *zspage)
312fcae22   Minchan Kim   zsmalloc: support...
1677
  {
312fcae22   Minchan Kim   zsmalloc: support...
1678
  	enum fullness_group fullness;
48b4800a1   Minchan Kim   zsmalloc: page mi...
1679
  	VM_BUG_ON(is_zspage_isolated(zspage));
3783689a1   Minchan Kim   zsmalloc: introdu...
1680
1681
1682
  	fullness = get_fullness_group(class, zspage);
  	insert_zspage(class, zspage, fullness);
  	set_zspage_mapping(zspage, class->index, fullness);
839373e64   Minchan Kim   zsmalloc: remove ...
1683

860c707dc   Sergey Senozhatsky   zsmalloc: account...
1684
  	return fullness;
61989a80f   Nitin Gupta   staging: zsmalloc...
1685
  }
312fcae22   Minchan Kim   zsmalloc: support...
1686

48b4800a1   Minchan Kim   zsmalloc: page mi...
1687
  #ifdef CONFIG_COMPACTION
4d0a5402f   Colin Ian King   mm/zsmalloc.c: ma...
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
  /*
   * To prevent zspage destruction during migration, zspage freeing should
   * hold locks of all pages in the zspage.
   */
  static void lock_zspage(struct zspage *zspage)
  {
  	struct page *page = get_first_page(zspage);
  
  	do {
  		lock_page(page);
  	} while ((page = get_next_page(page)) != NULL);
  }
8e9231f81   David Howells   vfs: Convert zsma...
1700
  static int zs_init_fs_context(struct fs_context *fc)
48b4800a1   Minchan Kim   zsmalloc: page mi...
1701
  {
8e9231f81   David Howells   vfs: Convert zsma...
1702
  	return init_pseudo(fc, ZSMALLOC_MAGIC) ? 0 : -ENOMEM;
48b4800a1   Minchan Kim   zsmalloc: page mi...
1703
1704
1705
1706
  }
  
  static struct file_system_type zsmalloc_fs = {
  	.name		= "zsmalloc",
8e9231f81   David Howells   vfs: Convert zsma...
1707
  	.init_fs_context = zs_init_fs_context,
48b4800a1   Minchan Kim   zsmalloc: page mi...
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
  	.kill_sb	= kill_anon_super,
  };
  
  static int zsmalloc_mount(void)
  {
  	int ret = 0;
  
  	zsmalloc_mnt = kern_mount(&zsmalloc_fs);
  	if (IS_ERR(zsmalloc_mnt))
  		ret = PTR_ERR(zsmalloc_mnt);
  
  	return ret;
  }
  
  static void zsmalloc_unmount(void)
  {
  	kern_unmount(zsmalloc_mnt);
  }
  
  static void migrate_lock_init(struct zspage *zspage)
  {
  	rwlock_init(&zspage->lock);
  }
  
  static void migrate_read_lock(struct zspage *zspage)
  {
  	read_lock(&zspage->lock);
  }
  
  static void migrate_read_unlock(struct zspage *zspage)
  {
  	read_unlock(&zspage->lock);
  }
  
  static void migrate_write_lock(struct zspage *zspage)
  {
  	write_lock(&zspage->lock);
  }
  
  static void migrate_write_unlock(struct zspage *zspage)
  {
  	write_unlock(&zspage->lock);
  }
  
  /* Number of isolated subpages for *page migration* in this zspage */
  static void inc_zspage_isolation(struct zspage *zspage)
  {
  	zspage->isolated++;
  }
  
  static void dec_zspage_isolation(struct zspage *zspage)
  {
  	zspage->isolated--;
  }
1a87aa035   Henry Burns   mm/zsmalloc.c: mi...
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
  static void putback_zspage_deferred(struct zs_pool *pool,
  				    struct size_class *class,
  				    struct zspage *zspage)
  {
  	enum fullness_group fg;
  
  	fg = putback_zspage(class, zspage);
  	if (fg == ZS_EMPTY)
  		schedule_work(&pool->free_work);
  
  }
701d67859   Henry Burns   mm/zsmalloc.c: fi...
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
  static inline void zs_pool_dec_isolated(struct zs_pool *pool)
  {
  	VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0);
  	atomic_long_dec(&pool->isolated_pages);
  	/*
  	 * There's no possibility of racing, since wait_for_isolated_drain()
  	 * checks the isolated count under &class->lock after enqueuing
  	 * on migration_wait.
  	 */
  	if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying)
  		wake_up_all(&pool->migration_wait);
  }
48b4800a1   Minchan Kim   zsmalloc: page mi...
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
  static void replace_sub_page(struct size_class *class, struct zspage *zspage,
  				struct page *newpage, struct page *oldpage)
  {
  	struct page *page;
  	struct page *pages[ZS_MAX_PAGES_PER_ZSPAGE] = {NULL, };
  	int idx = 0;
  
  	page = get_first_page(zspage);
  	do {
  		if (page == oldpage)
  			pages[idx] = newpage;
  		else
  			pages[idx] = page;
  		idx++;
  	} while ((page = get_next_page(page)) != NULL);
  
  	create_page_chain(class, zspage, pages);
  	set_first_obj_offset(newpage, get_first_obj_offset(oldpage));
  	if (unlikely(PageHugeObject(oldpage)))
  		newpage->index = oldpage->index;
  	__SetPageMovable(newpage, page_mapping(oldpage));
  }
4d0a5402f   Colin Ian King   mm/zsmalloc.c: ma...
1807
  static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
48b4800a1   Minchan Kim   zsmalloc: page mi...
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
  {
  	struct zs_pool *pool;
  	struct size_class *class;
  	int class_idx;
  	enum fullness_group fullness;
  	struct zspage *zspage;
  	struct address_space *mapping;
  
  	/*
  	 * The page is locked, so the zspage cannot be destroyed. For details,
  	 * look at lock_zspage() in free_zspage().
  	 */
  	VM_BUG_ON_PAGE(!PageMovable(page), page);
  	VM_BUG_ON_PAGE(PageIsolated(page), page);
  
  	zspage = get_zspage(page);
  
  	/*
  	 * Without the class lock, fullness could be stale while class_idx is
  	 * okay because class_idx is constant unless the page is freed, so we
  	 * should get fullness again under the class lock.
  	 */
  	get_zspage_mapping(zspage, &class_idx, &fullness);
  	mapping = page_mapping(page);
  	pool = mapping->private_data;
  	class = pool->size_class[class_idx];
  
  	spin_lock(&class->lock);
  	if (get_zspage_inuse(zspage) == 0) {
  		spin_unlock(&class->lock);
  		return false;
  	}
  
  	/* zspage is isolated for object migration */
  	if (list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
  		spin_unlock(&class->lock);
  		return false;
  	}
  
  	/*
  	 * If this is first time isolation for the zspage, isolate zspage from
  	 * size_class to prevent further object allocation from the zspage.
  	 */
  	if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
  		get_zspage_mapping(zspage, &class_idx, &fullness);
701d67859   Henry Burns   mm/zsmalloc.c: fi...
1853
  		atomic_long_inc(&pool->isolated_pages);
48b4800a1   Minchan Kim   zsmalloc: page mi...
1854
1855
1856
1857
1858
1859
1860
1861
  		remove_zspage(class, zspage, fullness);
  	}
  
  	inc_zspage_isolation(zspage);
  	spin_unlock(&class->lock);
  
  	return true;
  }
4d0a5402f   Colin Ian King   mm/zsmalloc.c: ma...
1862
  static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
48b4800a1   Minchan Kim   zsmalloc: page mi...
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
  		struct page *page, enum migrate_mode mode)
  {
  	struct zs_pool *pool;
  	struct size_class *class;
  	int class_idx;
  	enum fullness_group fullness;
  	struct zspage *zspage;
  	struct page *dummy;
  	void *s_addr, *d_addr, *addr;
  	int offset, pos;
  	unsigned long handle, head;
  	unsigned long old_obj, new_obj;
  	unsigned int obj_idx;
  	int ret = -EAGAIN;
2916ecc0f   Jérôme Glisse   mm/migrate: new m...
1877
1878
1879
1880
1881
1882
1883
  	/*
  	 * We cannot support the _NO_COPY case here, because copy needs to
  	 * happen under the zs lock, which does not work with the
  	 * MIGRATE_SYNC_NO_COPY workflow.
  	 */
  	if (mode == MIGRATE_SYNC_NO_COPY)
  		return -EINVAL;
48b4800a1   Minchan Kim   zsmalloc: page mi...
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
  	VM_BUG_ON_PAGE(!PageMovable(page), page);
  	VM_BUG_ON_PAGE(!PageIsolated(page), page);
  
  	zspage = get_zspage(page);
  
  	/* Concurrent compactor cannot migrate any subpage in zspage */
  	migrate_write_lock(zspage);
  	get_zspage_mapping(zspage, &class_idx, &fullness);
  	pool = mapping->private_data;
  	class = pool->size_class[class_idx];
  	offset = get_first_obj_offset(page);
  
  	spin_lock(&class->lock);
  	if (!get_zspage_inuse(zspage)) {
77ff46579   Hui Zhu   zsmalloc: zs_page...
1898
1899
1900
1901
1902
  		/*
  		 * Set "offset" to end of the page so that every loops
  		 * skips unnecessary object scanning.
  		 */
  		offset = PAGE_SIZE;
48b4800a1   Minchan Kim   zsmalloc: page mi...
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
  	}
  
  	pos = offset;
  	s_addr = kmap_atomic(page);
  	while (pos < PAGE_SIZE) {
  		head = obj_to_head(page, s_addr + pos);
  		if (head & OBJ_ALLOCATED_TAG) {
  			handle = head & ~OBJ_ALLOCATED_TAG;
  			if (!trypin_tag(handle))
  				goto unpin_objects;
  		}
  		pos += class->size;
  	}
  
  	/*
  	 * At this point no user can access any object in the zspage, so it is
  	 * safe to move the page contents.
  	 */
  	d_addr = kmap_atomic(newpage);
  	memcpy(d_addr, s_addr, PAGE_SIZE);
  	kunmap_atomic(d_addr);
  
  	for (addr = s_addr + offset; addr < s_addr + pos;
  					addr += class->size) {
  		head = obj_to_head(page, addr);
  		if (head & OBJ_ALLOCATED_TAG) {
  			handle = head & ~OBJ_ALLOCATED_TAG;
  			if (!testpin_tag(handle))
  				BUG();
  
  			old_obj = handle_to_obj(handle);
  			obj_to_location(old_obj, &dummy, &obj_idx);
  			new_obj = (unsigned long)location_to_obj(newpage,
  								obj_idx);
  			new_obj |= BIT(HANDLE_PIN_BIT);
  			record_obj(handle, new_obj);
  		}
  	}
  
  	replace_sub_page(class, zspage, newpage, page);
  	get_page(newpage);
  
  	dec_zspage_isolation(zspage);
  
  	/*
  	 * Page migration is done, so put the isolated zspage back on the
  	 * list if @page is the final isolated subpage in the zspage.
  	 */
701d67859   Henry Burns   mm/zsmalloc.c: fi...
1950
1951
1952
1953
1954
1955
1956
  	if (!is_zspage_isolated(zspage)) {
  		/*
  		 * We cannot race with zs_destroy_pool() here because we wait
  		 * for isolation to hit zero before we start destroying.
  		 * Also, we ensure that everyone can see pool->destroying before
  		 * we start waiting.
  		 */
1a87aa035   Henry Burns   mm/zsmalloc.c: mi...
1957
  		putback_zspage_deferred(pool, class, zspage);
701d67859   Henry Burns   mm/zsmalloc.c: fi...
1958
1959
  		zs_pool_dec_isolated(pool);
  	}
48b4800a1   Minchan Kim   zsmalloc: page mi...
1960

cdc57bac9   Chanho Min   mm/zsmalloc.c: fi...
1961
1962
1963
1964
  	if (page_zone(newpage) != page_zone(page)) {
  		dec_zone_page_state(page, NR_ZSPAGES);
  		inc_zone_page_state(newpage, NR_ZSPAGES);
  	}
48b4800a1   Minchan Kim   zsmalloc: page mi...
1965
1966
1967
  	reset_page(page);
  	put_page(page);
  	page = newpage;
dd4123f32   Minchan Kim   mm: fix build war...
1968
  	ret = MIGRATEPAGE_SUCCESS;
48b4800a1   Minchan Kim   zsmalloc: page mi...
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
  unpin_objects:
  	for (addr = s_addr + offset; addr < s_addr + pos;
  						addr += class->size) {
  		head = obj_to_head(page, addr);
  		if (head & OBJ_ALLOCATED_TAG) {
  			handle = head & ~OBJ_ALLOCATED_TAG;
  			if (!testpin_tag(handle))
  				BUG();
  			unpin_tag(handle);
  		}
  	}
  	kunmap_atomic(s_addr);
48b4800a1   Minchan Kim   zsmalloc: page mi...
1981
1982
1983
1984
1985
  	spin_unlock(&class->lock);
  	migrate_write_unlock(zspage);
  
  	return ret;
  }
4d0a5402f   Colin Ian King   mm/zsmalloc.c: ma...
1986
  static void zs_page_putback(struct page *page)
48b4800a1   Minchan Kim   zsmalloc: page mi...
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
  {
  	struct zs_pool *pool;
  	struct size_class *class;
  	int class_idx;
  	enum fullness_group fg;
  	struct address_space *mapping;
  	struct zspage *zspage;
  
  	VM_BUG_ON_PAGE(!PageMovable(page), page);
  	VM_BUG_ON_PAGE(!PageIsolated(page), page);
  
  	zspage = get_zspage(page);
  	get_zspage_mapping(zspage, &class_idx, &fg);
  	mapping = page_mapping(page);
  	pool = mapping->private_data;
  	class = pool->size_class[class_idx];
  
  	spin_lock(&class->lock);
  	dec_zspage_isolation(zspage);
  	if (!is_zspage_isolated(zspage)) {
48b4800a1   Minchan Kim   zsmalloc: page mi...
2007
2008
2009
2010
  		/*
  		 * Due to page_lock, we cannot free zspage immediately
  		 * so let's defer.
  		 */
1a87aa035   Henry Burns   mm/zsmalloc.c: mi...
2011
  		putback_zspage_deferred(pool, class, zspage);
701d67859   Henry Burns   mm/zsmalloc.c: fi...
2012
  		zs_pool_dec_isolated(pool);
48b4800a1   Minchan Kim   zsmalloc: page mi...
2013
2014
2015
  	}
  	spin_unlock(&class->lock);
  }
4d0a5402f   Colin Ian King   mm/zsmalloc.c: ma...
2016
  static const struct address_space_operations zsmalloc_aops = {
48b4800a1   Minchan Kim   zsmalloc: page mi...
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
  	.isolate_page = zs_page_isolate,
  	.migratepage = zs_page_migrate,
  	.putback_page = zs_page_putback,
  };
  
  static int zs_register_migration(struct zs_pool *pool)
  {
  	pool->inode = alloc_anon_inode(zsmalloc_mnt->mnt_sb);
  	if (IS_ERR(pool->inode)) {
  		pool->inode = NULL;
  		return 1;
  	}
  
  	pool->inode->i_mapping->private_data = pool;
  	pool->inode->i_mapping->a_ops = &zsmalloc_aops;
  	return 0;
  }
701d67859   Henry Burns   mm/zsmalloc.c: fi...
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
  static bool pool_isolated_are_drained(struct zs_pool *pool)
  {
  	return atomic_long_read(&pool->isolated_pages) == 0;
  }
  
  /* Function for resolving migration */
  static void wait_for_isolated_drain(struct zs_pool *pool)
  {
  
  	/*
  	 * We're in the process of destroying the pool, so there are no
  	 * active allocations. zs_page_isolate() fails for completely free
  	 * zspages, so we need only wait for the zs_pool's isolated
  	 * count to hit zero.
  	 */
  	wait_event(pool->migration_wait,
  		   pool_isolated_are_drained(pool));
  }
48b4800a1   Minchan Kim   zsmalloc: page mi...
2052
2053
  static void zs_unregister_migration(struct zs_pool *pool)
  {
701d67859   Henry Burns   mm/zsmalloc.c: fi...
2054
2055
2056
2057
2058
2059
2060
2061
2062
  	pool->destroying = true;
  	/*
  	 * We need a memory barrier here to ensure global visibility of
  	 * pool->destroying. Thus pool->isolated_pages will either be 0, in which
  	 * case we don't care, or it will be > 0 and pool->destroying will
  	 * ensure that we wake up once isolation hits 0.
  	 */
  	smp_mb();
  	wait_for_isolated_drain(pool); /* This can block */
48b4800a1   Minchan Kim   zsmalloc: page mi...
2063
  	flush_work(&pool->free_work);
c3491eca3   Markus Elfring   zsmalloc: Delete ...
2064
  	iput(pool->inode);
48b4800a1   Minchan Kim   zsmalloc: page mi...
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
  }
  
  /*
   * Caller should hold page_lock of all pages in the zspage
   * In here, we cannot use zspage meta data.
   */
  static void async_free_zspage(struct work_struct *work)
  {
  	int i;
  	struct size_class *class;
  	unsigned int class_idx;
  	enum fullness_group fullness;
  	struct zspage *zspage, *tmp;
  	LIST_HEAD(free_pages);
  	struct zs_pool *pool = container_of(work, struct zs_pool,
  					free_work);
cf8e0fedf   Jerome Marchand   mm/zsmalloc: simp...
2081
  	for (i = 0; i < ZS_SIZE_CLASSES; i++) {
48b4800a1   Minchan Kim   zsmalloc: page mi...
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
  		class = pool->size_class[i];
  		if (class->index != i)
  			continue;
  
  		spin_lock(&class->lock);
  		list_splice_init(&class->fullness_list[ZS_EMPTY], &free_pages);
  		spin_unlock(&class->lock);
  	}
  
  
  	list_for_each_entry_safe(zspage, tmp, &free_pages, list) {
  		list_del(&zspage->list);
  		lock_zspage(zspage);
  
  		get_zspage_mapping(zspage, &class_idx, &fullness);
  		VM_BUG_ON(fullness != ZS_EMPTY);
  		class = pool->size_class[class_idx];
  		spin_lock(&class->lock);
  		__free_zspage(pool, pool->size_class[class_idx], zspage);
  		spin_unlock(&class->lock);
  	}
  }
  
  static void kick_deferred_free(struct zs_pool *pool)
  {
  	schedule_work(&pool->free_work);
  }
  
  static void init_deferred_free(struct zs_pool *pool)
  {
  	INIT_WORK(&pool->free_work, async_free_zspage);
  }
  
  static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage)
  {
  	struct page *page = get_first_page(zspage);
  
  	do {
  		WARN_ON(!trylock_page(page));
  		__SetPageMovable(page, pool->inode->i_mapping);
  		unlock_page(page);
  	} while ((page = get_next_page(page)) != NULL);
  }
  #endif
04f05909e   Sergey Senozhatsky   zsmalloc: introdu...
2126
2127
2128
2129
  /*
   * Based on the number of unused allocated objects, calculate
   * and return the number of pages that we can free.
04f05909e   Sergey Senozhatsky   zsmalloc: introdu...
2130
2131
2132
2133
   */
  static unsigned long zs_can_compact(struct size_class *class)
  {
  	unsigned long obj_wasted;
44f43e99f   Sergey Senozhatsky   zsmalloc: fix zs_...
2134
2135
  	unsigned long obj_allocated = zs_stat_get(class, OBJ_ALLOCATED);
  	unsigned long obj_used = zs_stat_get(class, OBJ_USED);
04f05909e   Sergey Senozhatsky   zsmalloc: introdu...
2136

44f43e99f   Sergey Senozhatsky   zsmalloc: fix zs_...
2137
2138
  	if (obj_allocated <= obj_used)
  		return 0;
04f05909e   Sergey Senozhatsky   zsmalloc: introdu...
2139

44f43e99f   Sergey Senozhatsky   zsmalloc: fix zs_...
2140
  	obj_wasted = obj_allocated - obj_used;
b4fd07a08   Ganesh Mahendran   mm/zsmalloc: use ...
2141
  	obj_wasted /= class->objs_per_zspage;
04f05909e   Sergey Senozhatsky   zsmalloc: introdu...
2142

6cbf16b3b   Minchan Kim   zsmalloc: use cla...
2143
  	return obj_wasted * class->pages_per_zspage;
04f05909e   Sergey Senozhatsky   zsmalloc: introdu...
2144
  }
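As a worked example with purely illustrative numbers: for a class with objs_per_zspage = 8 and pages_per_zspage = 2 that currently has obj_allocated = 100 and obj_used = 52, there are 48 unused slots, the integer division yields 48 / 8 = 6 whole zspages of slack, and zs_can_compact() reports 6 * 2 = 12 potentially freeable pages. The division deliberately rounds down, since a partially wasted zspage cannot be reclaimed.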
7d3f39382   Sergey Senozhatsky   zsmalloc/zram: in...
2145
  static void __zs_compact(struct zs_pool *pool, struct size_class *class)
312fcae22   Minchan Kim   zsmalloc: support...
2146
  {
312fcae22   Minchan Kim   zsmalloc: support...
2147
  	struct zs_compact_control cc;
3783689a1   Minchan Kim   zsmalloc: introdu...
2148
2149
  	struct zspage *src_zspage;
  	struct zspage *dst_zspage = NULL;
312fcae22   Minchan Kim   zsmalloc: support...
2150

312fcae22   Minchan Kim   zsmalloc: support...
2151
  	spin_lock(&class->lock);
3783689a1   Minchan Kim   zsmalloc: introdu...
2152
  	while ((src_zspage = isolate_zspage(class, true))) {
312fcae22   Minchan Kim   zsmalloc: support...
2153

04f05909e   Sergey Senozhatsky   zsmalloc: introdu...
2154
2155
  		if (!zs_can_compact(class))
  			break;
41b88e14c   Ganesh Mahendran   mm/zsmalloc: use ...
2156
  		cc.obj_idx = 0;
48b4800a1   Minchan Kim   zsmalloc: page mi...
2157
  		cc.s_page = get_first_page(src_zspage);
312fcae22   Minchan Kim   zsmalloc: support...
2158

3783689a1   Minchan Kim   zsmalloc: introdu...
2159
  		while ((dst_zspage = isolate_zspage(class, false))) {
48b4800a1   Minchan Kim   zsmalloc: page mi...
2160
  			cc.d_page = get_first_page(dst_zspage);
312fcae22   Minchan Kim   zsmalloc: support...
2161
  			/*
0dc63d488   Sergey Senozhatsky   zsmalloc: cosmeti...
2162
2163
  			 * If there is no more space in dst_page, resched
  			 * and see if anyone has allocated another zspage.
312fcae22   Minchan Kim   zsmalloc: support...
2164
2165
2166
  			 */
  			if (!migrate_zspage(pool, class, &cc))
  				break;
4aa409cab   Minchan Kim   zsmalloc: separat...
2167
  			putback_zspage(class, dst_zspage);
312fcae22   Minchan Kim   zsmalloc: support...
2168
2169
2170
  		}
  
  		/* Stop if we couldn't find slot */
3783689a1   Minchan Kim   zsmalloc: introdu...
2171
  		if (dst_zspage == NULL)
312fcae22   Minchan Kim   zsmalloc: support...
2172
  			break;
4aa409cab   Minchan Kim   zsmalloc: separat...
2173
2174
  		putback_zspage(class, dst_zspage);
  		if (putback_zspage(class, src_zspage) == ZS_EMPTY) {
48b4800a1   Minchan Kim   zsmalloc: page mi...
2175
  			free_zspage(pool, class, src_zspage);
6cbf16b3b   Minchan Kim   zsmalloc: use cla...
2176
  			pool->stats.pages_compacted += class->pages_per_zspage;
4aa409cab   Minchan Kim   zsmalloc: separat...
2177
  		}
312fcae22   Minchan Kim   zsmalloc: support...
2178
  		spin_unlock(&class->lock);
312fcae22   Minchan Kim   zsmalloc: support...
2179
2180
2181
  		cond_resched();
  		spin_lock(&class->lock);
  	}
3783689a1   Minchan Kim   zsmalloc: introdu...
2182
  	if (src_zspage)
4aa409cab   Minchan Kim   zsmalloc: separat...
2183
  		putback_zspage(class, src_zspage);
312fcae22   Minchan Kim   zsmalloc: support...
2184

7d3f39382   Sergey Senozhatsky   zsmalloc/zram: in...
2185
  	spin_unlock(&class->lock);
312fcae22   Minchan Kim   zsmalloc: support...
2186
2187
2188
2189
2190
  }
  
  unsigned long zs_compact(struct zs_pool *pool)
  {
  	int i;
312fcae22   Minchan Kim   zsmalloc: support...
2191
  	struct size_class *class;
cf8e0fedf   Jerome Marchand   mm/zsmalloc: simp...
2192
  	for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
312fcae22   Minchan Kim   zsmalloc: support...
2193
2194
2195
2196
2197
  		class = pool->size_class[i];
  		if (!class)
  			continue;
  		if (class->index != i)
  			continue;
7d3f39382   Sergey Senozhatsky   zsmalloc/zram: in...
2198
  		__zs_compact(pool, class);
312fcae22   Minchan Kim   zsmalloc: support...
2199
  	}
860c707dc   Sergey Senozhatsky   zsmalloc: account...
2200
  	return pool->stats.pages_compacted;
312fcae22   Minchan Kim   zsmalloc: support...
2201
2202
  }
  EXPORT_SYMBOL_GPL(zs_compact);
  void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats)
  {
  	memcpy(stats, &pool->stats, sizeof(struct zs_pool_stats));
  }
  EXPORT_SYMBOL_GPL(zs_pool_stats);
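/*
 * Illustrative usage sketch (an assumption, not part of the upstream file):
 * pool users snapshot the statistics for reporting; zram, for example,
 * exposes pages_compacted through its mm_stat attribute.
 *
 *	struct zs_pool_stats stats = { };
 *
 *	zs_pool_stats(pool, &stats);
 *	pr_info("pages compacted so far: %lu\n", stats.pages_compacted);
 */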
  static unsigned long zs_shrinker_scan(struct shrinker *shrinker,
  		struct shrink_control *sc)
  {
  	unsigned long pages_freed;
  	struct zs_pool *pool = container_of(shrinker, struct zs_pool,
  			shrinker);
  
  	pages_freed = pool->stats.pages_compacted;
  	/*
  	 * Compact classes and calculate compaction delta.
  	 * Can run concurrently with a manually triggered
  	 * (by user) compaction.
  	 */
  	pages_freed = zs_compact(pool) - pages_freed;
  
  	return pages_freed ? pages_freed : SHRINK_STOP;
  }
  
  static unsigned long zs_shrinker_count(struct shrinker *shrinker,
  		struct shrink_control *sc)
  {
  	int i;
  	struct size_class *class;
  	unsigned long pages_to_free = 0;
  	struct zs_pool *pool = container_of(shrinker, struct zs_pool,
  			shrinker);

	for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
		class = pool->size_class[i];
		if (!class)
			continue;
		if (class->index != i)
			continue;

		pages_to_free += zs_can_compact(class);
  	}
  
  	return pages_to_free;
  }
  
  static void zs_unregister_shrinker(struct zs_pool *pool)
  {
	unregister_shrinker(&pool->shrinker);
  }
  
  static int zs_register_shrinker(struct zs_pool *pool)
  {
  	pool->shrinker.scan_objects = zs_shrinker_scan;
  	pool->shrinker.count_objects = zs_shrinker_count;
  	pool->shrinker.batch = 0;
  	pool->shrinker.seeks = DEFAULT_SEEKS;
  
  	return register_shrinker(&pool->shrinker);
  }
/**
 * zs_create_pool - Creates an allocation pool to work from.
 * @name: pool name to be created
 *
 * This function must be called before anything else when using
 * the zsmalloc allocator.
 *
 * On success, a pointer to the newly created pool is returned,
 * otherwise NULL.
 */
  struct zs_pool *zs_create_pool(const char *name)
{
	int i;
	struct zs_pool *pool;
	struct size_class *prev_class = NULL;

  	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
  	if (!pool)
  		return NULL;

	init_deferred_free(pool);

	pool->name = kstrdup(name, GFP_KERNEL);
	if (!pool->name)
		goto err;

#ifdef CONFIG_COMPACTION
	init_waitqueue_head(&pool->migration_wait);
#endif

	if (create_cache(pool))
		goto err;

	/*
	 * Iterate in reverse, because the size of the size_class that we
	 * want to use for merging should be larger than or equal to the
	 * current size.
	 */
  	for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
		int size;
		int pages_per_zspage;
		int objs_per_zspage;
		struct size_class *class;
		int fullness = 0;

  		size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;
  		if (size > ZS_MAX_ALLOC_SIZE)
  			size = ZS_MAX_ALLOC_SIZE;
  		pages_per_zspage = get_pages_per_zspage(size);
		objs_per_zspage = pages_per_zspage * PAGE_SIZE / size;

  		/*
  		 * We iterate from biggest down to smallest classes,
  		 * so huge_class_size holds the size of the first huge
  		 * class. Any object bigger than or equal to that will
		 * end up in the huge class.
  		 */
  		if (pages_per_zspage != 1 && objs_per_zspage != 1 &&
  				!huge_class_size) {
  			huge_class_size = size;
  			/*
  			 * The object uses ZS_HANDLE_SIZE bytes to store the
  			 * handle. We need to subtract it, because zs_malloc()
  			 * unconditionally adds handle size before it performs
  			 * size class search - so object may be smaller than
  			 * huge class size, yet it still can end up in the huge
  			 * class because it grows by ZS_HANDLE_SIZE extra bytes
  			 * right before class lookup.
  			 */
  			huge_class_size -= (ZS_HANDLE_SIZE - 1);
  		}
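		/*
		 * Worked example (illustrative, under the assumption that S
		 * denotes the size of this first non-huge class): zs_malloc()
		 * looks up the class for (s + ZS_HANDLE_SIZE), so a payload
		 * of s bytes overflows into a huge class iff
		 * s + ZS_HANDLE_SIZE > S, i.e. iff
		 * s >= S - (ZS_HANDLE_SIZE - 1) == huge_class_size, which is
		 * exactly the threshold reported by zs_huge_class_size().
		 */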
  
  		/*
  		 * size_class is used for normal zsmalloc operation such
  		 * as alloc/free for that size. Although it is natural that we
  		 * have one size_class for each size, there is a chance that we
  		 * can get more memory utilization if we use one size_class for
		 * many different sizes whose size_classes have the same
		 * characteristics. So, we make size_class point to the
  		 * previous size_class if possible.
  		 */
  		if (prev_class) {
			if (can_merge(prev_class, pages_per_zspage, objs_per_zspage)) {
  				pool->size_class[i] = prev_class;
  				continue;
  			}
  		}
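		/*
		 * Illustrative example (an assumption spelled out here, not
		 * upstream text): if two adjacent sizes S1 < S2 both yield
		 * the same pages_per_zspage and objs_per_zspage, can_merge()
		 * succeeds and allocations asking for S1 simply reuse the S2
		 * class; they are rounded up to S2 without changing how the
		 * underlying zspage is packed.
		 */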
  
  		class = kzalloc(sizeof(struct size_class), GFP_KERNEL);
  		if (!class)
  			goto err;
  
  		class->size = size;
  		class->index = i;
  		class->pages_per_zspage = pages_per_zspage;
  		class->objs_per_zspage = objs_per_zspage;
  		spin_lock_init(&class->lock);
  		pool->size_class[i] = class;
  		for (fullness = ZS_EMPTY; fullness < NR_ZS_FULLNESS;
  							fullness++)
  			INIT_LIST_HEAD(&class->fullness_list[fullness]);
  
  		prev_class = class;
  	}
  	/* debug only, don't abort if it fails */
  	zs_pool_stat_create(pool, name);
  	if (zs_register_migration(pool))
  		goto err;
  	/*
	 * Not critical, since the shrinker is only used to trigger internal
	 * defragmentation of the pool, which is optional.  If registration
	 * fails we can still use the pool normally and the user can trigger
	 * compaction manually.  Thus, ignore the return code.
  	 */
  	zs_register_shrinker(pool);
  	return pool;
  
  err:
  	zs_destroy_pool(pool);
  	return NULL;
  }
  EXPORT_SYMBOL_GPL(zs_create_pool);
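/*
 * Illustrative usage sketch (an assumption, not part of the upstream file):
 * a minimal round trip through the pool API for a caller-provided buffer
 * "buf" of "len" bytes, where len <= ZS_MAX_ALLOC_SIZE:
 *
 *	struct zs_pool *pool = zs_create_pool("example");
 *	unsigned long handle;
 *	void *dst;
 *
 *	if (!pool)
 *		return -ENOMEM;
 *	handle = zs_malloc(pool, len, GFP_KERNEL);
 *	if (!handle) {
 *		zs_destroy_pool(pool);
 *		return -ENOMEM;
 *	}
 *	dst = zs_map_object(pool, handle, ZS_MM_WO);
 *	memcpy(dst, buf, len);
 *	zs_unmap_object(pool, handle);
 *	...
 *	zs_free(pool, handle);
 *	zs_destroy_pool(pool);
 *
 * Note that the handle is not a pointer: the object must be mapped with
 * zs_map_object() before it can be accessed and unmapped again afterwards,
 * and the mapping must not be held across a sleep.
 */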
  void zs_destroy_pool(struct zs_pool *pool)
{
	int i;

  	zs_unregister_shrinker(pool);
  	zs_unregister_migration(pool);
  	zs_pool_stat_destroy(pool);
  	for (i = 0; i < ZS_SIZE_CLASSES; i++) {
  		int fg;
  		struct size_class *class = pool->size_class[i];
  		if (!class)
  			continue;
  		if (class->index != i)
  			continue;
  		for (fg = ZS_EMPTY; fg < NR_ZS_FULLNESS; fg++) {
  			if (!list_empty(&class->fullness_list[fg])) {
				pr_info("Freeing non-empty class with size %db, fullness group %d\n",
					class->size, fg);
  			}
  		}
  		kfree(class);
  	}
  	destroy_cache(pool);
  	kfree(pool->name);
  	kfree(pool);
  }
  EXPORT_SYMBOL_GPL(zs_destroy_pool);
  static int __init zs_init(void)
  {
  	int ret;
  
  	ret = zsmalloc_mount();
  	if (ret)
  		goto out;
  	ret = cpuhp_setup_state(CPUHP_MM_ZS_PREPARE, "mm/zsmalloc:prepare",
  				zs_cpu_prepare, zs_cpu_dead);
  	if (ret)
  		goto hp_setup_fail;
  #ifdef CONFIG_ZPOOL
  	zpool_register_driver(&zs_zpool_driver);
  #endif
  	zs_stat_init();
  	return 0;
  hp_setup_fail:
  	zsmalloc_unmount();
  out:
  	return ret;
  }
  static void __exit zs_exit(void)
  {
  #ifdef CONFIG_ZPOOL
  	zpool_unregister_driver(&zs_zpool_driver);
  #endif
  	zsmalloc_unmount();
  	cpuhp_remove_state(CPUHP_MM_ZS_PREPARE);
  
  	zs_stat_exit();
  }
  
  module_init(zs_init);
  module_exit(zs_exit);
  
  MODULE_LICENSE("Dual BSD/GPL");
  MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");