Eric Lee / smarc-ti-linux-kernel

1

/*

1

/*

2

* zsmalloc memory allocator

2

* zsmalloc memory allocator

3

*

3

*

4

5

6

*

6

*

7

* This code is released using a dual license strategy: BSD/GPL

7

* This code is released using a dual license strategy: BSD/GPL

8

* You can choose the license that better fits your requirements.

8

* You can choose the license that better fits your requirements.

9

*

9

*

10

* Released under the terms of 3-clause BSD License

10

* Released under the terms of 3-clause BSD License

11

* Released under the terms of GNU General Public License Version 2.0

11

* Released under the terms of GNU General Public License Version 2.0

12

*/

12

*/

13

14

/*

14

/*

15

* This allocator is designed for use with zram. Thus, the allocator is

15

* This allocator is designed for use with zram. Thus, the allocator is

16

* supposed to work well under low memory conditions. In particular, it

16

* supposed to work well under low memory conditions. In particular, it

17

* never attempts higher order page allocation which is very likely to

17

* never attempts higher order page allocation which is very likely to

18

* fail under memory pressure. On the other hand, if we just use single

18

* fail under memory pressure. On the other hand, if we just use single

19

* (0-order) pages, it would suffer from very high fragmentation --

19

* (0-order) pages, it would suffer from very high fragmentation --

20

* any object of size PAGE_SIZE/2 or larger would occupy an entire page.

20

* any object of size PAGE_SIZE/2 or larger would occupy an entire page.

21

* This was one of the major issues with its predecessor (xvmalloc).

21

* This was one of the major issues with its predecessor (xvmalloc).

22

*

22

*

23

* To overcome these issues, zsmalloc allocates a bunch of 0-order pages

23

* To overcome these issues, zsmalloc allocates a bunch of 0-order pages

24

* and links them together using various 'struct page' fields. These linked

24

* and links them together using various 'struct page' fields. These linked

25

* pages act as a single higher-order page i.e. an object can span 0-order

25

* pages act as a single higher-order page i.e. an object can span 0-order

26

* page boundaries. The code refers to these linked pages as a single entity

26

* page boundaries. The code refers to these linked pages as a single entity

27

* called zspage.

27

* called zspage.

28

*

28

*

29

* For simplicity, zsmalloc can only allocate objects of size up to PAGE_SIZE

29

* For simplicity, zsmalloc can only allocate objects of size up to PAGE_SIZE

30

* since this satisfies the requirements of all its current users (in the

30

* since this satisfies the requirements of all its current users (in the

31

* worst case, page is incompressible and is thus stored "as-is" i.e. in

31

* worst case, page is incompressible and is thus stored "as-is" i.e. in

32

* uncompressed form). For allocation requests larger than this size, failure

32

* uncompressed form). For allocation requests larger than this size, failure

33

* is returned (see zs_malloc).

33

* is returned (see zs_malloc).

34

*

34

*

35

* Additionally, zs_malloc() does not return a dereferenceable pointer.

35

* Additionally, zs_malloc() does not return a dereferenceable pointer.

36

* Instead, it returns an opaque handle (unsigned long) which encodes actual

36

* Instead, it returns an opaque handle (unsigned long) which encodes actual

37

* location of the allocated object. The reason for this indirection is that

37

* location of the allocated object. The reason for this indirection is that

38

* zsmalloc does not keep zspages permanently mapped since that would cause

38

* zsmalloc does not keep zspages permanently mapped since that would cause

39

* issues on 32-bit systems where the VA region for kernel space mappings

39

* issues on 32-bit systems where the VA region for kernel space mappings

40

* is very small. So, before using the allocated memory, the object has to

40

* is very small. So, before using the allocated memory, the object has to

41

* be mapped using zs_map_object() to get a usable pointer and subsequently

41

* be mapped using zs_map_object() to get a usable pointer and subsequently

42

* unmapped using zs_unmap_object().

42

* unmapped using zs_unmap_object().

43

*

43

*

44

* Following is how we use various fields and flags of underlying

44

* Following is how we use various fields and flags of underlying

45

* struct page(s) to form a zspage.

45

* struct page(s) to form a zspage.

46

*

46

*

47

* Usage of struct page fields:

47

* Usage of struct page fields:

48

* page->first_page: points to the first component (0-order) page

48

* page->first_page: points to the first component (0-order) page

49

* page->index (union with page->freelist): offset of the first object

49

* page->index (union with page->freelist): offset of the first object

50

* starting in this page. For the first page, this is

50

* starting in this page. For the first page, this is

51

* always 0, so we use this field (aka freelist) to point

51

* always 0, so we use this field (aka freelist) to point

52

* to the first free object in zspage.

52

* to the first free object in zspage.

53

* page->lru: links together all component pages (except the first page)

53

* page->lru: links together all component pages (except the first page)

54

* of a zspage

54

* of a zspage

55

*

55

*

56

* For _first_ page only:

56

* For _first_ page only:

57

*

57

*

58

* page->private (union with page->first_page): refers to the

58

* page->private (union with page->first_page): refers to the

59

* component page after the first page

59

* component page after the first page

60

* page->freelist: points to the first free object in zspage.

60

* page->freelist: points to the first free object in zspage.

61

* Free objects are linked together using in-place

61

* Free objects are linked together using in-place

62

* metadata.

62

* metadata.

63

* page->objects: maximum number of objects we can store in this

63

* page->objects: maximum number of objects we can store in this

64

* zspage (class->zspage_order * PAGE_SIZE / class->size)

64

* zspage (class->zspage_order * PAGE_SIZE / class->size)

65

* page->lru: links together first pages of various zspages.

65

* page->lru: links together first pages of various zspages.

66

* Basically forming list of zspages in a fullness group.

66

* Basically forming list of zspages in a fullness group.

67

* page->mapping: class index and fullness group of the zspage

67

* page->mapping: class index and fullness group of the zspage

68

*

68

*

69

* Usage of struct page flags:

69

* Usage of struct page flags:

70

* PG_private: identifies the first component page

70

* PG_private: identifies the first component page

71

* PG_private2: identifies the last component page

71

* PG_private2: identifies the last component page

72

*

72

*

73

*/

73

*/

74

75

#ifdef CONFIG_ZSMALLOC_DEBUG

75

#ifdef CONFIG_ZSMALLOC_DEBUG

76

#define DEBUG

76

#define DEBUG

77

#endif

77

#endif

78

79

#include <linux/module.h>

79

#include <linux/module.h>

80

#include <linux/kernel.h>

80

#include <linux/kernel.h>

81

#include <linux/bitops.h>

81

#include <linux/bitops.h>

82

#include <linux/errno.h>

82

#include <linux/errno.h>

83

#include <linux/highmem.h>

83

#include <linux/highmem.h>

84

#include <linux/string.h>

84

#include <linux/string.h>

85

#include <linux/slab.h>

85

#include <linux/slab.h>

86

#include <asm/tlbflush.h>

86

#include <asm/tlbflush.h>

87

#include <asm/pgtable.h>

87

#include <asm/pgtable.h>

88

#include <linux/cpumask.h>

88

#include <linux/cpumask.h>

89

#include <linux/cpu.h>

89

#include <linux/cpu.h>

90

#include <linux/vmalloc.h>

90

#include <linux/vmalloc.h>

91

#include <linux/hardirq.h>

91

#include <linux/hardirq.h>

92

#include <linux/spinlock.h>

92

#include <linux/spinlock.h>

93

#include <linux/types.h>

93

#include <linux/types.h>

94

#include <linux/zsmalloc.h>

94

#include <linux/zsmalloc.h>

95

#include <linux/zpool.h>

95

#include <linux/zpool.h>

96

97

/*

97

/*

98

* This must be power of 2 and greater than or equal to sizeof(link_free).

98

* This must be power of 2 and greater than or equal to sizeof(link_free).

99

* These two conditions ensure that any 'struct link_free' itself doesn't

99

* These two conditions ensure that any 'struct link_free' itself doesn't

100

* span more than 1 page which avoids complex case of mapping 2 pages simply

100

* span more than 1 page which avoids complex case of mapping 2 pages simply

101

* to restore link_free pointer values.

101

* to restore link_free pointer values.

102

*/

102

*/

103

/* Alignment (in bytes) of every object; all class sizes are multiples of it. */
#define ZS_ALIGN		8

104

105

/*

105

/*

106

* A single 'zspage' is composed of up to 2^N discontiguous 0-order (single)

106

* A single 'zspage' is composed of up to 2^N discontiguous 0-order (single)

107

* pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N.

107

* pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N.

108

*/

108

*/

109

/* Upper limit on N, where a zspage is built from up to 2^N 0-order pages. */
#define ZS_MAX_ZSPAGE_ORDER 2
/* Largest number of 0-order pages a single zspage may contain (2^N). */
#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)

111

112

/*

112

/*

113

* Object location (<PFN>, <obj_idx>) is encoded as

113

* Object location (<PFN>, <obj_idx>) is encoded as

114

* a single (unsigned long) handle value.

114

* a single (unsigned long) handle value.

115

*

115

*

116

* Note that object index <obj_idx> is relative to system

116

* Note that object index <obj_idx> is relative to system

117

* page <PFN> it is stored in, so for each sub-page belonging

117

* page <PFN> it is stored in, so for each sub-page belonging

118

* to a zspage, obj_idx starts with 0.

118

* to a zspage, obj_idx starts with 0.

119

*

119

*

120

* This is made more complicated by various memory models and PAE.

120

* This is made more complicated by various memory models and PAE.

121

*/

121

*/

122

123

/*
 * Fallback for configurations that do not provide MAX_PHYSMEM_BITS.
 */
#ifndef MAX_PHYSMEM_BITS
#ifdef CONFIG_HIGHMEM64G
#define MAX_PHYSMEM_BITS 36
#else /* !CONFIG_HIGHMEM64G */
/*
 * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
 * be PAGE_SHIFT
 */
#define MAX_PHYSMEM_BITS BITS_PER_LONG
#endif
#endif
/* Bits needed to hold a PFN; the remaining handle bits hold obj_idx. */
#define _PFN_BITS		(MAX_PHYSMEM_BITS - PAGE_SHIFT)
#define OBJ_INDEX_BITS	(BITS_PER_LONG - _PFN_BITS)
#define OBJ_INDEX_MASK	((_AC(1, UL) << OBJ_INDEX_BITS) - 1)

137

138

/* NOTE: evaluates its arguments more than once; only use with constants. */
#define MAX(a, b) ((a) >= (b) ? (a) : (b))
/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
#define ZS_MIN_ALLOC_SIZE \
	MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
/* Largest object size that can be stored */
#define ZS_MAX_ALLOC_SIZE	PAGE_SIZE

143

144

/*

144

/*

145

* On systems with 4K page size, this gives 255 size classes! There is a

145

* On systems with 4K page size, this gives 255 size classes! There is a

146

* trade-off here:

146

* trade-off here:

147

* - Large number of size classes is potentially wasteful as free pages are

147

* - Large number of size classes is potentially wasteful as free pages are

148

* spread across these classes

148

* spread across these classes

149

* - Small number of size classes causes large internal fragmentation

149

* - Small number of size classes causes large internal fragmentation

150

* - Probably it's better to use specific size classes (empirically

150

* - Probably it's better to use specific size classes (empirically

151

* determined). NOTE: all those class sizes must be set as multiple of

151

* determined). NOTE: all those class sizes must be set as multiple of

152

* ZS_ALIGN to make sure link_free itself never has to span 2 pages.

152

* ZS_ALIGN to make sure link_free itself never has to span 2 pages.

153

*

153

*

154

* ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN

154

* ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN

155

* (reason above)

155

* (reason above)

156

*/

156

*/

157

/* Size step between consecutive size classes (PAGE_SIZE / 256). */
#define ZS_SIZE_CLASS_DELTA	(PAGE_SIZE >> 8)

158

159

/*
 * We do not maintain any list for completely empty or full pages.
 *
 * Only the groups listed before _ZS_NR_FULLNESS_GROUPS have a list in
 * size_class->fullness_list[]; ZS_EMPTY and ZS_FULL deliberately come
 * after it so that they fall outside that array.
 */
enum fullness_group {
	ZS_ALMOST_FULL,
	ZS_ALMOST_EMPTY,
	_ZS_NR_FULLNESS_GROUPS,

	ZS_EMPTY,
	ZS_FULL
};

170

171

/*
 * number of size_classes
 */
static int zs_size_classes;

175

176

/*
 * We assign a page to ZS_ALMOST_EMPTY fullness group when:
 *	n <= N / f, where
 * n = number of allocated objects
 * N = total number of objects zspage can store
 * f = fullness_threshold_frac
 *
 * Similarly, we assign zspage to:
 *	ZS_ALMOST_FULL	when n > N / f
 *	ZS_EMPTY	when n == 0
 *	ZS_FULL		when n == N
 *
 * (see: fix_fullness_group())
 */
static const int fullness_threshold_frac = 4;

191

192

struct size_class {

192

struct size_class {

193

/*

193

/*

194

* Size of objects stored in this class. Must be multiple

194

* Size of objects stored in this class. Must be multiple

195

* of ZS_ALIGN.

195

* of ZS_ALIGN.

196

*/

196

*/

197

int size;

197

int size;

198

unsigned int index;

198

unsigned int index;

199

200

/* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */

200

/* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */

201

int pages_per_zspage;

201

int pages_per_zspage;

202

203

spinlock_t lock;

203

spinlock_t lock;

204

205

struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS];

205

struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS];

206

};

206

};

207

208

/*
 * Placed within free objects to form a singly linked list.
 * For every zspage, first_page->freelist gives head of this list.
 *
 * This must be power of 2 and less than or equal to ZS_ALIGN
 */
struct link_free {
	/* Handle of next free chunk (encodes <PFN, obj_idx>) */
	void *next;
};

218

219

struct zs_pool {

219

struct zs_pool {

220

struct size_class **size_class;

220

struct size_class **size_class;

221

222

gfp_t flags; /* allocation flags used when growing pool */

222

gfp_t flags; /* allocation flags used when growing pool */

223

atomic_long_t pages_allocated;

223

atomic_long_t pages_allocated;

224

};

224

};

225

226

/*
 * A zspage's class index and fullness group
 * are encoded in its (first)page->mapping:
 * the low FULLNESS_BITS hold the fullness group and the next
 * CLASS_IDX_BITS hold the class index.
 */
#define CLASS_IDX_BITS	28
#define FULLNESS_BITS	4
#define CLASS_IDX_MASK	((1 << CLASS_IDX_BITS) - 1)
#define FULLNESS_MASK	((1 << FULLNESS_BITS) - 1)

234

235

struct mapping_area {

235

struct mapping_area {

236

#ifdef CONFIG_PGTABLE_MAPPING

236

#ifdef CONFIG_PGTABLE_MAPPING

237

struct vm_struct *vm; /* vm area for mapping object that span pages */

237

struct vm_struct *vm; /* vm area for mapping object that span pages */

238

#else

238

#else

239

char *vm_buf; /* copy buffer for objects that span pages */

239

char *vm_buf; /* copy buffer for objects that span pages */

240

#endif

240

#endif

241

char *vm_addr; /* address of kmap_atomic()'ed pages */

241

char *vm_addr; /* address of kmap_atomic()'ed pages */

242

enum zs_mapmode vm_mm; /* mapping mode */

242

enum zs_mapmode vm_mm; /* mapping mode */

243

};

243

};

244

245

/* zpool driver */

245

/* zpool driver */

246

247

#ifdef CONFIG_ZPOOL

247

#ifdef CONFIG_ZPOOL

248

249

/*
 * zpool ->create hook: create a zsmalloc pool using @gfp.
 * @zpool_ops is part of the callback signature but is not used here.
 */
static void *zs_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops)
{
	return zs_create_pool(gfp);
}

253

254

/* zpool ->destroy hook: tear down the zsmalloc pool behind @pool. */
static void zs_zpool_destroy(void *pool)
{
	zs_destroy_pool(pool);
}

258

259

/*
 * zpool ->malloc hook: allocate @size bytes from @pool and store the
 * resulting handle in *@handle.
 *
 * @gfp is not used: zs_malloc() as called here takes no gfp argument.
 *
 * Returns 0 on success, or -ENOMEM when zs_malloc() yields a zero handle
 * (a proper errno instead of a bare -1, which would read as -EPERM).
 */
static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	*handle = zs_malloc(pool, size);
	return *handle ? 0 : -ENOMEM;
}

265

/* zpool ->free hook: release the object identified by @handle. */
static void zs_zpool_free(void *pool, unsigned long handle)
{
	zs_free(pool, handle);
}

269

270

static int zs_zpool_shrink(void *pool, unsigned int pages,

270

static int zs_zpool_shrink(void *pool, unsigned int pages,

271

unsigned int *reclaimed)

271

unsigned int *reclaimed)

272

{

272

{

273

return -EINVAL;

273

return -EINVAL;

274

}

274

}

275

276

static void *zs_zpool_map(void *pool, unsigned long handle,

276

static void *zs_zpool_map(void *pool, unsigned long handle,

277

enum zpool_mapmode mm)

277

enum zpool_mapmode mm)

278

{

278

{

279

enum zs_mapmode zs_mm;

279

enum zs_mapmode zs_mm;

280

281

switch (mm) {

281

switch (mm) {

282

case ZPOOL_MM_RO:

282

case ZPOOL_MM_RO:

283

zs_mm = ZS_MM_RO;

283

zs_mm = ZS_MM_RO;

284

break;

284

break;

285

case ZPOOL_MM_WO:

285

case ZPOOL_MM_WO:

286

zs_mm = ZS_MM_WO;

286

zs_mm = ZS_MM_WO;

287

break;

287

break;

288

case ZPOOL_MM_RW: /* fallthru */

288

case ZPOOL_MM_RW: /* fallthru */

289

default:

289

default:

290

zs_mm = ZS_MM_RW;

290

zs_mm = ZS_MM_RW;

291

break;

291

break;

292

}

292

}

293

294

return zs_map_object(pool, handle, zs_mm);

294

return zs_map_object(pool, handle, zs_mm);

295

}

295

}

296

/* zpool ->unmap hook: undo a previous zs_zpool_map() of @handle. */
static void zs_zpool_unmap(void *pool, unsigned long handle)
{
	zs_unmap_object(pool, handle);
}

300

301

static u64 zs_zpool_total_size(void *pool)

301

static u64 zs_zpool_total_size(void *pool)

302

{

302

{

303

return zs_get_total_pages(pool) << PAGE_SHIFT;

303

return zs_get_total_pages(pool) << PAGE_SHIFT;

304

}

304

}

305

306

static struct zpool_driver zs_zpool_driver = {

306

static struct zpool_driver zs_zpool_driver = {

307

.type = "zsmalloc",

307

.type = "zsmalloc",

308

.owner = THIS_MODULE,

308

.owner = THIS_MODULE,

309

.create = zs_zpool_create,

309

.create = zs_zpool_create,

310

.destroy = zs_zpool_destroy,

310

.destroy = zs_zpool_destroy,

311

.malloc = zs_zpool_malloc,

311

.malloc = zs_zpool_malloc,

312

.free = zs_zpool_free,

312

.free = zs_zpool_free,

313

.shrink = zs_zpool_shrink,

313

.shrink = zs_zpool_shrink,

314

.map = zs_zpool_map,

314

.map = zs_zpool_map,

315

.unmap = zs_zpool_unmap,

315

.unmap = zs_zpool_unmap,

316

.total_size = zs_zpool_total_size,

316

.total_size = zs_zpool_total_size,

317

};

317

};

318

319

MODULE_ALIAS("zpool-zsmalloc");

320

#endif /* CONFIG_ZPOOL */

320

#endif /* CONFIG_ZPOOL */

321

322

/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */

322

/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */

323

static DEFINE_PER_CPU(struct mapping_area, zs_map_area);

323

static DEFINE_PER_CPU(struct mapping_area, zs_map_area);

324

325

/* Non-zero if @page is the first component page of a zspage (PG_private set). */
static int is_first_page(struct page *page)
{
	return PagePrivate(page);
}

329

330

/* Non-zero if @page is the last component page of a zspage (PG_private2 set). */
static int is_last_page(struct page *page)
{
	return PagePrivate2(page);
}

334

335

static void get_zspage_mapping(struct page *page, unsigned int *class_idx,

335

static void get_zspage_mapping(struct page *page, unsigned int *class_idx,

336

enum fullness_group *fullness)

336

enum fullness_group *fullness)

337

{

337

{

338

unsigned long m;

338

unsigned long m;

339

BUG_ON(!is_first_page(page));

339

BUG_ON(!is_first_page(page));

340

341

m = (unsigned long)page->mapping;

341

m = (unsigned long)page->mapping;

342

*fullness = m & FULLNESS_MASK;

342

*fullness = m & FULLNESS_MASK;

343

*class_idx = (m >> FULLNESS_BITS) & CLASS_IDX_MASK;

343

*class_idx = (m >> FULLNESS_BITS) & CLASS_IDX_MASK;

344

}

344

}

345

346

static void set_zspage_mapping(struct page *page, unsigned int class_idx,

346

static void set_zspage_mapping(struct page *page, unsigned int class_idx,

347

enum fullness_group fullness)

347

enum fullness_group fullness)

348

{

348

{

349

unsigned long m;

349

unsigned long m;

350

BUG_ON(!is_first_page(page));

350

BUG_ON(!is_first_page(page));

351

352

m = ((class_idx & CLASS_IDX_MASK) << FULLNESS_BITS) |

352

m = ((class_idx & CLASS_IDX_MASK) << FULLNESS_BITS) |

353

(fullness & FULLNESS_MASK);

353

(fullness & FULLNESS_MASK);

354

page->mapping = (struct address_space *)m;

354

page->mapping = (struct address_space *)m;

355

}

355

}

356

357

/*

357

/*

358

* zsmalloc divides the pool into various size classes where each

358

* zsmalloc divides the pool into various size classes where each

359

* class maintains a list of zspages where each zspage is divided

359

* class maintains a list of zspages where each zspage is divided

360

* into equal sized chunks. Each allocation falls into one of these

360

* into equal sized chunks. Each allocation falls into one of these

361

* classes depending on its size. This function returns index of the

361

* classes depending on its size. This function returns index of the

362

* size class which has chunk size big enough to hold the give size.

362

* size class which has chunk size big enough to hold the give size.

363

*/

363

*/

364

static int get_size_class_index(int size)

364

static int get_size_class_index(int size)

365

{

365

{

366

int idx = 0;

366

int idx = 0;

367

368

if (likely(size > ZS_MIN_ALLOC_SIZE))

368

if (likely(size > ZS_MIN_ALLOC_SIZE))

369

idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE,

369

idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE,

370

ZS_SIZE_CLASS_DELTA);

370

ZS_SIZE_CLASS_DELTA);

371

372

return idx;

372

return idx;

373

}

373

}

374

375

/*

375

/*

376

* For each size class, zspages are divided into different groups

376

* For each size class, zspages are divided into different groups

377

* depending on how "full" they are. This was done so that we could

377

* depending on how "full" they are. This was done so that we could

378

* easily find empty or nearly empty zspages when we try to shrink

378

* easily find empty or nearly empty zspages when we try to shrink

379

* the pool (not yet implemented). This function returns fullness

379

* the pool (not yet implemented). This function returns fullness

380

* status of the given page.

380

* status of the given page.

381

*/

381

*/

382

static enum fullness_group get_fullness_group(struct page *page)

382

static enum fullness_group get_fullness_group(struct page *page)

383

{

383

{

384

int inuse, max_objects;

384

int inuse, max_objects;

385

enum fullness_group fg;

385

enum fullness_group fg;

386

BUG_ON(!is_first_page(page));

386

BUG_ON(!is_first_page(page));

387

388

inuse = page->inuse;

388

inuse = page->inuse;

389

max_objects = page->objects;

389

max_objects = page->objects;

390

391

if (inuse == 0)

391

if (inuse == 0)

392

fg = ZS_EMPTY;

392

fg = ZS_EMPTY;

393

else if (inuse == max_objects)

393

else if (inuse == max_objects)

394

fg = ZS_FULL;

394

fg = ZS_FULL;

395

else if (inuse <= max_objects / fullness_threshold_frac)

395

else if (inuse <= max_objects / fullness_threshold_frac)

396

fg = ZS_ALMOST_EMPTY;

396

fg = ZS_ALMOST_EMPTY;

397

else

397

else

398

fg = ZS_ALMOST_FULL;

398

fg = ZS_ALMOST_FULL;

399

400

return fg;

400

return fg;

401

}

401

}

402

403

/*

403

/*

404

* Each size class maintains various freelists and zspages are assigned

404

* Each size class maintains various freelists and zspages are assigned

405

* to one of these freelists based on the number of live objects they

405

* to one of these freelists based on the number of live objects they

406

* have. This functions inserts the given zspage into the freelist

406

* have. This functions inserts the given zspage into the freelist

407

* identified by <class, fullness_group>.

407

* identified by <class, fullness_group>.

408

*/

408

*/

409

static void insert_zspage(struct page *page, struct size_class *class,

409

static void insert_zspage(struct page *page, struct size_class *class,

410

enum fullness_group fullness)

410

enum fullness_group fullness)

411

{

411

{

412

struct page **head;

412

struct page **head;

413

414

BUG_ON(!is_first_page(page));

414

BUG_ON(!is_first_page(page));

415

416

if (fullness >= _ZS_NR_FULLNESS_GROUPS)

416

if (fullness >= _ZS_NR_FULLNESS_GROUPS)

417

return;

417

return;

418

419

head = &class->fullness_list[fullness];

419

head = &class->fullness_list[fullness];

420

if (*head)

420

if (*head)

421

list_add_tail(&page->lru, &(*head)->lru);

421

list_add_tail(&page->lru, &(*head)->lru);

422

423

*head = page;

423

*head = page;

424

}

424

}

425

426

/*

426

/*

427

* This function removes the given zspage from the freelist identified

427

* This function removes the given zspage from the freelist identified

428

* by <class, fullness_group>.

428

* by <class, fullness_group>.

429

*/

429

*/

430

static void remove_zspage(struct page *page, struct size_class *class,

430

static void remove_zspage(struct page *page, struct size_class *class,

431

enum fullness_group fullness)

431

enum fullness_group fullness)

432

{

432

{

433

struct page **head;

433

struct page **head;

434

435

BUG_ON(!is_first_page(page));

435

BUG_ON(!is_first_page(page));

436

437

if (fullness >= _ZS_NR_FULLNESS_GROUPS)

437

if (fullness >= _ZS_NR_FULLNESS_GROUPS)

438

return;

438

return;

439

440

head = &class->fullness_list[fullness];

440

head = &class->fullness_list[fullness];

441

BUG_ON(!*head);

441

BUG_ON(!*head);

442

if (list_empty(&(*head)->lru))

442

if (list_empty(&(*head)->lru))

443

*head = NULL;

443

*head = NULL;

444

else if (*head == page)

444

else if (*head == page)

445

*head = (struct page *)list_entry((*head)->lru.next,

445

*head = (struct page *)list_entry((*head)->lru.next,

446

struct page, lru);

446

struct page, lru);

447

448

list_del_init(&page->lru);

448

list_del_init(&page->lru);

449

}

449

}

450

451

/*

451

/*

452

* Each size class maintains zspages in different fullness groups depending

452

* Each size class maintains zspages in different fullness groups depending

453

* on the number of live objects they contain. When allocating or freeing

453

* on the number of live objects they contain. When allocating or freeing

454

* objects, the fullness status of the page can change, say, from ALMOST_FULL

454

* objects, the fullness status of the page can change, say, from ALMOST_FULL

455

* to ALMOST_EMPTY when freeing an object. This function checks if such

455

* to ALMOST_EMPTY when freeing an object. This function checks if such

456

* a status change has occurred for the given page and accordingly moves the

456

* a status change has occurred for the given page and accordingly moves the

457

* page from the freelist of the old fullness group to that of the new

457

* page from the freelist of the old fullness group to that of the new

458

* fullness group.

458

* fullness group.

459

*/

459

*/

460

static enum fullness_group fix_fullness_group(struct zs_pool *pool,

460

static enum fullness_group fix_fullness_group(struct zs_pool *pool,

461

struct page *page)

461

struct page *page)

462

{

462

{

463

int class_idx;

463

int class_idx;

464

struct size_class *class;

464

struct size_class *class;

465

enum fullness_group currfg, newfg;

465

enum fullness_group currfg, newfg;

466

467

BUG_ON(!is_first_page(page));

467

BUG_ON(!is_first_page(page));

468

469

get_zspage_mapping(page, &class_idx, &currfg);

469

get_zspage_mapping(page, &class_idx, &currfg);

470

newfg = get_fullness_group(page);

470

newfg = get_fullness_group(page);

471

if (newfg == currfg)

471

if (newfg == currfg)

472

goto out;

472

goto out;

473

474

class = pool->size_class[class_idx];

474

class = pool->size_class[class_idx];

475

remove_zspage(page, class, currfg);

475

remove_zspage(page, class, currfg);

476

insert_zspage(page, class, newfg);

476

insert_zspage(page, class, newfg);

477

set_zspage_mapping(page, class_idx, newfg);

477

set_zspage_mapping(page, class_idx, newfg);

478

479

out:

479

out:

480

return newfg;

480

return newfg;

481

}

481

}

482

483

/*

483

/*

484

* We have to decide on how many pages to link together

484

* We have to decide on how many pages to link together

485

* to form a zspage for each size class. This is important

485

* to form a zspage for each size class. This is important

486

* to reduce wastage due to unusable space left at end of

486

* to reduce wastage due to unusable space left at end of

487

* each zspage which is given as:

487

* each zspage which is given as:

488

* wastage = Zp - Zp % size_class

488

* wastage = Zp - Zp % size_class

489

* where Zp = zspage size = k * PAGE_SIZE where k = 1, 2, ...

489

* where Zp = zspage size = k * PAGE_SIZE where k = 1, 2, ...

490

*

490

*

491

* For example, for size class of 3/8 * PAGE_SIZE, we should

491

* For example, for size class of 3/8 * PAGE_SIZE, we should

492

* link together 3 PAGE_SIZE sized pages to form a zspage

492

* link together 3 PAGE_SIZE sized pages to form a zspage

493

* since then we can perfectly fit in 8 such objects.

493

* since then we can perfectly fit in 8 such objects.

494

*/

494

*/

495

static int get_pages_per_zspage(int class_size)

495

static int get_pages_per_zspage(int class_size)

496

{

496

{

497

int i, max_usedpc = 0;

497

int i, max_usedpc = 0;

498

/* zspage order which gives maximum used size per KB */

498

/* zspage order which gives maximum used size per KB */

499

int max_usedpc_order = 1;

499

int max_usedpc_order = 1;

500

501

for (i = 1; i <= ZS_MAX_PAGES_PER_ZSPAGE; i++) {

501

for (i = 1; i <= ZS_MAX_PAGES_PER_ZSPAGE; i++) {

502

int zspage_size;

502

int zspage_size;

503

int waste, usedpc;

503

int waste, usedpc;

504

505

zspage_size = i * PAGE_SIZE;

505

zspage_size = i * PAGE_SIZE;

506

waste = zspage_size % class_size;

506

waste = zspage_size % class_size;

507

usedpc = (zspage_size - waste) * 100 / zspage_size;

507

usedpc = (zspage_size - waste) * 100 / zspage_size;

508

509

if (usedpc > max_usedpc) {

509

if (usedpc > max_usedpc) {

510

max_usedpc = usedpc;

510

max_usedpc = usedpc;

511

max_usedpc_order = i;

511

max_usedpc_order = i;

512

}

512

}

513

}

513

}

514

515

return max_usedpc_order;

515

return max_usedpc_order;

516

}

516

}

517

518

/*

518

/*

519

* A single 'zspage' is composed of many system pages which are

519

* A single 'zspage' is composed of many system pages which are

520

* linked together using fields in struct page. This function finds

520

* linked together using fields in struct page. This function finds

521

* the first/head page, given any component page of a zspage.

521

* the first/head page, given any component page of a zspage.

522

*/

522

*/

523

static struct page *get_first_page(struct page *page)

523

static struct page *get_first_page(struct page *page)

524

{

524

{

525

if (is_first_page(page))

525

if (is_first_page(page))

526

return page;

526

return page;

527

else

527

else

528

return page->first_page;

528

return page->first_page;

529

}

529

}

530

531

static struct page *get_next_page(struct page *page)

531

static struct page *get_next_page(struct page *page)

532

{

532

{

533

struct page *next;

533

struct page *next;

534

535

if (is_last_page(page))

535

if (is_last_page(page))

536

next = NULL;

536

next = NULL;

537

else if (is_first_page(page))

537

else if (is_first_page(page))

538

next = (struct page *)page_private(page);

538

next = (struct page *)page_private(page);

539

else

539

else

540

next = list_entry(page->lru.next, struct page, lru);

540

next = list_entry(page->lru.next, struct page, lru);

541

542

return next;

542

return next;

543

}

543

}

544

545

/*

545

/*

546

* Encode <page, obj_idx> as a single handle value.

546

* Encode <page, obj_idx> as a single handle value.

547

* On hardware platforms with physical memory starting at 0x0 the pfn

547

* On hardware platforms with physical memory starting at 0x0 the pfn

548

* could be 0 so we ensure that the handle will never be 0 by adjusting the

548

* could be 0 so we ensure that the handle will never be 0 by adjusting the

549

* encoded obj_idx value before encoding.

549

* encoded obj_idx value before encoding.

550

*/

550

*/

551

static void *obj_location_to_handle(struct page *page, unsigned long obj_idx)

551

static void *obj_location_to_handle(struct page *page, unsigned long obj_idx)

552

{

552

{

553

unsigned long handle;

553

unsigned long handle;

554

555

if (!page) {

555

if (!page) {

556

BUG_ON(obj_idx);

556

BUG_ON(obj_idx);

557

return NULL;

557

return NULL;

558

}

558

}

559

560

handle = page_to_pfn(page) << OBJ_INDEX_BITS;

560

handle = page_to_pfn(page) << OBJ_INDEX_BITS;

561

handle |= ((obj_idx + 1) & OBJ_INDEX_MASK);

561

handle |= ((obj_idx + 1) & OBJ_INDEX_MASK);

562

563

return (void *)handle;

563

return (void *)handle;

564

}

564

}

565

566

/*

566

/*

567

* Decode <page, obj_idx> pair from the given object handle. We adjust the

567

* Decode <page, obj_idx> pair from the given object handle. We adjust the

568

* decoded obj_idx back to its original value since it was adjusted in

568

* decoded obj_idx back to its original value since it was adjusted in

569

* obj_location_to_handle().

569

* obj_location_to_handle().

570

*/

570

*/

571

static void obj_handle_to_location(unsigned long handle, struct page **page,

571

static void obj_handle_to_location(unsigned long handle, struct page **page,

572

unsigned long *obj_idx)

572

unsigned long *obj_idx)

573

{

573

{

574

*page = pfn_to_page(handle >> OBJ_INDEX_BITS);

574

*page = pfn_to_page(handle >> OBJ_INDEX_BITS);

575

*obj_idx = (handle & OBJ_INDEX_MASK) - 1;

575

*obj_idx = (handle & OBJ_INDEX_MASK) - 1;

576

}

576

}

577

578

static unsigned long obj_idx_to_offset(struct page *page,

578

static unsigned long obj_idx_to_offset(struct page *page,

579

unsigned long obj_idx, int class_size)

579

unsigned long obj_idx, int class_size)

580

{

580

{

581

unsigned long off = 0;

581

unsigned long off = 0;

582

583

if (!is_first_page(page))

583

if (!is_first_page(page))

584

off = page->index;

584

off = page->index;

585

586

return off + obj_idx * class_size;

586

return off + obj_idx * class_size;

587

}

587

}

588

589

static void reset_page(struct page *page)

589

static void reset_page(struct page *page)

590

{

590

{

591

clear_bit(PG_private, &page->flags);

591

clear_bit(PG_private, &page->flags);

592

clear_bit(PG_private_2, &page->flags);

592

clear_bit(PG_private_2, &page->flags);

593

set_page_private(page, 0);

593

set_page_private(page, 0);

594

page->mapping = NULL;

594

page->mapping = NULL;

595

page->freelist = NULL;

595

page->freelist = NULL;

596

page_mapcount_reset(page);

596

page_mapcount_reset(page);

597

}

597

}

598

599

static void free_zspage(struct page *first_page)

599

static void free_zspage(struct page *first_page)

600

{

600

{

601

struct page *nextp, *tmp, *head_extra;

601

struct page *nextp, *tmp, *head_extra;

602

603

BUG_ON(!is_first_page(first_page));

603

BUG_ON(!is_first_page(first_page));

604

BUG_ON(first_page->inuse);

604

BUG_ON(first_page->inuse);

605

606

head_extra = (struct page *)page_private(first_page);

606

head_extra = (struct page *)page_private(first_page);

607

608

reset_page(first_page);

608

reset_page(first_page);

609

__free_page(first_page);

609

__free_page(first_page);

610

611

/* zspage with only 1 system page */

611

/* zspage with only 1 system page */

612

if (!head_extra)

612

if (!head_extra)

613

return;

613

return;

614

615

list_for_each_entry_safe(nextp, tmp, &head_extra->lru, lru) {

615

list_for_each_entry_safe(nextp, tmp, &head_extra->lru, lru) {

616

list_del(&nextp->lru);

616

list_del(&nextp->lru);

617

reset_page(nextp);

617

reset_page(nextp);

618

__free_page(nextp);

618

__free_page(nextp);

619

}

619

}

620

reset_page(head_extra);

620

reset_page(head_extra);

621

__free_page(head_extra);

621

__free_page(head_extra);

622

}

622

}

623

624

/* Initialize a newly allocated zspage */

624

/* Initialize a newly allocated zspage */

625

static void init_zspage(struct page *first_page, struct size_class *class)

625

static void init_zspage(struct page *first_page, struct size_class *class)

626

{

626

{

627

unsigned long off = 0;

627

unsigned long off = 0;

628

struct page *page = first_page;

628

struct page *page = first_page;

629

630

BUG_ON(!is_first_page(first_page));

630

BUG_ON(!is_first_page(first_page));

631

while (page) {

631

while (page) {

632

struct page *next_page;

632

struct page *next_page;

633

struct link_free *link;

633

struct link_free *link;

634

unsigned int i = 1;

634

unsigned int i = 1;

635

void *vaddr;

635

void *vaddr;

636

637

/*

637

/*

638

* page->index stores offset of first object starting

638

* page->index stores offset of first object starting

639

* in the page. For the first page, this is always 0,

639

* in the page. For the first page, this is always 0,

640

* so we use first_page->index (aka ->freelist) to store

640

* so we use first_page->index (aka ->freelist) to store

641

* head of corresponding zspage's freelist.

641

* head of corresponding zspage's freelist.

642

*/

642

*/

643

if (page != first_page)

643

if (page != first_page)

644

page->index = off;

644

page->index = off;

645

646

vaddr = kmap_atomic(page);

646

vaddr = kmap_atomic(page);

647

link = (struct link_free *)vaddr + off / sizeof(*link);

647

link = (struct link_free *)vaddr + off / sizeof(*link);

648

649

while ((off += class->size) < PAGE_SIZE) {

649

while ((off += class->size) < PAGE_SIZE) {

650

link->next = obj_location_to_handle(page, i++);

650

link->next = obj_location_to_handle(page, i++);

651

link += class->size / sizeof(*link);

651

link += class->size / sizeof(*link);

652

}

652

}

653

654

/*

654

/*

655

* We now come to the last (full or partial) object on this

655

* We now come to the last (full or partial) object on this

656

* page, which must point to the first object on the next

656

* page, which must point to the first object on the next

657

* page (if present)

657

* page (if present)

658

*/

658

*/

659

next_page = get_next_page(page);

659

next_page = get_next_page(page);

660

link->next = obj_location_to_handle(next_page, 0);

660

link->next = obj_location_to_handle(next_page, 0);

661

kunmap_atomic(vaddr);

661

kunmap_atomic(vaddr);

662

page = next_page;

662

page = next_page;

663

off %= PAGE_SIZE;

663

off %= PAGE_SIZE;

664

}

664

}

665

}

665

}

666

667

/*

667

/*

668

* Allocate a zspage for the given size class

668

* Allocate a zspage for the given size class

669

*/

669

*/

670

static struct page *alloc_zspage(struct size_class *class, gfp_t flags)

670

static struct page *alloc_zspage(struct size_class *class, gfp_t flags)

671

{

671

{

672

int i, error;

672

int i, error;

673

struct page *first_page = NULL, *uninitialized_var(prev_page);

673

struct page *first_page = NULL, *uninitialized_var(prev_page);

674

675

/*

675

/*

676

* Allocate individual pages and link them together as:

676

* Allocate individual pages and link them together as:

677

* 1. first page->private = first sub-page

677

* 1. first page->private = first sub-page

678

* 2. all sub-pages are linked together using page->lru

678

* 2. all sub-pages are linked together using page->lru

679

* 3. each sub-page is linked to the first page using page->first_page

679

* 3. each sub-page is linked to the first page using page->first_page

680

*

680

*

681

* For each size class, First/Head pages are linked together using

681

* For each size class, First/Head pages are linked together using

682

* page->lru. Also, we set PG_private to identify the first page

682

* page->lru. Also, we set PG_private to identify the first page

683

* (i.e. no other sub-page has this flag set) and PG_private_2 to

683

* (i.e. no other sub-page has this flag set) and PG_private_2 to

684

* identify the last page.

684

* identify the last page.

685

*/

685

*/

686

error = -ENOMEM;

686

error = -ENOMEM;

687

for (i = 0; i < class->pages_per_zspage; i++) {

687

for (i = 0; i < class->pages_per_zspage; i++) {

688

struct page *page;

688

struct page *page;

689

690

page = alloc_page(flags);

690

page = alloc_page(flags);

691

if (!page)

691

if (!page)

692

goto cleanup;

692

goto cleanup;

693

694

INIT_LIST_HEAD(&page->lru);

694

INIT_LIST_HEAD(&page->lru);

695

if (i == 0) { /* first page */

695

if (i == 0) { /* first page */

696

SetPagePrivate(page);

696

SetPagePrivate(page);

697

set_page_private(page, 0);

697

set_page_private(page, 0);

698

first_page = page;

698

first_page = page;

699

first_page->inuse = 0;

699

first_page->inuse = 0;

700

}

700

}

701

if (i == 1)

701

if (i == 1)

702

set_page_private(first_page, (unsigned long)page);

702

set_page_private(first_page, (unsigned long)page);

703

if (i >= 1)

703

if (i >= 1)

704

page->first_page = first_page;

704

page->first_page = first_page;

705

if (i >= 2)

705

if (i >= 2)

706

list_add(&page->lru, &prev_page->lru);

706

list_add(&page->lru, &prev_page->lru);

707

if (i == class->pages_per_zspage - 1) /* last page */

707

if (i == class->pages_per_zspage - 1) /* last page */

708

SetPagePrivate2(page);

708

SetPagePrivate2(page);

709

prev_page = page;

709

prev_page = page;

710

}

710

}

711

712

init_zspage(first_page, class);

712

init_zspage(first_page, class);

713

714

first_page->freelist = obj_location_to_handle(first_page, 0);

714

first_page->freelist = obj_location_to_handle(first_page, 0);

715

/* Maximum number of objects we can store in this zspage */

715

/* Maximum number of objects we can store in this zspage */

716

first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size;

716

first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size;

717

718

error = 0; /* Success */

718

error = 0; /* Success */

719

720

cleanup:

720

cleanup:

721

if (unlikely(error) && first_page) {

721

if (unlikely(error) && first_page) {

722

free_zspage(first_page);

722

free_zspage(first_page);

723

first_page = NULL;

723

first_page = NULL;

724

}

724

}

725

726

return first_page;

726

return first_page;

727

}

727

}

728

729

static struct page *find_get_zspage(struct size_class *class)

729

static struct page *find_get_zspage(struct size_class *class)

730

{

730

{

731

int i;

731

int i;

732

struct page *page;

732

struct page *page;

733

734

for (i = 0; i < _ZS_NR_FULLNESS_GROUPS; i++) {

734

for (i = 0; i < _ZS_NR_FULLNESS_GROUPS; i++) {

735

page = class->fullness_list[i];

735

page = class->fullness_list[i];

736

if (page)

736

if (page)

737

break;

737

break;

738

}

738

}

739

740

return page;

740

return page;

741

}

741

}

742

743

#ifdef CONFIG_PGTABLE_MAPPING

743

#ifdef CONFIG_PGTABLE_MAPPING

744

static inline int __zs_cpu_up(struct mapping_area *area)

744

static inline int __zs_cpu_up(struct mapping_area *area)

745

{

745

{

746

/*

746

/*

747

* Make sure we don't leak memory if a cpu UP notification

747

* Make sure we don't leak memory if a cpu UP notification

748

* and zs_init() race and both call zs_cpu_up() on the same cpu

748

* and zs_init() race and both call zs_cpu_up() on the same cpu

749

*/

749

*/

750

if (area->vm)

750

if (area->vm)

751

return 0;

751

return 0;

752

area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);

752

area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);

753

if (!area->vm)

753

if (!area->vm)

754

return -ENOMEM;

754

return -ENOMEM;

755

return 0;

755

return 0;

756

}

756

}

757

758

static inline void __zs_cpu_down(struct mapping_area *area)

758

static inline void __zs_cpu_down(struct mapping_area *area)

759

{

759

{

760

if (area->vm)

760

if (area->vm)

761

free_vm_area(area->vm);

761

free_vm_area(area->vm);

762

area->vm = NULL;

762

area->vm = NULL;

763

}

763

}

764

765

static inline void *__zs_map_object(struct mapping_area *area,

765

static inline void *__zs_map_object(struct mapping_area *area,

766

struct page *pages[2], int off, int size)

766

struct page *pages[2], int off, int size)

767

{

767

{

768

BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages));

768

BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages));

769

area->vm_addr = area->vm->addr;

769

area->vm_addr = area->vm->addr;

770

return area->vm_addr + off;

770

return area->vm_addr + off;

771

}

771

}

772

773

static inline void __zs_unmap_object(struct mapping_area *area,

773

static inline void __zs_unmap_object(struct mapping_area *area,

774

struct page *pages[2], int off, int size)

774

struct page *pages[2], int off, int size)

775

{

775

{

776

unsigned long addr = (unsigned long)area->vm_addr;

776

unsigned long addr = (unsigned long)area->vm_addr;

777

778

unmap_kernel_range(addr, PAGE_SIZE * 2);

778

unmap_kernel_range(addr, PAGE_SIZE * 2);

779

}

779

}

780

781

#else /* CONFIG_PGTABLE_MAPPING */

781

#else /* CONFIG_PGTABLE_MAPPING */

782

783

static inline int __zs_cpu_up(struct mapping_area *area)

783

static inline int __zs_cpu_up(struct mapping_area *area)

784

{

784

{

785

/*

785

/*

786

* Make sure we don't leak memory if a cpu UP notification

786

* Make sure we don't leak memory if a cpu UP notification

787

* and zs_init() race and both call zs_cpu_up() on the same cpu

787

* and zs_init() race and both call zs_cpu_up() on the same cpu

788

*/

788

*/

789

if (area->vm_buf)

789

if (area->vm_buf)

790

return 0;

790

return 0;

791

area->vm_buf = kmalloc(ZS_MAX_ALLOC_SIZE, GFP_KERNEL);

791

area->vm_buf = kmalloc(ZS_MAX_ALLOC_SIZE, GFP_KERNEL);

792

if (!area->vm_buf)

792

if (!area->vm_buf)

793

return -ENOMEM;

793

return -ENOMEM;

794

return 0;

794

return 0;

795

}

795

}

796

797

static inline void __zs_cpu_down(struct mapping_area *area)

797

static inline void __zs_cpu_down(struct mapping_area *area)

798

{

798

{

799

kfree(area->vm_buf);

799

kfree(area->vm_buf);

800

area->vm_buf = NULL;

800

area->vm_buf = NULL;

801

}

801

}

802

803

static void *__zs_map_object(struct mapping_area *area,

803

static void *__zs_map_object(struct mapping_area *area,

804

struct page *pages[2], int off, int size)

804

struct page *pages[2], int off, int size)

805

{

805

{

806

int sizes[2];

806

int sizes[2];

807

void *addr;

807

void *addr;

808

char *buf = area->vm_buf;

808

char *buf = area->vm_buf;

809

810

/* disable page faults to match kmap_atomic() return conditions */

810

/* disable page faults to match kmap_atomic() return conditions */

811

pagefault_disable();

811

pagefault_disable();

812

813

/* no read fastpath */

813

/* no read fastpath */

814

if (area->vm_mm == ZS_MM_WO)

814

if (area->vm_mm == ZS_MM_WO)

815

goto out;

815

goto out;

816

817

sizes[0] = PAGE_SIZE - off;

817

sizes[0] = PAGE_SIZE - off;

818

sizes[1] = size - sizes[0];

818

sizes[1] = size - sizes[0];

819

820

/* copy object to per-cpu buffer */

820

/* copy object to per-cpu buffer */

821

addr = kmap_atomic(pages[0]);

821

addr = kmap_atomic(pages[0]);

822

memcpy(buf, addr + off, sizes[0]);

822

memcpy(buf, addr + off, sizes[0]);

823

kunmap_atomic(addr);

823

kunmap_atomic(addr);

824

addr = kmap_atomic(pages[1]);

824

addr = kmap_atomic(pages[1]);

825

memcpy(buf + sizes[0], addr, sizes[1]);

825

memcpy(buf + sizes[0], addr, sizes[1]);

826

kunmap_atomic(addr);

826

kunmap_atomic(addr);

827

out:

827

out:

828

return area->vm_buf;

828

return area->vm_buf;

829

}

829

}

830

831

static void __zs_unmap_object(struct mapping_area *area,

831

static void __zs_unmap_object(struct mapping_area *area,

832

struct page *pages[2], int off, int size)

832

struct page *pages[2], int off, int size)

833

{

833

{

834

int sizes[2];

834

int sizes[2];

835

void *addr;

835

void *addr;

836

char *buf = area->vm_buf;

836

char *buf = area->vm_buf;

837

838

/* no write fastpath */

838

/* no write fastpath */

839

if (area->vm_mm == ZS_MM_RO)

839

if (area->vm_mm == ZS_MM_RO)

840

goto out;

840

goto out;

841

842

sizes[0] = PAGE_SIZE - off;

842

sizes[0] = PAGE_SIZE - off;

843

sizes[1] = size - sizes[0];

843

sizes[1] = size - sizes[0];

844

845

/* copy per-cpu buffer to object */

845

/* copy per-cpu buffer to object */

846

addr = kmap_atomic(pages[0]);

846

addr = kmap_atomic(pages[0]);

847

memcpy(addr + off, buf, sizes[0]);

847

memcpy(addr + off, buf, sizes[0]);

848

kunmap_atomic(addr);

848

kunmap_atomic(addr);

849

addr = kmap_atomic(pages[1]);

849

addr = kmap_atomic(pages[1]);

850

memcpy(addr, buf + sizes[0], sizes[1]);

850

memcpy(addr, buf + sizes[0], sizes[1]);

851

kunmap_atomic(addr);

851

kunmap_atomic(addr);

852

853

out:

853

out:

854

/* enable page faults to match kunmap_atomic() return conditions */

854

/* enable page faults to match kunmap_atomic() return conditions */

855

pagefault_enable();

855

pagefault_enable();

856

}

856

}

857

858

#endif /* CONFIG_PGTABLE_MAPPING */

858

#endif /* CONFIG_PGTABLE_MAPPING */

859

860

static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,

860

static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,

861

void *pcpu)

861

void *pcpu)

862

{

862

{

863

int ret, cpu = (long)pcpu;

863

int ret, cpu = (long)pcpu;

864

struct mapping_area *area;

864

struct mapping_area *area;

865

866

switch (action) {

866

switch (action) {

867

case CPU_UP_PREPARE:

867

case CPU_UP_PREPARE:

868

area = &per_cpu(zs_map_area, cpu);

868

area = &per_cpu(zs_map_area, cpu);

869

ret = __zs_cpu_up(area);

869

ret = __zs_cpu_up(area);

870

if (ret)

870

if (ret)

871

return notifier_from_errno(ret);

871

return notifier_from_errno(ret);

872

break;

872

break;

873

case CPU_DEAD:

873

case CPU_DEAD:

874

case CPU_UP_CANCELED:

874

case CPU_UP_CANCELED:

875

area = &per_cpu(zs_map_area, cpu);

875

area = &per_cpu(zs_map_area, cpu);

876

__zs_cpu_down(area);

876

__zs_cpu_down(area);

877

break;

877

break;

878

}

878

}

879

880

return NOTIFY_OK;

880

return NOTIFY_OK;

881

}

881

}

882

883

static struct notifier_block zs_cpu_nb = {

883

static struct notifier_block zs_cpu_nb = {

884

.notifier_call = zs_cpu_notifier

884

.notifier_call = zs_cpu_notifier

885

};

885

};

886

887

static void zs_unregister_cpu_notifier(void)

888

{

889

int cpu;

890

891

cpu_notifier_register_begin();

892

893

for_each_online_cpu(cpu)

894

zs_cpu_notifier(NULL, CPU_DEAD, (void *)(long)cpu);

895

__unregister_cpu_notifier(&zs_cpu_nb);

896

897

cpu_notifier_register_done();

898

}

899

900

static int zs_register_cpu_notifier(void)

887

static int zs_register_cpu_notifier(void)

901

{

888

{

902

int cpu, uninitialized_var(ret);

889

int cpu, uninitialized_var(ret);

903

890

904

cpu_notifier_register_begin();

891

cpu_notifier_register_begin();

905

892

906

__register_cpu_notifier(&zs_cpu_nb);

893

__register_cpu_notifier(&zs_cpu_nb);

907

for_each_online_cpu(cpu) {

894

for_each_online_cpu(cpu) {

908

ret = zs_cpu_notifier(NULL, CPU_UP_PREPARE, (void *)(long)cpu);

895

ret = zs_cpu_notifier(NULL, CPU_UP_PREPARE, (void *)(long)cpu);

909

if (notifier_to_errno(ret))

896

if (notifier_to_errno(ret))

910

break;

897

break;

911

}

898

}

912

899

913

cpu_notifier_register_done();

900

cpu_notifier_register_done();

914

return notifier_to_errno(ret);

901

return notifier_to_errno(ret);

915

}

902

}

916

903

904

static void zs_unregister_cpu_notifier(void)

905

{

906

int cpu;

907

908

cpu_notifier_register_begin();

909

910

for_each_online_cpu(cpu)

911

zs_cpu_notifier(NULL, CPU_DEAD, (void *)(long)cpu);

912

__unregister_cpu_notifier(&zs_cpu_nb);

913

914

cpu_notifier_register_done();

915

}

916

917

static void init_zs_size_classes(void)

917

static void init_zs_size_classes(void)

918

{

918

{

919

int nr;

919

int nr;

920

921

nr = (ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / ZS_SIZE_CLASS_DELTA + 1;

921

nr = (ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / ZS_SIZE_CLASS_DELTA + 1;

922

if ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) % ZS_SIZE_CLASS_DELTA)

922

if ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) % ZS_SIZE_CLASS_DELTA)

923

nr += 1;

923

nr += 1;

924

925

zs_size_classes = nr;

925

zs_size_classes = nr;

926

}

926

}

927

928

static void __exit zs_exit(void)

929

{

930

#ifdef CONFIG_ZPOOL

931

zpool_unregister_driver(&zs_zpool_driver);

932

#endif

933

zs_unregister_cpu_notifier();

934

}

935

936

static int __init zs_init(void)

937

{

938

int ret = zs_register_cpu_notifier();

939

940

if (ret) {

941

zs_unregister_cpu_notifier();

942

return ret;

943

}

944

945

init_zs_size_classes();

946

947

#ifdef CONFIG_ZPOOL

948

zpool_register_driver(&zs_zpool_driver);

949

#endif

950

return 0;

951

}

952

953

static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)

928

static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)

954

{

929

{

955

return pages_per_zspage * PAGE_SIZE / size;

930

return pages_per_zspage * PAGE_SIZE / size;

956

}

931

}

957

932

958

static bool can_merge(struct size_class *prev, int size, int pages_per_zspage)

933

static bool can_merge(struct size_class *prev, int size, int pages_per_zspage)

959

{

934

{

960

if (prev->pages_per_zspage != pages_per_zspage)

935

if (prev->pages_per_zspage != pages_per_zspage)

961

return false;

936

return false;

962

937

963

if (get_maxobj_per_zspage(prev->size, prev->pages_per_zspage)

938

if (get_maxobj_per_zspage(prev->size, prev->pages_per_zspage)

964

!= get_maxobj_per_zspage(size, pages_per_zspage))

939

!= get_maxobj_per_zspage(size, pages_per_zspage))

965

return false;

940

return false;

966

941

967

return true;

942

return true;

968

}

943

}

969

944

945

unsigned long zs_get_total_pages(struct zs_pool *pool)

946

{

947

return atomic_long_read(&pool->pages_allocated);

948

}

949

EXPORT_SYMBOL_GPL(zs_get_total_pages);

950

970

/**

951

/**

971

* zs_create_pool - Creates an allocation pool to work from.

952

* zs_map_object - get address of allocated object from handle.

972

* @flags: allocation flags used to allocate pool metadata

953

* @pool: pool from which the object was allocated

954

* @handle: handle returned from zs_malloc

973

*

955

*

974

* This function must be called before anything when using

956

* Before using an object allocated from zs_malloc, it must be mapped using

975

* the zsmalloc allocator.

957

* this function. When done with the object, it must be unmapped using

958

* zs_unmap_object.

976

*

959

*

977

* On success, a pointer to the newly created pool is returned,

960

* Only one object can be mapped per cpu at a time. There is no protection

978

* otherwise NULL.

961

* against nested mappings.

962

*

963

* This function returns with preemption and page faults disabled.

979

*/

964

*/

980

struct zs_pool *zs_create_pool(gfp_t flags)

965

void *zs_map_object(struct zs_pool *pool, unsigned long handle,

966

enum zs_mapmode mm)

981

{

967

{

982

int i;

968

struct page *page;

983

struct zs_pool *pool;

969

unsigned long obj_idx, off;

984

struct size_class *prev_class = NULL;

985

970

986

pool = kzalloc(sizeof(*pool), GFP_KERNEL);

971

unsigned int class_idx;

987

if (!pool)

972

enum fullness_group fg;

988

return NULL;

973

struct size_class *class;

974

struct mapping_area *area;

975

struct page *pages[2];

989

976

990

pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *),

977

BUG_ON(!handle);

991

GFP_KERNEL);

992

if (!pool->size_class) {

993

kfree(pool);

994

return NULL;

995

}

996

978

997

/*

979

/*

998

* Iterate reversly, because, size of size_class that we want to use

980

* Because we use per-cpu mapping areas shared among the

999

* for merging should be larger or equal to current size.

981

* pools/users, we can't allow mapping in interrupt context

982

* because it can corrupt another users mappings.

1000

*/

983

*/

1001

for (i = zs_size_classes - 1; i >= 0; i--) {

984

BUG_ON(in_interrupt());

1002

int size;

1003

int pages_per_zspage;

1004

struct size_class *class;

1005

985

1006

size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;

986

obj_handle_to_location(handle, &page, &obj_idx);

1007

if (size > ZS_MAX_ALLOC_SIZE)

987

get_zspage_mapping(get_first_page(page), &class_idx, &fg);

1008

size = ZS_MAX_ALLOC_SIZE;

988

class = pool->size_class[class_idx];

1009

pages_per_zspage = get_pages_per_zspage(size);

989

off = obj_idx_to_offset(page, obj_idx, class->size);

1010

990

1011

/*

991

area = &get_cpu_var(zs_map_area);

1012

* size_class is used for normal zsmalloc operation such

992

area->vm_mm = mm;

1013

* as alloc/free for that size. Although it is natural that we

993

if (off + class->size <= PAGE_SIZE) {

1014

* have one size_class for each size, there is a chance that we

994

/* this object is contained entirely within a page */

1015

* can get more memory utilization if we use one size_class for

995

area->vm_addr = kmap_atomic(page);

1016

* many different sizes whose size_class have same

996

return area->vm_addr + off;

1017

* characteristics. So, we makes size_class point to

1018

* previous size_class if possible.

1019

*/

1020

if (prev_class) {

1021

if (can_merge(prev_class, size, pages_per_zspage)) {

1022

pool->size_class[i] = prev_class;

1023

continue;

1024

}

1025

}

1026

1027

class = kzalloc(sizeof(struct size_class), GFP_KERNEL);

1028

if (!class)

1029

goto err;

1030

1031

class->size = size;

1032

class->index = i;

1033

class->pages_per_zspage = pages_per_zspage;

1034

spin_lock_init(&class->lock);

1035

pool->size_class[i] = class;

1036

1037

prev_class = class;

1038

}

997

}

1039

998

1040

pool->flags = flags;

999

/* this object spans two pages */

1000

pages[0] = page;

1001

pages[1] = get_next_page(page);

1002

BUG_ON(!pages[1]);

1041

1003

1042

return pool;

1004

return __zs_map_object(area, pages, off, class->size);

1043

1044

err:

1045

zs_destroy_pool(pool);

1046

return NULL;

1047

}

1005

}

1048

EXPORT_SYMBOL_GPL(zs_create_pool);

1006

EXPORT_SYMBOL_GPL(zs_map_object);

1049

1007

1050

void zs_destroy_pool(struct zs_pool *pool)

1008

void zs_unmap_object(struct zs_pool *pool, unsigned long handle)

1051

{

1009

{

1052

int i;

1010

struct page *page;

1011

unsigned long obj_idx, off;

1053

1012

1054

for (i = 0; i < zs_size_classes; i++) {

1013

unsigned int class_idx;

1055

int fg;

1014

enum fullness_group fg;

1056

struct size_class *class = pool->size_class[i];

1015

struct size_class *class;

1016

struct mapping_area *area;

1057

1017

1058

if (!class)

1018

BUG_ON(!handle);

1059

continue;

1060

1019

1061

if (class->index != i)

1020

obj_handle_to_location(handle, &page, &obj_idx);

1062

continue;

1021

get_zspage_mapping(get_first_page(page), &class_idx, &fg);

1022

class = pool->size_class[class_idx];

1023

off = obj_idx_to_offset(page, obj_idx, class->size);

1063

1024

1064

for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) {

1025

area = this_cpu_ptr(&zs_map_area);

1065

if (class->fullness_list[fg]) {

1026

if (off + class->size <= PAGE_SIZE)

1066

pr_info("Freeing non-empty class with size %db, fullness group %d\n",

1027

kunmap_atomic(area->vm_addr);

1067

class->size, fg);

1028

else {

1068

}

1029

struct page *pages[2];

1069

}

1070

kfree(class);

1071

}

1072

1030

1073

kfree(pool->size_class);

1031

pages[0] = page;

1074

kfree(pool);

1032

pages[1] = get_next_page(page);

1033

BUG_ON(!pages[1]);

1034

1035

__zs_unmap_object(area, pages, off, class->size);

1036

}

1037

put_cpu_var(zs_map_area);

1075

}

1038

}

1076

EXPORT_SYMBOL_GPL(zs_destroy_pool);

1039

EXPORT_SYMBOL_GPL(zs_unmap_object);

1077

1040

1078

/**

1041

/**

1079

* zs_malloc - Allocate block of given size from pool.

1042

* zs_malloc - Allocate block of given size from pool.

1080

* @pool: pool to allocate from

1043

* @pool: pool to allocate from

1081

* @size: size of block to allocate

1044

* @size: size of block to allocate

1082

*

1045

*

1083

* On success, handle to the allocated object is returned,

1046

* On success, handle to the allocated object is returned,

1084

* otherwise 0.

1047

* otherwise 0.

1085

* Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail.

1048

* Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail.

1086

*/

1049

*/

1087

unsigned long zs_malloc(struct zs_pool *pool, size_t size)

1050

unsigned long zs_malloc(struct zs_pool *pool, size_t size)

1088

{

1051

{

1089

unsigned long obj;

1052

unsigned long obj;

1090

struct link_free *link;

1053

struct link_free *link;

1091

struct size_class *class;

1054

struct size_class *class;

1092

void *vaddr;

1055

void *vaddr;

1093

1056

1094

struct page *first_page, *m_page;

1057

struct page *first_page, *m_page;

1095

unsigned long m_objidx, m_offset;

1058

unsigned long m_objidx, m_offset;

1096

1059

1097

if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))

1060

if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))

1098

return 0;

1061

return 0;

1099

1062

1100

class = pool->size_class[get_size_class_index(size)];

1063

class = pool->size_class[get_size_class_index(size)];

1101

1064

1102

spin_lock(&class->lock);

1065

spin_lock(&class->lock);

1103

first_page = find_get_zspage(class);

1066

first_page = find_get_zspage(class);

1104

1067

1105

if (!first_page) {

1068

if (!first_page) {

1106

spin_unlock(&class->lock);

1069

spin_unlock(&class->lock);

1107

first_page = alloc_zspage(class, pool->flags);

1070

first_page = alloc_zspage(class, pool->flags);

1108

if (unlikely(!first_page))

1071

if (unlikely(!first_page))

1109

return 0;

1072

return 0;

1110

1073

1111

set_zspage_mapping(first_page, class->index, ZS_EMPTY);

1074

set_zspage_mapping(first_page, class->index, ZS_EMPTY);

1112

atomic_long_add(class->pages_per_zspage,

1075

atomic_long_add(class->pages_per_zspage,

1113

&pool->pages_allocated);

1076

&pool->pages_allocated);

1114

spin_lock(&class->lock);

1077

spin_lock(&class->lock);

1115

}

1078

}

1116

1079

1117

obj = (unsigned long)first_page->freelist;

1080

obj = (unsigned long)first_page->freelist;

1118

obj_handle_to_location(obj, &m_page, &m_objidx);

1081

obj_handle_to_location(obj, &m_page, &m_objidx);

1119

m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);

1082

m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);

1120

1083

1121

vaddr = kmap_atomic(m_page);

1084

vaddr = kmap_atomic(m_page);

1122

link = (struct link_free *)vaddr + m_offset / sizeof(*link);

1085

link = (struct link_free *)vaddr + m_offset / sizeof(*link);

1123

first_page->freelist = link->next;

1086

first_page->freelist = link->next;

1124

memset(link, POISON_INUSE, sizeof(*link));

1087

memset(link, POISON_INUSE, sizeof(*link));

1125

kunmap_atomic(vaddr);

1088

kunmap_atomic(vaddr);

1126

1089

1127

first_page->inuse++;

1090

first_page->inuse++;

1128

/* Now move the zspage to another fullness group, if required */

1091

/* Now move the zspage to another fullness group, if required */

1129

fix_fullness_group(pool, first_page);

1092

fix_fullness_group(pool, first_page);

1130

spin_unlock(&class->lock);

1093

spin_unlock(&class->lock);

1131

1094

1132

return obj;

1095

return obj;

1133

}

1096

}

1134

EXPORT_SYMBOL_GPL(zs_malloc);

1097

EXPORT_SYMBOL_GPL(zs_malloc);

1135

1098

1136

void zs_free(struct zs_pool *pool, unsigned long obj)

1099

void zs_free(struct zs_pool *pool, unsigned long obj)

1137

{

1100

{

1138

struct link_free *link;

1101

struct link_free *link;

1139

struct page *first_page, *f_page;

1102

struct page *first_page, *f_page;

1140

unsigned long f_objidx, f_offset;

1103

unsigned long f_objidx, f_offset;

1141

void *vaddr;

1104

void *vaddr;

1142

1105

1143

int class_idx;

1106

int class_idx;

1144

struct size_class *class;

1107

struct size_class *class;

1145

enum fullness_group fullness;

1108

enum fullness_group fullness;

1146

1109

1147

if (unlikely(!obj))

1110

if (unlikely(!obj))

1148

return;

1111

return;

1149

1112

1150

obj_handle_to_location(obj, &f_page, &f_objidx);

1113

obj_handle_to_location(obj, &f_page, &f_objidx);

1151

first_page = get_first_page(f_page);

1114

first_page = get_first_page(f_page);

1152

1115

1153

get_zspage_mapping(first_page, &class_idx, &fullness);

1116

get_zspage_mapping(first_page, &class_idx, &fullness);

1154

class = pool->size_class[class_idx];

1117

class = pool->size_class[class_idx];

1155

f_offset = obj_idx_to_offset(f_page, f_objidx, class->size);

1118

f_offset = obj_idx_to_offset(f_page, f_objidx, class->size);

1156

1119

1157

spin_lock(&class->lock);

1120

spin_lock(&class->lock);

1158

1121

1159

/* Insert this object in containing zspage's freelist */

1122

/* Insert this object in containing zspage's freelist */

1160

vaddr = kmap_atomic(f_page);

1123

vaddr = kmap_atomic(f_page);

1161

link = (struct link_free *)(vaddr + f_offset);

1124

link = (struct link_free *)(vaddr + f_offset);

1162

link->next = first_page->freelist;

1125

link->next = first_page->freelist;

1163

kunmap_atomic(vaddr);

1126

kunmap_atomic(vaddr);

1164

first_page->freelist = (void *)obj;

1127

first_page->freelist = (void *)obj;

1165

1128

1166

first_page->inuse--;

1129

first_page->inuse--;

1167

fullness = fix_fullness_group(pool, first_page);

1130

fullness = fix_fullness_group(pool, first_page);

1168

spin_unlock(&class->lock);

1131

spin_unlock(&class->lock);

1169

1132

1170

if (fullness == ZS_EMPTY) {

1133

if (fullness == ZS_EMPTY) {

1171

atomic_long_sub(class->pages_per_zspage,

1134

atomic_long_sub(class->pages_per_zspage,

1172

&pool->pages_allocated);

1135

&pool->pages_allocated);

1173

free_zspage(first_page);

1136

free_zspage(first_page);

1174

}

1137

}

1175

}

1138

}

1176

EXPORT_SYMBOL_GPL(zs_free);

1139

EXPORT_SYMBOL_GPL(zs_free);

1177

1140

1178

/**

1141

/**

1179

* zs_map_object - get address of allocated object from handle.

1142

* zs_create_pool - Creates an allocation pool to work from.

1180

* @pool: pool from which the object was allocated

1143

* @flags: allocation flags used to allocate pool metadata

1181

* @handle: handle returned from zs_malloc

1182

*

1144

*

1183

* Before using an object allocated from zs_malloc, it must be mapped using

1145

* This function must be called before anything when using

1184

* this function. When done with the object, it must be unmapped using

1146

* the zsmalloc allocator.

1185

* zs_unmap_object.

1186

*

1147

*

1187

* Only one object can be mapped per cpu at a time. There is no protection

1148

* On success, a pointer to the newly created pool is returned,

1188

* against nested mappings.

1149

* otherwise NULL.

1189

*

1190

* This function returns with preemption and page faults disabled.

1191

*/

1150

*/

1192

void *zs_map_object(struct zs_pool *pool, unsigned long handle,

1151

struct zs_pool *zs_create_pool(gfp_t flags)

1193

enum zs_mapmode mm)

1194

{

1152

{

1195

struct page *page;

1153

int i;

1196

unsigned long obj_idx, off;

1154

struct zs_pool *pool;

1155

struct size_class *prev_class = NULL;

1197

1156

1198

unsigned int class_idx;

1157

pool = kzalloc(sizeof(*pool), GFP_KERNEL);

1199

enum fullness_group fg;

1158

if (!pool)

1200

struct size_class *class;

1159

return NULL;

1201

struct mapping_area *area;

1202

struct page *pages[2];

1203

1160

1204

BUG_ON(!handle);

1161

pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *),

1162

GFP_KERNEL);

1163

if (!pool->size_class) {

1164

kfree(pool);

1165

return NULL;

1166

}

1205

1167

1206

/*

1168

/*

1207

* Because we use per-cpu mapping areas shared among the

1169

* Iterate in reverse, because the size of the size_class that we want to use

1208

* pools/users, we can't allow mapping in interrupt context

1170

* for merging should be larger or equal to current size.

1209

* because it can corrupt another users mappings.

1210

*/

1171

*/

1211

BUG_ON(in_interrupt());

1172

for (i = zs_size_classes - 1; i >= 0; i--) {

1173

int size;

1174

int pages_per_zspage;

1175

struct size_class *class;

1212

1176

1213

obj_handle_to_location(handle, &page, &obj_idx);

1177

size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;

1214

get_zspage_mapping(get_first_page(page), &class_idx, &fg);

1178

if (size > ZS_MAX_ALLOC_SIZE)

1215

class = pool->size_class[class_idx];

1179

size = ZS_MAX_ALLOC_SIZE;

1216

off = obj_idx_to_offset(page, obj_idx, class->size);

1180

pages_per_zspage = get_pages_per_zspage(size);

1217

1181

1218

area = &get_cpu_var(zs_map_area);

1182

/*

1219

area->vm_mm = mm;

1183

* size_class is used for normal zsmalloc operation such

1220

if (off + class->size <= PAGE_SIZE) {

1184

* as alloc/free for that size. Although it is natural that we

1221

/* this object is contained entirely within a page */

1185

* have one size_class for each size, there is a chance that we

1222

area->vm_addr = kmap_atomic(page);

1186

* can get more memory utilization if we use one size_class for

1223

return area->vm_addr + off;

1187

* many different sizes whose size_class have same

1188

* characteristics. So, we make size_class point to

1189

* previous size_class if possible.

1190

*/

1191

if (prev_class) {

1192

if (can_merge(prev_class, size, pages_per_zspage)) {

1193

pool->size_class[i] = prev_class;

1194

continue;

1195

}

GITLAB

mm/zsmalloc: adjust order of functions

 /*
  * zsmalloc memory allocator
  *
  * Copyright (C) 2011  Nitin Gupta
  * Copyright (C) 2012, 2013 Minchan Kim
  *
  * This code is released using a dual license strategy: BSD/GPL
  * You can choose the license that better fits your requirements.
  *
  * Released under the terms of 3-clause BSD License
  * Released under the terms of GNU General Public License Version 2.0
  */
 /*
  * This allocator is designed for use with zram. Thus, the allocator is
  * supposed to work well under low memory conditions. In particular, it
  * never attempts higher order page allocation which is very likely to
  * fail under memory pressure. On the other hand, if we just use single
  * (0-order) pages, it would suffer from very high fragmentation --
  * any object of size PAGE_SIZE/2 or larger would occupy an entire page.
  * This was one of the major issues with its predecessor (xvmalloc).
  *
  * To overcome these issues, zsmalloc allocates a bunch of 0-order pages
  * and links them together using various 'struct page' fields. These linked
  * pages act as a single higher-order page i.e. an object can span 0-order
  * page boundaries. The code refers to these linked pages as a single entity
  * called zspage.
  *
  * For simplicity, zsmalloc can only allocate objects of size up to PAGE_SIZE
  * since this satisfies the requirements of all its current users (in the
  * worst case, page is incompressible and is thus stored "as-is" i.e. in
  * uncompressed form). For allocation requests larger than this size, failure
  * is returned (see zs_malloc).
  *
  * Additionally, zs_malloc() does not return a dereferenceable pointer.
  * Instead, it returns an opaque handle (unsigned long) which encodes actual
  * location of the allocated object. The reason for this indirection is that
  * zsmalloc does not keep zspages permanently mapped since that would cause
  * issues on 32-bit systems where the VA region for kernel space mappings
  * is very small. So, before using the allocated memory, the object has to
  * be mapped using zs_map_object() to get a usable pointer and subsequently
  * unmapped using zs_unmap_object().
  *
  * Following is how we use various fields and flags of underlying
  * struct page(s) to form a zspage.
  *
  * Usage of struct page fields:
  *	page->first_page: points to the first component (0-order) page
  *	page->index (union with page->freelist): offset of the first object
  *		starting in this page. For the first page, this is
  *		always 0, so we use this field (aka freelist) to point
  *		to the first free object in zspage.
  *	page->lru: links together all component pages (except the first page)
  *		of a zspage
  *
  *	For _first_ page only:
  *
  *	page->private (union with page->first_page): refers to the
  *		component page after the first page
  *	page->freelist: points to the first free object in zspage.
  *		Free objects are linked together using in-place
  *		metadata.
  *	page->objects: maximum number of objects we can store in this
  *		zspage (class->pages_per_zspage * PAGE_SIZE / class->size)
  *	page->lru: links together first pages of various zspages.
  *		Basically forming list of zspages in a fullness group.
  *	page->mapping: class index and fullness group of the zspage
  *
  * Usage of struct page flags:
  *	PG_private: identifies the first component page
  *	PG_private2: identifies the last component page
  *
  */
 #ifdef CONFIG_ZSMALLOC_DEBUG
 #define DEBUG
 #endif
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/bitops.h>
 #include <linux/errno.h>
 #include <linux/highmem.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <asm/tlbflush.h>
 #include <asm/pgtable.h>
 #include <linux/cpumask.h>
 #include <linux/cpu.h>
 #include <linux/vmalloc.h>
 #include <linux/hardirq.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
 #include <linux/zsmalloc.h>
 #include <linux/zpool.h>
 /*
  * This must be power of 2 and greater than or equal to sizeof(link_free).
  * These two conditions ensure that any 'struct link_free' itself doesn't
  * span more than 1 page which avoids complex case of mapping 2 pages simply
  * to restore link_free pointer values.
  */
 #define ZS_ALIGN		8
 /*
  * A single 'zspage' is composed of up to 2^N discontiguous 0-order (single)
  * pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N.
  */
 #define ZS_MAX_ZSPAGE_ORDER 2
 #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
 /*
  * Object location (<PFN>, <obj_idx>) is encoded as
  * a single (unsigned long) handle value.
  *
  * Note that object index <obj_idx> is relative to system
  * page <PFN> it is stored in, so for each sub-page belonging
  * to a zspage, obj_idx starts with 0.
  *
  * This is made more complicated by various memory models and PAE.
  */
 #ifndef MAX_PHYSMEM_BITS
 #ifdef CONFIG_HIGHMEM64G
 #define MAX_PHYSMEM_BITS 36
 #else /* !CONFIG_HIGHMEM64G */
 /*
  * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
  * be PAGE_SHIFT
  */
 #define MAX_PHYSMEM_BITS BITS_PER_LONG
 #endif
 #endif
 #define _PFN_BITS		(MAX_PHYSMEM_BITS - PAGE_SHIFT)
 #define OBJ_INDEX_BITS	(BITS_PER_LONG - _PFN_BITS)
 #define OBJ_INDEX_MASK	((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
 #define MAX(a, b) ((a) >= (b) ? (a) : (b))
 /* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
 #define ZS_MIN_ALLOC_SIZE \
 	MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
 #define ZS_MAX_ALLOC_SIZE	PAGE_SIZE
 /*
  * On systems with 4K page size, this gives 255 size classes! There is a
  * trade-off here:
  *  - Large number of size classes is potentially wasteful as free pages are
  *    spread across these classes
  *  - Small number of size classes causes large internal fragmentation
  *  - Probably it's better to use specific size classes (empirically
  *    determined). NOTE: all those class sizes must be set as multiple of
  *    ZS_ALIGN to make sure link_free itself never has to span 2 pages.
  *
  *  ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN
  *  (reason above)
  */
 #define ZS_SIZE_CLASS_DELTA	(PAGE_SIZE >> 8)
 /*
  * We do not maintain any list for completely empty or full pages
  */
 enum fullness_group {
 	/* Groups that have a per-class list in size_class.fullness_list[] */
 	ZS_ALMOST_FULL,
 	ZS_ALMOST_EMPTY,
 	/* Count of listed groups; used as the fullness_list[] array size */
 	_ZS_NR_FULLNESS_GROUPS,
 	/*
 	 * Values at or past _ZS_NR_FULLNESS_GROUPS have no list:
 	 * insert_zspage()/remove_zspage() return early for them.
 	 */
 	ZS_EMPTY,
 	ZS_FULL
 };
 /*
  * number of size_classes
  */
 static int zs_size_classes;
 /*
  * We assign a page to ZS_ALMOST_EMPTY fullness group when:
  *	n <= N / f, where
  * n = number of allocated objects
  * N = total number of objects zspage can store
  * f = fullness_threshold_frac
  *
  * Similarly, we assign zspage to:
  *	ZS_ALMOST_FULL	when n > N / f
  *	ZS_EMPTY	when n == 0
  *	ZS_FULL		when n == N
  *
  * (see: fix_fullness_group())
  */
 static const int fullness_threshold_frac = 4;
 /* Per-size bookkeeping: object size, zspage geometry and zspage lists. */
 struct size_class {
 	/*
 	 * Size of objects stored in this class. Must be multiple
 	 * of ZS_ALIGN.
 	 */
 	int size;
 	/* Class index; this is the value stored in a zspage's mapping */
 	unsigned int index;
 	/* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */
 	int pages_per_zspage;
 	/* Held by zs_malloc()/zs_free() around freelist and list updates */
 	spinlock_t lock;
 	/* Head zspage (first page) of each listed fullness group, or NULL */
 	struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS];
 };
 /*
  * Placed within free objects to form a singly linked list.
  * For every zspage, first_page->freelist gives head of this list.
  *
  * This must be power of 2 and less than or equal to ZS_ALIGN
  */
 struct link_free {
 	/*
 	 * Handle of next free chunk (encodes <PFN, obj_idx>), as produced
 	 * by obj_location_to_handle(); NULL marks the end of the list.
 	 */
 	void *next;
 };
 /* One allocation pool: its size classes plus page accounting. */
 struct zs_pool {
 	/* Array of size_class pointers, looked up by size class index */
 	struct size_class **size_class;
 	gfp_t flags;	/* allocation flags used when growing pool */
 	/* Adjusted by pages_per_zspage as zspages are allocated and freed */
 	atomic_long_t pages_allocated;
 };
 /*
  * A zspage's class index and fullness group
  * are encoded in its (first)page->mapping
  */
 #define CLASS_IDX_BITS	28
 #define FULLNESS_BITS	4
 #define CLASS_IDX_MASK	((1 << CLASS_IDX_BITS) - 1)
 #define FULLNESS_MASK	((1 << FULLNESS_BITS) - 1)
 /*
  * State used while an object is mapped; one instance exists per cpu
  * (see zs_map_area). Which member backs page-spanning objects depends
  * on CONFIG_PGTABLE_MAPPING.
  */
 struct mapping_area {
 #ifdef CONFIG_PGTABLE_MAPPING
 	struct vm_struct *vm; /* vm area for mapping object that span pages */
 #else
 	char *vm_buf; /* copy buffer for objects that span pages */
 #endif
 	char *vm_addr; /* address of kmap_atomic()'ed pages */
 	enum zs_mapmode vm_mm; /* mapping mode */
 };
 /* zpool driver */
 #ifdef CONFIG_ZPOOL
 /* zpool create hook: build a zsmalloc pool; zpool_ops is not used here. */
 static void *zs_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops)
 {
 	struct zs_pool *new_pool = zs_create_pool(gfp);
 	return new_pool;
 }
 /* zpool destroy hook: hand the opaque pool pointer to zs_destroy_pool(). */
 static void zs_zpool_destroy(void *pool)
 {
 	struct zs_pool *zs = pool;
 	zs_destroy_pool(zs);
 }
 /*
  * zpool malloc hook: allocate @size bytes and store the handle in
  * @handle. Returns 0 on success, -1 when zs_malloc() returned 0.
  * The gfp argument is not used here.
  */
 static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp,
 			unsigned long *handle)
 {
 	unsigned long obj = zs_malloc(pool, size);
 	*handle = obj;
 	if (!obj)
 		return -1;
 	return 0;
 }
 /* zpool free hook: release the object identified by @handle. */
 static void zs_zpool_free(void *pool, unsigned long handle)
 {
 	struct zs_pool *zs = pool;
 	zs_free(zs, handle);
 }
 /*
  * zpool shrink hook: always fails with -EINVAL; neither @pages nor
  * @reclaimed is touched.
  */
 static int zs_zpool_shrink(void *pool, unsigned int pages,
 			unsigned int *reclaimed)
 {
 	return -EINVAL;
 }
 static void *zs_zpool_map(void *pool, unsigned long handle,
 			enum zpool_mapmode mm)
 {
 	enum zs_mapmode zs_mm;
 	switch (mm) {
 	case ZPOOL_MM_RO:
 		zs_mm = ZS_MM_RO;
 		break;
 	case ZPOOL_MM_WO:
 		zs_mm = ZS_MM_WO;
 		break;
 	case ZPOOL_MM_RW: /* fallthru */
 	default:
 		zs_mm = ZS_MM_RW;
 		break;
 	}
 	return zs_map_object(pool, handle, zs_mm);
 }
 /* zpool unmap hook: undo a mapping made through zs_zpool_map(). */
 static void zs_zpool_unmap(void *pool, unsigned long handle)
 {
 	struct zs_pool *zs = pool;
 	zs_unmap_object(zs, handle);
 }
 static u64 zs_zpool_total_size(void *pool)
 {
 	return zs_get_total_pages(pool) << PAGE_SHIFT;
 }
 /* zpool driver descriptor: routes each zpool callback to its wrapper above. */
 static struct zpool_driver zs_zpool_driver = {
 	.type =		"zsmalloc",
 	.owner =	THIS_MODULE,
 	.create =	zs_zpool_create,
 	.destroy =	zs_zpool_destroy,
 	.malloc =	zs_zpool_malloc,
 	.free =		zs_zpool_free,
 	.shrink =	zs_zpool_shrink,	/* always returns -EINVAL */
 	.map =		zs_zpool_map,
 	.unmap =	zs_zpool_unmap,
 	.total_size =	zs_zpool_total_size,
 };
 MODULE_ALIAS("zpool-zsmalloc");
 #endif /* CONFIG_ZPOOL */
 /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
 static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
 /* Non-zero when @page carries PG_private, i.e. is a zspage's first page. */
 static int is_first_page(struct page *page)
 {
 	int first = PagePrivate(page);
 	return first;
 }
 /* Non-zero when @page carries PG_private_2, i.e. is a zspage's last page. */
 static int is_last_page(struct page *page)
 {
 	int last = PagePrivate2(page);
 	return last;
 }
 /*
  * Unpack the class index and fullness group stored in the first page's
  * ->mapping field: fullness in the low FULLNESS_BITS, class index above.
  */
 static void get_zspage_mapping(struct page *page, unsigned int *class_idx,
 				enum fullness_group *fullness)
 {
 	unsigned long packed;
 	BUG_ON(!is_first_page(page));
 	packed = (unsigned long)page->mapping;
 	*class_idx = (packed >> FULLNESS_BITS) & CLASS_IDX_MASK;
 	*fullness = packed & FULLNESS_MASK;
 }
 /*
  * Pack the class index and fullness group into the first page's
  * ->mapping field; the inverse of get_zspage_mapping().
  */
 static void set_zspage_mapping(struct page *page, unsigned int class_idx,
 				enum fullness_group fullness)
 {
 	unsigned long packed;
 	BUG_ON(!is_first_page(page));
 	packed = (class_idx & CLASS_IDX_MASK) << FULLNESS_BITS;
 	packed |= fullness & FULLNESS_MASK;
 	page->mapping = (struct address_space *)packed;
 }
 /*
  * The pool is split into size classes; each class keeps a list of
  * zspages carved into equal sized chunks, and an allocation is served
  * from the class matching its size. Return the index of the first size
  * class whose chunk size is big enough to hold the given size.
  */
 static int get_size_class_index(int size)
 {
 	/* Anything up to the minimum allocation size lands in class 0 */
 	if (unlikely(size <= ZS_MIN_ALLOC_SIZE))
 		return 0;
 	return DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE, ZS_SIZE_CLASS_DELTA);
 }
 /*
  * Within a size class, zspages are grouped by how "full" they are so
  * that empty or nearly empty zspages are easy to find when shrinking
  * the pool (not yet implemented). Compute the fullness group of the
  * zspage headed by @page from its in-use and maximum object counts.
  */
 static enum fullness_group get_fullness_group(struct page *page)
 {
 	int used, capacity;
 	BUG_ON(!is_first_page(page));
 	used = page->inuse;
 	capacity = page->objects;
 	if (used == 0)
 		return ZS_EMPTY;
 	if (used == capacity)
 		return ZS_FULL;
 	if (used <= capacity / fullness_threshold_frac)
 		return ZS_ALMOST_EMPTY;
 	return ZS_ALMOST_FULL;
 }
 /*
  * Each size class keeps one list of zspages per listed fullness group.
  * This function inserts the given zspage into the list identified by
  * <class, fullness_group> and makes it the new list head. Groups that
  * have no list (EMPTY/FULL) are ignored.
  */
 static void insert_zspage(struct page *page, struct size_class *class,
 				enum fullness_group fullness)
 {
 	struct page *old_head;
 	BUG_ON(!is_first_page(page));
 	if (fullness >= _ZS_NR_FULLNESS_GROUPS)
 		return;
 	old_head = class->fullness_list[fullness];
 	if (old_head)
 		list_add_tail(&page->lru, &old_head->lru);
 	class->fullness_list[fullness] = page;
 }
 /*
  * Unlink the given zspage from the list identified by
  * <class, fullness_group>, moving the list head on when needed.
  * Groups that have no list (EMPTY/FULL) are ignored.
  */
 static void remove_zspage(struct page *page, struct size_class *class,
 				enum fullness_group fullness)
 {
 	struct page *head;
 	BUG_ON(!is_first_page(page));
 	if (fullness >= _ZS_NR_FULLNESS_GROUPS)
 		return;
 	head = class->fullness_list[fullness];
 	BUG_ON(!head);
 	if (list_empty(&head->lru))
 		/* the head has no neighbours: the list becomes empty */
 		class->fullness_list[fullness] = NULL;
 	else if (head == page)
 		/* removing the head: its successor takes over */
 		class->fullness_list[fullness] =
 			list_entry(head->lru.next, struct page, lru);
 	list_del_init(&page->lru);
 }
 /*
  * Each size class maintains zspages in different fullness groups depending
  * on the number of live objects they contain. When allocating or freeing
  * objects, the fullness status of the page can change, say, from ALMOST_FULL
  * to ALMOST_EMPTY when freeing an object. This function checks if such
  * a status change has occurred for the given page and accordingly moves the
  * page from the list of the old fullness group to that of the new
  * fullness group, then records the new group in the page's mapping.
  *
  * @pool: pool the zspage belongs to
  * @page: first page of the zspage
  *
  * Returns the fullness group the zspage is in after the call.
  */
 static enum fullness_group fix_fullness_group(struct zs_pool *pool,
 						struct page *page)
 {
 	/* unsigned to match get_zspage_mapping()'s out-parameter type */
 	unsigned int class_idx;
 	struct size_class *class;
 	enum fullness_group currfg, newfg;
 	BUG_ON(!is_first_page(page));
 	get_zspage_mapping(page, &class_idx, &currfg);
 	newfg = get_fullness_group(page);
 	if (newfg == currfg)
 		goto out;
 	class = pool->size_class[class_idx];
 	remove_zspage(page, class, currfg);
 	insert_zspage(page, class, newfg);
 	set_zspage_mapping(page, class_idx, newfg);
 out:
 	return newfg;
 }
 /*
  * We have to decide on how many pages to link together
  * to form a zspage for each size class. This is important
  * to reduce wastage due to unusable space left at end of
  * each zspage which is given as:
  *	wastage = Zp % class_size
  * where Zp = zspage size = k * PAGE_SIZE where k = 1, 2, ...
  *
  * For example, for size class of 3/8 * PAGE_SIZE, we should
  * link together 3 PAGE_SIZE sized pages to form a zspage
  * since then we can perfectly fit in 8 such objects.
  *
  * Returns the page count (1..ZS_MAX_PAGES_PER_ZSPAGE) with the highest
  * used-space percentage.
  */
 static int get_pages_per_zspage(int class_size)
 {
 	int pages, best_pages = 1, best_usedpc = 0;
 	for (pages = 1; pages <= ZS_MAX_PAGES_PER_ZSPAGE; pages++) {
 		int zspage_size = pages * PAGE_SIZE;
 		int used = zspage_size - zspage_size % class_size;
 		int usedpc = used * 100 / zspage_size;
 		/* only a strictly better ratio wins, so ties keep fewer pages */
 		if (usedpc <= best_usedpc)
 			continue;
 		best_usedpc = usedpc;
 		best_pages = pages;
 	}
 	return best_pages;
 }
 /*
  * A zspage is made of several system pages linked through fields in
  * struct page. Given any component page of a zspage, return its
  * first/head page.
  */
 static struct page *get_first_page(struct page *page)
 {
 	return is_first_page(page) ? page : page->first_page;
 }
 /*
  * Return the component page following @page in its zspage, or NULL for
  * the last page. The first page keeps the link in its private field;
  * other pages are chained through ->lru.
  */
 static struct page *get_next_page(struct page *page)
 {
 	if (is_last_page(page))
 		return NULL;
 	if (is_first_page(page))
 		return (struct page *)page_private(page);
 	return list_entry(page->lru.next, struct page, lru);
 }
 /*
  * Encode <page, obj_idx> as a single handle value.
  * On platforms whose physical memory starts at 0x0 the pfn can be 0,
  * so obj_idx is stored incremented by one to keep a valid handle from
  * ever being 0. A NULL page (only allowed with obj_idx 0) yields NULL.
  */
 static void *obj_location_to_handle(struct page *page, unsigned long obj_idx)
 {
 	unsigned long pfn_part, idx_part;
 	if (!page) {
 		BUG_ON(obj_idx);
 		return NULL;
 	}
 	pfn_part = page_to_pfn(page) << OBJ_INDEX_BITS;
 	idx_part = (obj_idx + 1) & OBJ_INDEX_MASK;
 	return (void *)(pfn_part | idx_part);
 }
 /*
  * Decode the <page, obj_idx> pair from an object handle. The stored
  * index is decremented by one to undo the adjustment made in
  * obj_location_to_handle().
  */
 static void obj_handle_to_location(unsigned long handle, struct page **page,
 				unsigned long *obj_idx)
 {
 	unsigned long pfn = handle >> OBJ_INDEX_BITS;
 	unsigned long stored_idx = handle & OBJ_INDEX_MASK;
 	*page = pfn_to_page(pfn);
 	*obj_idx = stored_idx - 1;
 }
 /*
  * Byte offset within @page of the object with page-relative index
  * @obj_idx. On non-first pages, page->index holds the offset of the
  * first object starting in the page; on the first page it is 0.
  */
 static unsigned long obj_idx_to_offset(struct page *page,
 				unsigned long obj_idx, int class_size)
 {
 	unsigned long base = is_first_page(page) ? 0 : page->index;
 	return base + obj_idx * class_size;
 }
 /*
  * Clear the per-page state zsmalloc keeps in struct page: the first/last
  * page flags, the private link, the mapping and the freelist, then reset
  * the map count. free_zspage() calls this on every page before freeing it.
  */
 static void reset_page(struct page *page)
 {
 	clear_bit(PG_private, &page->flags);
 	clear_bit(PG_private_2, &page->flags);
 	set_page_private(page, 0);
 	page->mapping = NULL;
 	page->freelist = NULL;
 	page_mapcount_reset(page);
 }
 /*
  * Give every component page of a zspage back to the page allocator.
  * @first_page must be the zspage's first page and have no objects in use.
  */
 static void free_zspage(struct page *first_page)
 {
 	struct page *nextp, *tmp, *head_extra;
 	BUG_ON(!is_first_page(first_page));
 	BUG_ON(first_page->inuse);
 	/* Fetch the link to the second page before reset_page() zeroes it */
 	head_extra = (struct page *)page_private(first_page);
 	reset_page(first_page);
 	__free_page(first_page);
 	/* zspage with only 1 system page */
 	if (!head_extra)
 		return;
 	/* Free the pages chained on head_extra's lru list, then head_extra */
 	list_for_each_entry_safe(nextp, tmp, &head_extra->lru, lru) {
 		list_del(&nextp->lru);
 		reset_page(nextp);
 		__free_page(nextp);
 	}
 	reset_page(head_extra);
 	__free_page(head_extra);
 }
 /*
  * Initialize a newly allocated zspage: walk its pages and thread a free
  * list through every object slot, each slot's link_free holding the
  * handle of the next slot. The final slot gets a NULL handle.
  */
 static void init_zspage(struct page *first_page, struct size_class *class)
 {
 	/* offset, within the current page, of the object being linked */
 	unsigned long off = 0;
 	struct page *page = first_page;
 	BUG_ON(!is_first_page(first_page));
 	while (page) {
 		struct page *next_page;
 		struct link_free *link;
 		/* page-relative index of the next object to link to */
 		unsigned int i = 1;
 		void *vaddr;
 		/*
 		 * page->index stores offset of first object starting
 		 * in the page. For the first page, this is always 0,
 		 * so we use first_page->index (aka ->freelist) to store
 		 * head of corresponding zspage's freelist.
 		 */
 		if (page != first_page)
 			page->index = off;
 		vaddr = kmap_atomic(page);
 		link = (struct link_free *)vaddr + off / sizeof(*link);
 		/* link each object to the following one that starts in this page */
 		while ((off += class->size) < PAGE_SIZE) {
 			link->next = obj_location_to_handle(page, i++);
 			link += class->size / sizeof(*link);
 		}
 		/*
 		 * We now come to the last (full or partial) object on this
 		 * page, which must point to the first object on the next
 		 * page (if present)
 		 */
 		next_page = get_next_page(page);
 		link->next = obj_location_to_handle(next_page, 0);
 		kunmap_atomic(vaddr);
 		page = next_page;
 		/* what spilled past this page is where the next page's first object starts */
 		off %= PAGE_SIZE;
 	}
 }
 /*
  * Allocate a zspage for the given size class
  *
  * @class: size class supplying pages_per_zspage and the object size
  * @flags: gfp flags passed to alloc_page() for every component page
  *
  * Returns the first page of the new zspage, or NULL if a page allocation
  * failed; pages already linked to the first page are freed in that case.
  */
 static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
 {
 	int i, error;
 	struct page *first_page = NULL, *uninitialized_var(prev_page);
 	/*
 	 * Allocate individual pages and link them together as:
 	 * 1. first page->private = first sub-page
 	 * 2. all sub-pages are linked together using page->lru
 	 * 3. each sub-page is linked to the first page using page->first_page
 	 *
 	 * For each size class, First/Head pages are linked together using
 	 * page->lru. Also, we set PG_private to identify the first page
 	 * (i.e. no other sub-page has this flag set) and PG_private_2 to
 	 * identify the last page.
 	 */
 	error = -ENOMEM;
 	for (i = 0; i < class->pages_per_zspage; i++) {
 		struct page *page;
 		page = alloc_page(flags);
 		if (!page)
 			goto cleanup;
 		INIT_LIST_HEAD(&page->lru);
 		if (i == 0) {	/* first page */
 			SetPagePrivate(page);
 			set_page_private(page, 0);
 			first_page = page;
 			first_page->inuse = 0;
 		}
 		if (i == 1)
 			set_page_private(first_page, (unsigned long)page);
 		if (i >= 1)
 			page->first_page = first_page;
 		/* prev_page is only read from i == 2 on, after it was assigned */
 		if (i >= 2)
 			list_add(&page->lru, &prev_page->lru);
 		if (i == class->pages_per_zspage - 1)	/* last page */
 			SetPagePrivate2(page);
 		prev_page = page;
 	}
 	init_zspage(first_page, class);
 	/* the free list starts at object 0 of the first page */
 	first_page->freelist = obj_location_to_handle(first_page, 0);
 	/* Maximum number of objects we can store in this zspage */
 	first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size;
 	error = 0; /* Success */
 cleanup:
 	if (unlikely(error) && first_page) {
 		free_zspage(first_page);
 		first_page = NULL;
 	}
 	return first_page;
 }
 static struct page *find_get_zspage(struct size_class *class)
 {
 	int i;
 	struct page *page;
 	for (i = 0; i < _ZS_NR_FULLNESS_GROUPS; i++) {
 		page = class->fullness_list[i];
 		if (page)
 			break;
 	}
 	return page;
 }
 #ifdef CONFIG_PGTABLE_MAPPING
 /*
  * Set up the two-page vm area of a cpu's mapping area.
  * Returns 0 on success (or if already set up), -ENOMEM on failure.
  */
 static inline int __zs_cpu_up(struct mapping_area *area)
 {
 	/*
 	 * A cpu UP notification and zs_init() may race and both get here
 	 * for the same cpu; allocate only once so nothing is leaked.
 	 */
 	if (!area->vm) {
 		area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
 		if (!area->vm)
 			return -ENOMEM;
 	}
 	return 0;
 }
 /* Release a cpu's vm area, if any, and leave the pointer NULL. */
 static inline void __zs_cpu_down(struct mapping_area *area)
 {
 	if (!area->vm)
 		return;
 	free_vm_area(area->vm);
 	area->vm = NULL;
 }
 /*
  * Map the two pages an object spans into the per-cpu vm area and return
  * the object's address there. @size is not used by this variant.
  */
 static inline void *__zs_map_object(struct mapping_area *area,
 				struct page *pages[2], int off, int size)
 {
 	int err = map_vm_area(area->vm, PAGE_KERNEL, pages);
 	BUG_ON(err);
 	area->vm_addr = area->vm->addr;
 	return area->vm_addr + off;
 }
 /*
  * Tear down the two-page mapping set up by __zs_map_object(). Only
  * area->vm_addr is consulted; the other arguments are unused here.
  */
 static inline void __zs_unmap_object(struct mapping_area *area,
 				struct page *pages[2], int off, int size)
 {
 	unmap_kernel_range((unsigned long)area->vm_addr, PAGE_SIZE * 2);
 }
 #else /* CONFIG_PGTABLE_MAPPING */
 /*
  * Set up the copy buffer of a cpu's mapping area.
  * Returns 0 on success (or if already set up), -ENOMEM on failure.
  */
 static inline int __zs_cpu_up(struct mapping_area *area)
 {
 	/*
 	 * A cpu UP notification and zs_init() may race and both get here
 	 * for the same cpu; allocate only once so nothing is leaked.
 	 */
 	if (!area->vm_buf) {
 		area->vm_buf = kmalloc(ZS_MAX_ALLOC_SIZE, GFP_KERNEL);
 		if (!area->vm_buf)
 			return -ENOMEM;
 	}
 	return 0;
 }
 /* Free a cpu's copy buffer and leave the pointer NULL. */
 static inline void __zs_cpu_down(struct mapping_area *area)
 {
 	char *stale_buf = area->vm_buf;
 	kfree(stale_buf);
 	area->vm_buf = NULL;
 }
 /*
  * "Map" an object that spans two pages by copying it into the per-cpu
  * buffer, which is what the caller gets back. For write-only mappings
  * the copy-in is skipped. Page faults stay disabled on return.
  */
 static void *__zs_map_object(struct mapping_area *area,
 			struct page *pages[2], int off, int size)
 {
 	char *buf = area->vm_buf;
 	/* disable page faults to match kmap_atomic() return conditions */
 	pagefault_disable();
 	/* write-only users never read the old contents: skip the copy */
 	if (area->vm_mm != ZS_MM_WO) {
 		int head_len = PAGE_SIZE - off;
 		int tail_len = size - head_len;
 		void *addr;
 		/* first part: from @off to the end of the first page */
 		addr = kmap_atomic(pages[0]);
 		memcpy(buf, addr + off, head_len);
 		kunmap_atomic(addr);
 		/* remainder: from the start of the second page */
 		addr = kmap_atomic(pages[1]);
 		memcpy(buf + head_len, addr, tail_len);
 		kunmap_atomic(addr);
 	}
 	return area->vm_buf;
 }
 /*
  * "Unmap" an object that spans two pages by copying the per-cpu buffer
  * back into the pages. For read-only mappings the copy-back is skipped.
  * Page faults are re-enabled before returning.
  */
 static void __zs_unmap_object(struct mapping_area *area,
 			struct page *pages[2], int off, int size)
 {
 	char *buf = area->vm_buf;
 	/* read-only users cannot have changed the buffer: skip the copy */
 	if (area->vm_mm != ZS_MM_RO) {
 		int head_len = PAGE_SIZE - off;
 		int tail_len = size - head_len;
 		void *addr;
 		/* first part: from @off to the end of the first page */
 		addr = kmap_atomic(pages[0]);
 		memcpy(addr + off, buf, head_len);
 		kunmap_atomic(addr);
 		/* remainder: from the start of the second page */
 		addr = kmap_atomic(pages[1]);
 		memcpy(addr, buf + head_len, tail_len);
 		kunmap_atomic(addr);
 	}
 	/* enable page faults to match kunmap_atomic() return conditions */
 	pagefault_enable();
 }
 #endif /* CONFIG_PGTABLE_MAPPING */
 /*
  * CPU notifier callback.
  *
  * CPU_UP_PREPARE sets up the zs_map_area of the cpu encoded in @pcpu;
  * CPU_DEAD and CPU_UP_CANCELED tear it down.  Every other action is
  * ignored.
  *
  * Returns NOTIFY_OK, or the setup error wrapped by notifier_from_errno().
  */
 static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,
 				void *pcpu)
 {
 	int cpu = (long)pcpu;
 	if (action == CPU_UP_PREPARE) {
 		int err = __zs_cpu_up(&per_cpu(zs_map_area, cpu));
 		if (err)
 			return notifier_from_errno(err);
 	} else if (action == CPU_DEAD || action == CPU_UP_CANCELED) {
 		__zs_cpu_down(&per_cpu(zs_map_area, cpu));
 	}
 	return NOTIFY_OK;
 }
 /* Notifier block that routes cpu notifications to zs_cpu_notifier(). */
 static struct notifier_block zs_cpu_nb = {
 	.notifier_call = zs_cpu_notifier,
 };
-static void zs_unregister_cpu_notifier(void)
-{
-	int cpu;
-	cpu_notifier_register_begin();
-	for_each_online_cpu(cpu)
-		zs_cpu_notifier(NULL, CPU_DEAD, (void *)(long)cpu);
-	__unregister_cpu_notifier(&zs_cpu_nb);
-	cpu_notifier_register_done();
-}
 /*
  * Register zs_cpu_nb and set up the mapping area of every cpu that is
  * already online by replaying CPU_UP_PREPARE for it.
  *
  * Returns 0 on success, or the negative errno of the first cpu whose setup
  * failed.  Areas that were set up before the failure are left in place;
  * this function does not undo them.
  */
 static int zs_register_cpu_notifier(void)
 {
 	/*
 	 * Track the errno explicitly instead of relying on
 	 * uninitialized_var(): the result is well defined (0) even if the
 	 * loop body never runs.
 	 */
 	int cpu, err = 0;
 	cpu_notifier_register_begin();
 	__register_cpu_notifier(&zs_cpu_nb);
 	for_each_online_cpu(cpu) {
 		int ret = zs_cpu_notifier(NULL, CPU_UP_PREPARE,
 					  (void *)(long)cpu);
 		err = notifier_to_errno(ret);
 		if (err)
 			break;
 	}
 	cpu_notifier_register_done();
 	return err;
 }
+static void zs_unregister_cpu_notifier(void)
+{	/* Tear down the per-cpu mapping areas and unregister zs_cpu_nb. */
+	int cpu;
+	cpu_notifier_register_begin();
+	for_each_online_cpu(cpu)	/* replay CPU_DEAD so zs_cpu_notifier() releases each online cpu's area */
+		zs_cpu_notifier(NULL, CPU_DEAD, (void *)(long)cpu);
+	__unregister_cpu_notifier(&zs_cpu_nb);
+	cpu_notifier_register_done();
+}
 /*
  * Work out how many size classes are needed to step from ZS_MIN_ALLOC_SIZE
  * to ZS_MAX_ALLOC_SIZE in increments of ZS_SIZE_CLASS_DELTA, and store the
  * count in zs_size_classes.
  */
 static void init_zs_size_classes(void)
 {
 	/* one class for the minimum size plus one per whole delta step */
 	int count = 1 +
 		(ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / ZS_SIZE_CLASS_DELTA;
 	/* a partial final step needs a class of its own */
 	if ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) % ZS_SIZE_CLASS_DELTA)
 		count++;
 	zs_size_classes = count;
 }
-static void __exit zs_exit(void)
-{
-#ifdef CONFIG_ZPOOL
-	zpool_unregister_driver(&zs_zpool_driver);
-#endif
-	zs_unregister_cpu_notifier();
-}
-static int __init zs_init(void)
-{
-	int ret = zs_register_cpu_notifier();
-	if (ret) {
-		zs_unregister_cpu_notifier();
-		return ret;
-	}
-	init_zs_size_classes();
-#ifdef CONFIG_ZPOOL
-	zpool_register_driver(&zs_zpool_driver);
-#endif
-	return 0;
-}
 /*
  * Number of objects of @size bytes that fit into a zspage made of
  * @pages_per_zspage pages (the total byte count divided by @size, rounded
  * down).
  */
 static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)
 {
 	return (pages_per_zspage * PAGE_SIZE) / size;
 }
 /*
  * Decide whether a class of @size bytes built from @pages_per_zspage pages
  * can share the size_class @prev.
  *
  * Returns true only when both use the same number of pages per zspage and
  * hold the same maximum number of objects per zspage.
  */
 static bool can_merge(struct size_class *prev, int size, int pages_per_zspage)
 {
 	return prev->pages_per_zspage == pages_per_zspage &&
 	       get_maxobj_per_zspage(prev->size, prev->pages_per_zspage) ==
 	       get_maxobj_per_zspage(size, pages_per_zspage);
 }
+unsigned long zs_get_total_pages(struct zs_pool *pool)
+{	/* Report the current value of the pool's pages_allocated counter. */
+	return atomic_long_read(&pool->pages_allocated);
+}
+EXPORT_SYMBOL_GPL(zs_get_total_pages);
 /**
- * zs_create_pool - Creates an allocation pool to work from.
+ * zs_map_object - get address of allocated object from handle.
- * @flags: allocation flags used to allocate pool metadata
+ * @pool: pool from which the object was allocated
+ * @handle: handle returned from zs_malloc
  *
- * This function must be called before anything when using
+ * Before using an object allocated from zs_malloc, it must be mapped using
- * the zsmalloc allocator.
+ * this function. When done with the object, it must be unmapped using
+ * zs_unmap_object.
  *
- * On success, a pointer to the newly created pool is returned,
+ * Only one object can be mapped per cpu at a time. There is no protection
- * otherwise NULL.
+ * against nested mappings.
+ *
+ * This function returns with preemption and page faults disabled.
  */
-struct zs_pool *zs_create_pool(gfp_t flags)
+void *zs_map_object(struct zs_pool *pool, unsigned long handle,
+			enum zs_mapmode mm)
 {
-	int i;
+	struct page *page;
-	struct zs_pool *pool;
+	unsigned long obj_idx, off;
-	struct size_class *prev_class = NULL;
-	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+	unsigned int class_idx;
-	if (!pool)
+	enum fullness_group fg;
-		return NULL;
+	struct size_class *class;
+	struct mapping_area *area;
+	struct page *pages[2];
-	pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *),
+	BUG_ON(!handle);
-			GFP_KERNEL);
-	if (!pool->size_class) {
-		kfree(pool);
-		return NULL;
-	}
 	/*
-	 * Iterate reversly, because, size of size_class that we want to use
+	 * Because we use per-cpu mapping areas shared among the
-	 * for merging should be larger or equal to current size.
+	 * pools/users, we can't allow mapping in interrupt context
+	 * because it can corrupt another users mappings.
 	 */
-	for (i = zs_size_classes - 1; i >= 0; i--) {
+	BUG_ON(in_interrupt());
-		int size;
-		int pages_per_zspage;
-		struct size_class *class;
-		size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;
+	obj_handle_to_location(handle, &page, &obj_idx);
-		if (size > ZS_MAX_ALLOC_SIZE)
+	get_zspage_mapping(get_first_page(page), &class_idx, &fg);
-			size = ZS_MAX_ALLOC_SIZE;
+	class = pool->size_class[class_idx];
-		pages_per_zspage = get_pages_per_zspage(size);
+	off = obj_idx_to_offset(page, obj_idx, class->size);
-		/*
+	area = &get_cpu_var(zs_map_area);
-		 * size_class is used for normal zsmalloc operation such
+	area->vm_mm = mm;
-		 * as alloc/free for that size. Although it is natural that we
+	if (off + class->size <= PAGE_SIZE) {
-		 * have one size_class for each size, there is a chance that we
+		/* this object is contained entirely within a page */
-		 * can get more memory utilization if we use one size_class for
+		area->vm_addr = kmap_atomic(page);
-		 * many different sizes whose size_class have same
+		return area->vm_addr + off;
-		 * characteristics. So, we makes size_class point to
-		 * previous size_class if possible.
-		 */
-		if (prev_class) {
-			if (can_merge(prev_class, size, pages_per_zspage)) {
-				pool->size_class[i] = prev_class;
-				continue;
-			}
-		}
-		class = kzalloc(sizeof(struct size_class), GFP_KERNEL);
-		if (!class)
-			goto err;
-		class->size = size;
-		class->index = i;
-		class->pages_per_zspage = pages_per_zspage;
-		spin_lock_init(&class->lock);
-		pool->size_class[i] = class;
-		prev_class = class;
 	}
-	pool->flags = flags;
+	/* this object spans two pages */
+	pages[0] = page;
+	pages[1] = get_next_page(page);
+	BUG_ON(!pages[1]);
-	return pool;
+	return __zs_map_object(area, pages, off, class->size);
-err:
-	zs_destroy_pool(pool);
-	return NULL;
 }
-EXPORT_SYMBOL_GPL(zs_create_pool);
+EXPORT_SYMBOL_GPL(zs_map_object);
-void zs_destroy_pool(struct zs_pool *pool)
+void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
 {
-	int i;
+	struct page *page;
+	unsigned long obj_idx, off;
-	for (i = 0; i < zs_size_classes; i++) {
+	unsigned int class_idx;
-		int fg;
+	enum fullness_group fg;
-		struct size_class *class = pool->size_class[i];
+	struct size_class *class;
+	struct mapping_area *area;
-		if (!class)
+	BUG_ON(!handle);
-			continue;
-		if (class->index != i)
+	obj_handle_to_location(handle, &page, &obj_idx);
-			continue;
+	get_zspage_mapping(get_first_page(page), &class_idx, &fg);
+	class = pool->size_class[class_idx];
+	off = obj_idx_to_offset(page, obj_idx, class->size);
-		for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) {
+	area = this_cpu_ptr(&zs_map_area);
-			if (class->fullness_list[fg]) {
+	if (off + class->size <= PAGE_SIZE)
-				pr_info("Freeing non-empty class with size %db, fullness group %d\n",
+		kunmap_atomic(area->vm_addr);
-					class->size, fg);
+	else {
-			}
+		struct page *pages[2];
-		}
-		kfree(class);
-	}
-	kfree(pool->size_class);
+		pages[0] = page;
-	kfree(pool);
+		pages[1] = get_next_page(page);
+		BUG_ON(!pages[1]);
+		__zs_unmap_object(area, pages, off, class->size);
+	}
+	put_cpu_var(zs_map_area);
 }
-EXPORT_SYMBOL_GPL(zs_destroy_pool);
+EXPORT_SYMBOL_GPL(zs_unmap_object);
 /**
  * zs_malloc - Allocate block of given size from pool.
  * @pool: pool to allocate from
  * @size: size of block to allocate
  *
  * On success, handle to the allocated object is returned,
  * otherwise 0.
  * Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail,
  * and so will requests with a size of 0.
  */
 unsigned long zs_malloc(struct zs_pool *pool, size_t size)
 {
 	unsigned long obj;
 	struct link_free *link;
 	struct size_class *class;
 	void *vaddr;
 	struct page *first_page, *m_page;
 	unsigned long m_objidx, m_offset;
 	/* reject empty and oversized requests up front */
 	if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
 		return 0;
 	/* size class selected by get_size_class_index() for this size */
 	class = pool->size_class[get_size_class_index(size)];
 	spin_lock(&class->lock);
 	first_page = find_get_zspage(class);
 	if (!first_page) {
 		/*
 		 * find_get_zspage() returned nothing: allocate a new zspage
 		 * with the class lock dropped and retake the lock afterwards.
 		 */
 		spin_unlock(&class->lock);
 		first_page = alloc_zspage(class, pool->flags);
 		if (unlikely(!first_page))
 			return 0;
 		set_zspage_mapping(first_page, class->index, ZS_EMPTY);
 		/* account the new zspage's pages in the pool-wide counter */
 		atomic_long_add(class->pages_per_zspage,
 					&pool->pages_allocated);
 		spin_lock(&class->lock);
 	}
 	/* the head of the zspage freelist is the handle handed out */
 	obj = (unsigned long)first_page->freelist;
 	obj_handle_to_location(obj, &m_page, &m_objidx);
 	m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
 	/*
 	 * Read the link stored in the object through a temporary atomic
 	 * mapping, advance the freelist to link->next, then overwrite the
 	 * link with POISON_INUSE.
 	 */
 	vaddr = kmap_atomic(m_page);
 	link = (struct link_free *)vaddr + m_offset / sizeof(*link);
 	first_page->freelist = link->next;
 	memset(link, POISON_INUSE, sizeof(*link));
 	kunmap_atomic(vaddr);
 	first_page->inuse++;
 	/* Now move the zspage to another fullness group, if required */
 	fix_fullness_group(pool, first_page);
 	spin_unlock(&class->lock);
 	return obj;
 }
 EXPORT_SYMBOL_GPL(zs_malloc);
 /**
  * zs_free - Return an object to the freelist of its zspage.
  * @pool: pool the size classes are looked up in
  * @obj: handle of the object to free; a handle of 0 is ignored
  *
  * When fix_fullness_group() reports the zspage as ZS_EMPTY afterwards,
  * its pages are subtracted from pool->pages_allocated and the zspage is
  * freed.
  */
 void zs_free(struct zs_pool *pool, unsigned long obj)
 {
 	struct link_free *link;
 	struct page *first_page, *f_page;
 	unsigned long f_objidx, f_offset;
 	void *vaddr;
 	int class_idx;
 	struct size_class *class;
 	enum fullness_group fullness;
 	if (unlikely(!obj))
 		return;
 	/* locate the object's page, its zspage and the owning size class */
 	obj_handle_to_location(obj, &f_page, &f_objidx);
 	first_page = get_first_page(f_page);
 	get_zspage_mapping(first_page, &class_idx, &fullness);
 	class = pool->size_class[class_idx];
 	f_offset = obj_idx_to_offset(f_page, f_objidx, class->size);
 	spin_lock(&class->lock);
 	/* Insert this object in containing zspage's freelist */
 	vaddr = kmap_atomic(f_page);
 	link = (struct link_free *)(vaddr + f_offset);
 	link->next = first_page->freelist;
 	kunmap_atomic(vaddr);
 	first_page->freelist = (void *)obj;
 	first_page->inuse--;
 	fullness = fix_fullness_group(pool, first_page);
 	spin_unlock(&class->lock);
 	/* the accounting update and the zspage release run after the unlock */
 	if (fullness == ZS_EMPTY) {
 		atomic_long_sub(class->pages_per_zspage,
 				&pool->pages_allocated);
 		free_zspage(first_page);
 	}
 }
 EXPORT_SYMBOL_GPL(zs_free);
 /**
- * zs_map_object - get address of allocated object from handle.
+ * zs_create_pool - Creates an allocation pool to work from.
- * @pool: pool from which the object was allocated
+ * @flags: allocation flags used to allocate pool metadata
- * @handle: handle returned from zs_malloc
  *
- * Before using an object allocated from zs_malloc, it must be mapped using
+ * This function must be called before anything when using
- * this function. When done with the object, it must be unmapped using
+ * the zsmalloc allocator.
- * zs_unmap_object.
  *
- * Only one object can be mapped per cpu at a time. There is no protection
+ * On success, a pointer to the newly created pool is returned,
- * against nested mappings.
+ * otherwise NULL.
- *
- * This function returns with preemption and page faults disabled.
  */
-void *zs_map_object(struct zs_pool *pool, unsigned long handle,
+struct zs_pool *zs_create_pool(gfp_t flags)
-			enum zs_mapmode mm)
 {
-	struct page *page;
+	int i;
-	unsigned long obj_idx, off;
+	struct zs_pool *pool;
+	struct size_class *prev_class = NULL;
-	unsigned int class_idx;
+	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
-	enum fullness_group fg;
+	if (!pool)
-	struct size_class *class;
+		return NULL;
-	struct mapping_area *area;
-	struct page *pages[2];
-	BUG_ON(!handle);
+	pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *),
+			GFP_KERNEL);
+	if (!pool->size_class) {
+		kfree(pool);
+		return NULL;
+	}
 	/*
-	 * Because we use per-cpu mapping areas shared among the
+	 * Iterate reversly, because, size of size_class that we want to use
-	 * pools/users, we can't allow mapping in interrupt context
+	 * for merging should be larger or equal to current size.
-	 * because it can corrupt another users mappings.
 	 */
-	BUG_ON(in_interrupt());
+	for (i = zs_size_classes - 1; i >= 0; i--) {
+		int size;
+		int pages_per_zspage;
+		struct size_class *class;
-	obj_handle_to_location(handle, &page, &obj_idx);
+		size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;
-	get_zspage_mapping(get_first_page(page), &class_idx, &fg);
+		if (size > ZS_MAX_ALLOC_SIZE)
-	class = pool->size_class[class_idx];
+			size = ZS_MAX_ALLOC_SIZE;
-	off = obj_idx_to_offset(page, obj_idx, class->size);
+		pages_per_zspage = get_pages_per_zspage(size);
-	area = &get_cpu_var(zs_map_area);
+		/*
-	area->vm_mm = mm;
+		 * size_class is used for normal zsmalloc operation such
-	if (off + class->size <= PAGE_SIZE) {
+		 * as alloc/free for that size. Although it is natural that we
-		/* this object is contained entirely within a page */
+		 * have one size_class for each size, there is a chance that we
-		area->vm_addr = kmap_atomic(page);
+		 * can get more memory utilization if we use one size_class for
-		return area->vm_addr + off;
+		 * many different sizes whose size_class have same
+		 * characteristics. So, we makes size_class point to
+		 * previous size_class if possible.
+		 */
+		if (prev_class) {
+			if (can_merge(prev_class, size, pages_per_zspage)) {
+				pool->size_class[i] = prev_class;
+				continue;
+			}