Commit 8a9de9cde83d1c913bcbe5bd8c6291f5536a576c

Authored by Christoph Lameter
Committed by Greg Kroah-Hartman
1 parent c45247fdfb

slub: Do not hold slub_lock when calling sysfs_slab_add()

commit 66c4c35c6bc5a1a452b024cf0364635b28fd94e4 upstream.

sysfs_slab_add() calls various sysfs functions that actually may
end up in userspace doing all sorts of things.

Release the slub_lock after adding the kmem_cache structure to the list.
At that point the address of the kmem_cache is not known so we are
guaranteed exclusive access to the following modifications to the
kmem_cache structure.

If sysfs_slab_add() fails, reacquire the slub_lock to remove the
kmem_cache structure from the list.

Reported-by: Sasha Levin <levinsasha928@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Showing 1 changed file with 2 additions and 1 deletion
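The changed hunk itself falls outside the excerpt below, which only shows the unchanged leading context of mm/slub.c. As a rough sketch (not the verbatim diff) of the flow the commit message describes, the allocation path at the end of kmem_cache_create() is shaped roughly like this after the change; all identifiers are from mm/slub.c of this era, and error labels outside this fragment are only referenced in comments:

	if (kmem_cache_open(s, n, size, align, flags, ctor)) {
		list_add(&s->list, &slab_caches);
		/*
		 * Drop slub_lock before calling sysfs_slab_add(): the
		 * sysfs code may end up in userspace.  Nobody else knows
		 * the address of the new kmem_cache yet, so the structure
		 * can still be modified safely without the lock.
		 */
		up_write(&slub_lock);
		if (sysfs_slab_add(s)) {
			/* Reacquire only to undo the list insertion. */
			down_write(&slub_lock);
			list_del(&s->list);
			kfree(n);
			kfree(s);
			goto err;	/* the err path drops slub_lock again */
		}
		return s;
	}

The key point is that sysfs_slab_add() now runs without slub_lock held; the semaphore is taken again only on failure, to unlink the half-constructed cache before it is freed.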

/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 *
 * The allocator synchronizes using per slab locks or atomic operatios
 * and only uses a centralized lock to manage a pool of partial slabs.
 *
 * (C) 2007 SGI, Christoph Lameter
 * (C) 2011 Linux Foundation, Christoph Lameter
 */

#include <linux/mm.h>
#include <linux/swap.h> /* struct reclaim_state */
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmemcheck.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/math64.h>
#include <linux/fault-inject.h>
#include <linux/stacktrace.h>

#include <trace/events/kmem.h>

/*
 * Lock order:
 *   1. slub_lock (Global Semaphore)
 *   2. node->list_lock
 *   3. slab_lock(page) (Only on some arches and for debugging)
 *
 *   slub_lock
 *
 *   The role of the slub_lock is to protect the list of all the slabs
 *   and to synchronize major metadata changes to slab cache structures.
 *
 *   The slab_lock is only used for debugging and on arches that do not
 *   have the ability to do a cmpxchg_double. It only protects the second
 *   double word in the page struct. Meaning
 *	A. page->freelist	-> List of object free in a page
 *	B. page->counters	-> Counters of objects
 *	C. page->frozen		-> frozen state
 *
 *   If a slab is frozen then it is exempt from list management. It is not
 *   on any list. The processor that froze the slab is the one who can
 *   perform list operations on the page. Other processors may put objects
 *   onto the freelist but the processor that froze the slab is the only
 *   one that can retrieve the objects from the page's freelist.
 *
 *   The list_lock protects the partial and full list on each node and
 *   the partial slab counter. If taken then no new slabs may be added or
 *   removed from the lists nor make the number of partial slabs be modified.
 *   (Note that the total number of slabs is an atomic value that may be
 *   modified without taking the list lock).
 *
 *   The list_lock is a centralized lock and thus we avoid taking it as
 *   much as possible. As long as SLUB does not have to handle partial
 *   slabs, operations can continue without any centralized lock. F.e.
 *   allocating a long series of objects that fill up slabs does not require
 *   the list lock.
 *   Interrupts are disabled during allocation and deallocation in order to
 *   make the slab allocator safe to use in the context of an irq. In addition
 *   interrupts are disabled to ensure that the processor does not change
 *   while handling per_cpu slabs, due to kernel preemption.
 *
 * SLUB assigns one slab for allocation to each processor.
 * Allocations only occur from these slabs called cpu slabs.
 *
 * Slabs with free elements are kept on a partial list and during regular
 * operations no list for full slabs is used. If an object in a full slab is
 * freed then the slab will show up again on the partial lists.
 * We track full slabs for debugging purposes though because otherwise we
 * cannot scan all objects.
 *
 * Slabs are freed when they become empty. Teardown and setup is
 * minimal so we rely on the page allocators per cpu caches for
 * fast frees and allocs.
 *
 * Overloading of page flags that are otherwise used for LRU management.
 *
 * PageActive		The slab is frozen and exempt from list processing.
 *			This means that the slab is dedicated to a purpose
 *			such as satisfying allocations for a specific
 *			processor. Objects may be freed in the slab while
 *			it is frozen but slab_free will then skip the usual
 *			list operations. It is up to the processor holding
 *			the slab to integrate the slab into the slab lists
 *			when the slab is no longer needed.
 *
 *			One use of this flag is to mark slabs that are
 *			used for allocations. Then such a slab becomes a cpu
 *			slab. The cpu slab may be equipped with an additional
 *			freelist that allows lockless access to
 *			free objects in addition to the regular freelist
 *			that requires the slab lock.
 *
 * PageError		Slab requires special handling due to debug
 *			options set. This moves slab handling out of
 *			the fast path and disables lockless freelists.
 */

#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DEBUG_FREE)

static inline int kmem_cache_debug(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
	return unlikely(s->flags & SLAB_DEBUG_FLAGS);
#else
	return 0;
#endif
}

/*
 * Issues still to be resolved:
 *
 * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
 *
 * - Variable sizing of the per node arrays
 */

/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST

/* Enable to log cmpxchg failures */
#undef SLUB_DEBUG_CMPXCHG

/*
 * Mininum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
#define MIN_PARTIAL 5

/*
 * Maximum number of desirable partial slabs.
 * The existence of more partial slabs makes kmem_cache_shrink
 * sort the partial list by the number of objects in the.
 */
#define MAX_PARTIAL 10

#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

/*
 * Debugging flags that require metadata to be stored in the slab. These get
 * disabled when slub_debug=O is used and a cache's min order increases with
 * metadata.
 */
#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)

/*
 * Set of flags that will prevent slab merging
 */
#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
		SLAB_FAILSLAB)

#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
		SLAB_CACHE_DMA | SLAB_NOTRACK)

#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE	32767 /* since page.objects is u15 */

/* Internal SLUB flags */
#define __OBJECT_POISON		0x80000000UL /* Poison object */
#define __CMPXCHG_DOUBLE	0x40000000UL /* Use cmpxchg_double */

static int kmem_size = sizeof(struct kmem_cache);

#ifdef CONFIG_SMP
static struct notifier_block slab_notifier;
#endif

static enum {
	DOWN,		/* No slab functionality available */
	PARTIAL,	/* Kmem_cache_node works */
	UP,		/* Everything works but does not show up in sysfs */
	SYSFS		/* Sysfs up */
} slab_state = DOWN;

/* A list of all slab caches on the system */
static DECLARE_RWSEM(slub_lock);
static LIST_HEAD(slab_caches);

/*
 * Tracking user of a slab.
 */
#define TRACK_ADDRS_COUNT 16
struct track {
	unsigned long addr;	/* Called from address */
#ifdef CONFIG_STACKTRACE
	unsigned long addrs[TRACK_ADDRS_COUNT];	/* Called from address */
#endif
	int cpu;		/* Was running on cpu */
	int pid;		/* Pid context */
	unsigned long when;	/* When did the operation occur */
};

enum track_item { TRACK_ALLOC, TRACK_FREE };

#ifdef CONFIG_SYSFS
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void sysfs_slab_remove(struct kmem_cache *);

#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
							{ return 0; }
static inline void sysfs_slab_remove(struct kmem_cache *s)
{
	kfree(s->name);
	kfree(s);
}

#endif

static inline void stat(const struct kmem_cache *s, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
	__this_cpu_inc(s->cpu_slab->stat[si]);
#endif
}

/********************************************************************
 *			Core slab cache functions
 *******************************************************************/

int slab_is_available(void)
{
	return slab_state >= UP;
}

static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
	return s->node[node];
}

/* Verify that a pointer has an address that is valid within a slab page */
static inline int check_valid_pointer(struct kmem_cache *s,
				struct page *page, const void *object)
{
	void *base;

	if (!object)
		return 1;

	base = page_address(page);
	if (object < base || object >= base + page->objects * s->size ||
		(object - base) % s->size) {
		return 0;
	}

	return 1;
}

static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
	return *(void **)(object + s->offset);
}

static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
{
	void *p;

#ifdef CONFIG_DEBUG_PAGEALLOC
	probe_kernel_read(&p, (void **)(object + s->offset), sizeof(p));
#else
	p = get_freepointer(s, object);
#endif
	return p;
}

static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
{
	*(void **)(object + s->offset) = fp;
}

/* Loop over all objects in a slab */
#define for_each_object(__p, __s, __addr, __objects) \
	for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
			__p += (__s)->size)

/* Determine object index from a given position */
static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
{
	return (p - addr) / s->size;
}

static inline size_t slab_ksize(const struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
	/*
	 * Debugging requires use of the padding between object
	 * and whatever may come after it.
	 */
	if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
		return s->objsize;

#endif
	/*
	 * If we have the need to store the freelist pointer
	 * back there or track user information then we can
	 * only use the space before that information.
	 */
	if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
		return s->inuse;
	/*
	 * Else we can use all the padding etc for the allocation
	 */
	return s->size;
}

static inline int order_objects(int order, unsigned long size, int reserved)
{
	return ((PAGE_SIZE << order) - reserved) / size;
}

static inline struct kmem_cache_order_objects oo_make(int order,
		unsigned long size, int reserved)
{
	struct kmem_cache_order_objects x = {
		(order << OO_SHIFT) + order_objects(order, size, reserved)
	};

	return x;
}

static inline int oo_order(struct kmem_cache_order_objects x)
{
	return x.x >> OO_SHIFT;
}

static inline int oo_objects(struct kmem_cache_order_objects x)
{
	return x.x & OO_MASK;
}

/*
 * Per slab locking using the pagelock
 */
static __always_inline void slab_lock(struct page *page)
{
	bit_spin_lock(PG_locked, &page->flags);
}

static __always_inline void slab_unlock(struct page *page)
{
	__bit_spin_unlock(PG_locked, &page->flags);
}

/* Interrupts must be disabled (for the fallback code to work right) */
static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
		void *freelist_old, unsigned long counters_old,
		void *freelist_new, unsigned long counters_new,
		const char *n)
{
	VM_BUG_ON(!irqs_disabled());
#ifdef CONFIG_CMPXCHG_DOUBLE
	if (s->flags & __CMPXCHG_DOUBLE) {
		if (cmpxchg_double(&page->freelist,
			freelist_old, counters_old,
			freelist_new, counters_new))
			return 1;
	} else
#endif
	{
		slab_lock(page);
		if (page->freelist == freelist_old && page->counters == counters_old) {
			page->freelist = freelist_new;
			page->counters = counters_new;
			slab_unlock(page);
			return 1;
		}
		slab_unlock(page);
	}

	cpu_relax();
	stat(s, CMPXCHG_DOUBLE_FAIL);

#ifdef SLUB_DEBUG_CMPXCHG
	printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
#endif

	return 0;
}

static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
		void *freelist_old, unsigned long counters_old,
		void *freelist_new, unsigned long counters_new,
		const char *n)
{
#ifdef CONFIG_CMPXCHG_DOUBLE
	if (s->flags & __CMPXCHG_DOUBLE) {
		if (cmpxchg_double(&page->freelist,
			freelist_old, counters_old,
			freelist_new, counters_new))
			return 1;
	} else
#endif
	{
		unsigned long flags;

		local_irq_save(flags);
		slab_lock(page);
		if (page->freelist == freelist_old && page->counters == counters_old) {
			page->freelist = freelist_new;
			page->counters = counters_new;
			slab_unlock(page);
			local_irq_restore(flags);
			return 1;
		}
		slab_unlock(page);
		local_irq_restore(flags);
	}

	cpu_relax();
	stat(s, CMPXCHG_DOUBLE_FAIL);

#ifdef SLUB_DEBUG_CMPXCHG
	printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
#endif

	return 0;
}

#ifdef CONFIG_SLUB_DEBUG
/*
 * Determine a map of object in use on a page.
 *
 * Node listlock must be held to guarantee that the page does
 * not vanish from under us.
 */
static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
{
	void *p;
	void *addr = page_address(page);

	for (p = page->freelist; p; p = get_freepointer(s, p))
		set_bit(slab_index(p, s, addr), map);
}

/*
 * Debug settings:
 */
#ifdef CONFIG_SLUB_DEBUG_ON
static int slub_debug = DEBUG_DEFAULT_FLAGS;
#else
static int slub_debug;
#endif

static char *slub_debug_slabs;
static int disable_higher_order_debug;

/*
 * Object debugging
 */
static void print_section(char *text, u8 *addr, unsigned int length)
{
	print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
			length, 1);
}

static struct track *get_track(struct kmem_cache *s, void *object,
	enum track_item alloc)
{
	struct track *p;

	if (s->offset)
		p = object + s->offset + sizeof(void *);
	else
		p = object + s->inuse;

	return p + alloc;
}

static void set_track(struct kmem_cache *s, void *object,
			enum track_item alloc, unsigned long addr)
{
	struct track *p = get_track(s, object, alloc);

	if (addr) {
#ifdef CONFIG_STACKTRACE
		struct stack_trace trace;
		int i;

		trace.nr_entries = 0;
		trace.max_entries = TRACK_ADDRS_COUNT;
		trace.entries = p->addrs;
		trace.skip = 3;
		save_stack_trace(&trace);

		/* See rant in lockdep.c */
		if (trace.nr_entries != 0 &&
		    trace.entries[trace.nr_entries - 1] == ULONG_MAX)
			trace.nr_entries--;

		for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
			p->addrs[i] = 0;
#endif
		p->addr = addr;
		p->cpu = smp_processor_id();
		p->pid = current->pid;
		p->when = jiffies;
	} else
		memset(p, 0, sizeof(struct track));
}

static void init_tracking(struct kmem_cache *s, void *object)
{
	if (!(s->flags & SLAB_STORE_USER))
		return;

	set_track(s, object, TRACK_FREE, 0UL);
	set_track(s, object, TRACK_ALLOC, 0UL);
}

static void print_track(const char *s, struct track *t)
{
	if (!t->addr)
		return;

	printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
		s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
#ifdef CONFIG_STACKTRACE
	{
		int i;
		for (i = 0; i < TRACK_ADDRS_COUNT; i++)
			if (t->addrs[i])
				printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]);
			else
				break;
	}
#endif
}

static void print_tracking(struct kmem_cache *s, void *object)
{
	if (!(s->flags & SLAB_STORE_USER))
		return;

	print_track("Allocated", get_track(s, object, TRACK_ALLOC));
	print_track("Freed", get_track(s, object, TRACK_FREE));
}

static void print_page_info(struct page *page)
{
	printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
		page, page->objects, page->inuse, page->freelist, page->flags);

}

static void slab_bug(struct kmem_cache *s, char *fmt, ...)
{
	va_list args;
	char buf[100];

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	printk(KERN_ERR "========================================"
			"=====================================\n");
	printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
	printk(KERN_ERR "----------------------------------------"
			"-------------------------------------\n\n");
}

static void slab_fix(struct kmem_cache *s, char *fmt, ...)
{
	va_list args;
	char buf[100];

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
}

static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
{
	unsigned int off;	/* Offset of last byte */
	u8 *addr = page_address(page);

	print_tracking(s, p);

	print_page_info(page);

	printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
			p, p - addr, get_freepointer(s, p));

	if (p > addr + 16)
		print_section("Bytes b4 ", p - 16, 16);

	print_section("Object ", p, min_t(unsigned long, s->objsize,
				PAGE_SIZE));
	if (s->flags & SLAB_RED_ZONE)
		print_section("Redzone ", p + s->objsize,
			s->inuse - s->objsize);

	if (s->offset)
		off = s->offset + sizeof(void *);
	else
		off = s->inuse;

	if (s->flags & SLAB_STORE_USER)
		off += 2 * sizeof(struct track);

	if (off != s->size)
		/* Beginning of the filler is the free pointer */
		print_section("Padding ", p + off, s->size - off);

	dump_stack();
}

static void object_err(struct kmem_cache *s, struct page *page,
			u8 *object, char *reason)
{
	slab_bug(s, "%s", reason);
	print_trailer(s, page, object);
}

static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...)
{
	va_list args;
	char buf[100];

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	slab_bug(s, "%s", buf);
	print_page_info(page);
	dump_stack();
}

static void init_object(struct kmem_cache *s, void *object, u8 val)
{
	u8 *p = object;

	if (s->flags & __OBJECT_POISON) {
		memset(p, POISON_FREE, s->objsize - 1);
		p[s->objsize - 1] = POISON_END;
	}

	if (s->flags & SLAB_RED_ZONE)
		memset(p + s->objsize, val, s->inuse - s->objsize);
}

static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
						void *from, void *to)
{
	slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
	memset(from, data, to - from);
}

static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
			u8 *object, char *what,
			u8 *start, unsigned int value, unsigned int bytes)
{
	u8 *fault;
	u8 *end;

	fault = memchr_inv(start, value, bytes);
	if (!fault)
		return 1;

	end = start + bytes;
	while (end > fault && end[-1] == value)
		end--;

	slab_bug(s, "%s overwritten", what);
	printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
					fault, end - 1, fault[0], value);
	print_trailer(s, page, object);

	restore_bytes(s, what, value, fault, end);
	return 0;
}

/*
 * Object layout:
 *
 * object address
 * 	Bytes of the object to be managed.
 * 	If the freepointer may overlay the object then the free
 * 	pointer is the first word of the object.
 *
 * 	Poisoning uses 0x6b (POISON_FREE) and the last byte is
 * 	0xa5 (POISON_END)
 *
 * object + s->objsize
 * 	Padding to reach word boundary. This is also used for Redzoning.
 * 	Padding is extended by another word if Redzoning is enabled and
 * 	objsize == inuse.
 *
 * 	We fill with 0xbb (RED_INACTIVE) for inactive objects and with
 * 	0xcc (RED_ACTIVE) for objects in use.
 *
 * object + s->inuse
 * 	Meta data starts here.
 *
 * 	A. Free pointer (if we cannot overwrite object on free)
 * 	B. Tracking data for SLAB_STORE_USER
 * 	C. Padding to reach required alignment boundary or at mininum
 * 		one word if debugging is on to be able to detect writes
 * 		before the word boundary.
 *
 * 	Padding is done using 0x5a (POISON_INUSE)
 *
 * object + s->size
 * 	Nothing is used beyond s->size.
 *
 * If slabcaches are merged then the objsize and inuse boundaries are mostly
 * ignored. And therefore no slab options that rely on these boundaries
 * may be used with merged slabcaches.
 */

static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
{
	unsigned long off = s->inuse;	/* The end of info */

	if (s->offset)
		/* Freepointer is placed after the object. */
		off += sizeof(void *);

	if (s->flags & SLAB_STORE_USER)
		/* We also have user information there */
		off += 2 * sizeof(struct track);

	if (s->size == off)
		return 1;

	return check_bytes_and_report(s, page, p, "Object padding",
				p + off, POISON_INUSE, s->size - off);
}

/* Check the pad bytes at the end of a slab page */
static int slab_pad_check(struct kmem_cache *s, struct page *page)
{
	u8 *start;
	u8 *fault;
	u8 *end;
	int length;
	int remainder;

	if (!(s->flags & SLAB_POISON))
		return 1;

	start = page_address(page);
	length = (PAGE_SIZE << compound_order(page)) - s->reserved;
	end = start + length;
	remainder = length % s->size;
	if (!remainder)
		return 1;

	fault = memchr_inv(end - remainder, POISON_INUSE, remainder);
	if (!fault)
		return 1;
	while (end > fault && end[-1] == POISON_INUSE)
		end--;

	slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
	print_section("Padding ", end - remainder, remainder);

	restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
	return 0;
}

static int check_object(struct kmem_cache *s, struct page *page,
					void *object, u8 val)
{
	u8 *p = object;
	u8 *endobject = object + s->objsize;

	if (s->flags & SLAB_RED_ZONE) {
		if (!check_bytes_and_report(s, page, object, "Redzone",
			endobject, val, s->inuse - s->objsize))
			return 0;
	} else {
		if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) {
			check_bytes_and_report(s, page, p, "Alignment padding",
				endobject, POISON_INUSE, s->inuse - s->objsize);
		}
	}

	if (s->flags & SLAB_POISON) {
		if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
			(!check_bytes_and_report(s, page, p, "Poison", p,
					POISON_FREE, s->objsize - 1) ||
			!check_bytes_and_report(s, page, p, "Poison",
				p + s->objsize - 1, POISON_END, 1)))
			return 0;
		/*
		 * check_pad_bytes cleans up on its own.
		 */
		check_pad_bytes(s, page, p);
	}

	if (!s->offset && val == SLUB_RED_ACTIVE)
		/*
		 * Object and freepointer overlap. Cannot check
		 * freepointer while object is allocated.
		 */
		return 1;

	/* Check free pointer validity */
	if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
		object_err(s, page, p, "Freepointer corrupt");
		/*
		 * No choice but to zap it and thus lose the remainder
		 * of the free objects in this slab. May cause
		 * another error because the object count is now wrong.
		 */
		set_freepointer(s, p, NULL);
		return 0;
	}
	return 1;
}

static int check_slab(struct kmem_cache *s, struct page *page)
{
	int maxobj;

	VM_BUG_ON(!irqs_disabled());

	if (!PageSlab(page)) {
		slab_err(s, page, "Not a valid slab page");
		return 0;
	}

	maxobj = order_objects(compound_order(page), s->size, s->reserved);
	if (page->objects > maxobj) {
		slab_err(s, page, "objects %u > max %u",
			s->name, page->objects, maxobj);
		return 0;
	}
	if (page->inuse > page->objects) {
		slab_err(s, page, "inuse %u > max %u",
			s->name, page->inuse, page->objects);
		return 0;
	}
	/* Slab_pad_check fixes things up after itself */
	slab_pad_check(s, page);
	return 1;
}

/*
 * Determine if a certain object on a page is on the freelist. Must hold the
 * slab lock to guarantee that the chains are in a consistent state.
 */
static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
{
	int nr = 0;
	void *fp;
	void *object = NULL;
	unsigned long max_objects;

	fp = page->freelist;
	while (fp && nr <= page->objects) {
		if (fp == search)
			return 1;
		if (!check_valid_pointer(s, page, fp)) {
			if (object) {
				object_err(s, page, object,
					"Freechain corrupt");
				set_freepointer(s, object, NULL);
				break;
			} else {
				slab_err(s, page, "Freepointer corrupt");
				page->freelist = NULL;
				page->inuse = page->objects;
				slab_fix(s, "Freelist cleared");
				return 0;
			}
			break;
		}
		object = fp;
		fp = get_freepointer(s, object);
		nr++;
	}

	max_objects = order_objects(compound_order(page), s->size, s->reserved);
	if (max_objects > MAX_OBJS_PER_PAGE)
		max_objects = MAX_OBJS_PER_PAGE;

	if (page->objects != max_objects) {
		slab_err(s, page, "Wrong number of objects. Found %d but "
			"should be %d", page->objects, max_objects);
		page->objects = max_objects;
		slab_fix(s, "Number of objects adjusted.");
	}
	if (page->inuse != page->objects - nr) {
		slab_err(s, page, "Wrong object count. Counter is %d but "
			"counted were %d", page->inuse, page->objects - nr);
		page->inuse = page->objects - nr;
		slab_fix(s, "Object count adjusted.");
	}
	return search == NULL;
}

static void trace(struct kmem_cache *s, struct page *page, void *object,
								int alloc)
{
	if (s->flags & SLAB_TRACE) {
		printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
			s->name,
			alloc ? "alloc" : "free",
			object, page->inuse,
			page->freelist);

		if (!alloc)
			print_section("Object ", (void *)object, s->objsize);

		dump_stack();
	}
}

/*
 * Hooks for other subsystems that check memory allocations. In a typical
 * production configuration these hooks all should produce no code at all.
 */
static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
{
	flags &= gfp_allowed_mask;
	lockdep_trace_alloc(flags);
	might_sleep_if(flags & __GFP_WAIT);

	return should_failslab(s->objsize, flags, s->flags);
}

static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
{
	flags &= gfp_allowed_mask;
	kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
	kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags);
}

static inline void slab_free_hook(struct kmem_cache *s, void *x)
{
	kmemleak_free_recursive(x, s->flags);

	/*
	 * Trouble is that we may no longer disable interupts in the fast path
	 * So in order to make the debug calls that expect irqs to be
	 * disabled we need to disable interrupts temporarily.
	 */
#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
	{
		unsigned long flags;

		local_irq_save(flags);
		kmemcheck_slab_free(s, x, s->objsize);
		debug_check_no_locks_freed(x, s->objsize);
		local_irq_restore(flags);
	}
#endif
	if (!(s->flags & SLAB_DEBUG_OBJECTS))
		debug_check_no_obj_freed(x, s->objsize);
}

/*
 * Tracking of fully allocated slabs for debugging purposes.
 *
 * list_lock must be held.
 */
static void add_full(struct kmem_cache *s,
	struct kmem_cache_node *n, struct page *page)
{
	if (!(s->flags & SLAB_STORE_USER))
		return;

	list_add(&page->lru, &n->full);
}

/*
 * list_lock must be held.
 */
static void remove_full(struct kmem_cache *s, struct page *page)
{
	if (!(s->flags & SLAB_STORE_USER))
		return;

	list_del(&page->lru);
}

/* Tracking of the number of slabs for debugging purposes */
static inline unsigned long slabs_node(struct kmem_cache *s, int node)
{
	struct kmem_cache_node *n = get_node(s, node);

	return atomic_long_read(&n->nr_slabs);
}

static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
{
	return atomic_long_read(&n->nr_slabs);
}

static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
{
	struct kmem_cache_node *n = get_node(s, node);

	/*
	 * May be called early in order to allocate a slab for the
	 * kmem_cache_node structure. Solve the chicken-egg
	 * dilemma by deferring the increment of the count during
	 * bootstrap (see early_kmem_cache_node_alloc).
	 */
	if (n) {
		atomic_long_inc(&n->nr_slabs);
		atomic_long_add(objects, &n->total_objects);
1021 } 1021 }
1022 } 1022 }
1023 static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) 1023 static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1024 { 1024 {
1025 struct kmem_cache_node *n = get_node(s, node); 1025 struct kmem_cache_node *n = get_node(s, node);
1026 1026
1027 atomic_long_dec(&n->nr_slabs); 1027 atomic_long_dec(&n->nr_slabs);
1028 atomic_long_sub(objects, &n->total_objects); 1028 atomic_long_sub(objects, &n->total_objects);
1029 } 1029 }
1030 1030
1031 /* Object debug checks for alloc/free paths */ 1031 /* Object debug checks for alloc/free paths */
1032 static void setup_object_debug(struct kmem_cache *s, struct page *page, 1032 static void setup_object_debug(struct kmem_cache *s, struct page *page,
1033 void *object) 1033 void *object)
1034 { 1034 {
1035 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))) 1035 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1036 return; 1036 return;
1037 1037
1038 init_object(s, object, SLUB_RED_INACTIVE); 1038 init_object(s, object, SLUB_RED_INACTIVE);
1039 init_tracking(s, object); 1039 init_tracking(s, object);
1040 } 1040 }
1041 1041
1042 static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *page, 1042 static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *page,
1043 void *object, unsigned long addr) 1043 void *object, unsigned long addr)
1044 { 1044 {
1045 if (!check_slab(s, page)) 1045 if (!check_slab(s, page))
1046 goto bad; 1046 goto bad;
1047 1047
1048 if (!check_valid_pointer(s, page, object)) { 1048 if (!check_valid_pointer(s, page, object)) {
1049 object_err(s, page, object, "Freelist Pointer check fails"); 1049 object_err(s, page, object, "Freelist Pointer check fails");
1050 goto bad; 1050 goto bad;
1051 } 1051 }
1052 1052
1053 if (!check_object(s, page, object, SLUB_RED_INACTIVE)) 1053 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1054 goto bad; 1054 goto bad;
1055 1055
1056 /* Success. Perform special debug activities for allocs */ 1056 /* Success. Perform special debug activities for allocs */
1057 if (s->flags & SLAB_STORE_USER) 1057 if (s->flags & SLAB_STORE_USER)
1058 set_track(s, object, TRACK_ALLOC, addr); 1058 set_track(s, object, TRACK_ALLOC, addr);
1059 trace(s, page, object, 1); 1059 trace(s, page, object, 1);
1060 init_object(s, object, SLUB_RED_ACTIVE); 1060 init_object(s, object, SLUB_RED_ACTIVE);
1061 return 1; 1061 return 1;
1062 1062
1063 bad: 1063 bad:
1064 if (PageSlab(page)) { 1064 if (PageSlab(page)) {
1065 /* 1065 /*
1066 * If this is a slab page then lets do the best we can 1066 * If this is a slab page then lets do the best we can
1067 * to avoid issues in the future. Marking all objects 1067 * to avoid issues in the future. Marking all objects
1068 * as used avoids touching the remaining objects. 1068 * as used avoids touching the remaining objects.
1069 */ 1069 */
1070 slab_fix(s, "Marking all objects used"); 1070 slab_fix(s, "Marking all objects used");
1071 page->inuse = page->objects; 1071 page->inuse = page->objects;
1072 page->freelist = NULL; 1072 page->freelist = NULL;
1073 } 1073 }
1074 return 0; 1074 return 0;
1075 } 1075 }
1076 1076
1077 static noinline int free_debug_processing(struct kmem_cache *s, 1077 static noinline int free_debug_processing(struct kmem_cache *s,
1078 struct page *page, void *object, unsigned long addr) 1078 struct page *page, void *object, unsigned long addr)
1079 { 1079 {
1080 unsigned long flags; 1080 unsigned long flags;
1081 int rc = 0; 1081 int rc = 0;
1082 1082
1083 local_irq_save(flags); 1083 local_irq_save(flags);
1084 slab_lock(page); 1084 slab_lock(page);
1085 1085
1086 if (!check_slab(s, page)) 1086 if (!check_slab(s, page))
1087 goto fail; 1087 goto fail;
1088 1088
1089 if (!check_valid_pointer(s, page, object)) { 1089 if (!check_valid_pointer(s, page, object)) {
1090 slab_err(s, page, "Invalid object pointer 0x%p", object); 1090 slab_err(s, page, "Invalid object pointer 0x%p", object);
1091 goto fail; 1091 goto fail;
1092 } 1092 }
1093 1093
1094 if (on_freelist(s, page, object)) { 1094 if (on_freelist(s, page, object)) {
1095 object_err(s, page, object, "Object already free"); 1095 object_err(s, page, object, "Object already free");
1096 goto fail; 1096 goto fail;
1097 } 1097 }
1098 1098
1099 if (!check_object(s, page, object, SLUB_RED_ACTIVE)) 1099 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1100 goto out; 1100 goto out;
1101 1101
1102 if (unlikely(s != page->slab)) { 1102 if (unlikely(s != page->slab)) {
1103 if (!PageSlab(page)) { 1103 if (!PageSlab(page)) {
1104 slab_err(s, page, "Attempt to free object(0x%p) " 1104 slab_err(s, page, "Attempt to free object(0x%p) "
1105 "outside of slab", object); 1105 "outside of slab", object);
1106 } else if (!page->slab) { 1106 } else if (!page->slab) {
1107 printk(KERN_ERR 1107 printk(KERN_ERR
1108 "SLUB <none>: no slab for object 0x%p.\n", 1108 "SLUB <none>: no slab for object 0x%p.\n",
1109 object); 1109 object);
1110 dump_stack(); 1110 dump_stack();
1111 } else 1111 } else
1112 object_err(s, page, object, 1112 object_err(s, page, object,
1113 "page slab pointer corrupt."); 1113 "page slab pointer corrupt.");
1114 goto fail; 1114 goto fail;
1115 } 1115 }
1116 1116
1117 if (s->flags & SLAB_STORE_USER) 1117 if (s->flags & SLAB_STORE_USER)
1118 set_track(s, object, TRACK_FREE, addr); 1118 set_track(s, object, TRACK_FREE, addr);
1119 trace(s, page, object, 0); 1119 trace(s, page, object, 0);
1120 init_object(s, object, SLUB_RED_INACTIVE); 1120 init_object(s, object, SLUB_RED_INACTIVE);
1121 rc = 1; 1121 rc = 1;
1122 out: 1122 out:
1123 slab_unlock(page); 1123 slab_unlock(page);
1124 local_irq_restore(flags); 1124 local_irq_restore(flags);
1125 return rc; 1125 return rc;
1126 1126
1127 fail: 1127 fail:
1128 slab_fix(s, "Object at 0x%p not freed", object); 1128 slab_fix(s, "Object at 0x%p not freed", object);
1129 goto out; 1129 goto out;
1130 } 1130 }
1131 1131
1132 static int __init setup_slub_debug(char *str) 1132 static int __init setup_slub_debug(char *str)
1133 { 1133 {
1134 slub_debug = DEBUG_DEFAULT_FLAGS; 1134 slub_debug = DEBUG_DEFAULT_FLAGS;
1135 if (*str++ != '=' || !*str) 1135 if (*str++ != '=' || !*str)
1136 /* 1136 /*
1137 * No options specified. Switch on full debugging. 1137 * No options specified. Switch on full debugging.
1138 */ 1138 */
1139 goto out; 1139 goto out;
1140 1140
1141 if (*str == ',') 1141 if (*str == ',')
1142 /* 1142 /*
1143 * No options but restriction on slabs. This means full 1143 * No options but restriction on slabs. This means full
1144 * debugging for slabs matching a pattern. 1144 * debugging for slabs matching a pattern.
1145 */ 1145 */
1146 goto check_slabs; 1146 goto check_slabs;
1147 1147
1148 if (tolower(*str) == 'o') { 1148 if (tolower(*str) == 'o') {
1149 /* 1149 /*
1150 * Avoid enabling debugging on caches if their minimum order 1150 * Avoid enabling debugging on caches if their minimum order
1151 * would increase as a result. 1151 * would increase as a result.
1152 */ 1152 */
1153 disable_higher_order_debug = 1; 1153 disable_higher_order_debug = 1;
1154 goto out; 1154 goto out;
1155 } 1155 }
1156 1156
1157 slub_debug = 0; 1157 slub_debug = 0;
1158 if (*str == '-') 1158 if (*str == '-')
1159 /* 1159 /*
1160 * Switch off all debugging measures. 1160 * Switch off all debugging measures.
1161 */ 1161 */
1162 goto out; 1162 goto out;
1163 1163
1164 /* 1164 /*
1165 * Determine which debug features should be switched on 1165 * Determine which debug features should be switched on
1166 */ 1166 */
1167 for (; *str && *str != ','; str++) { 1167 for (; *str && *str != ','; str++) {
1168 switch (tolower(*str)) { 1168 switch (tolower(*str)) {
1169 case 'f': 1169 case 'f':
1170 slub_debug |= SLAB_DEBUG_FREE; 1170 slub_debug |= SLAB_DEBUG_FREE;
1171 break; 1171 break;
1172 case 'z': 1172 case 'z':
1173 slub_debug |= SLAB_RED_ZONE; 1173 slub_debug |= SLAB_RED_ZONE;
1174 break; 1174 break;
1175 case 'p': 1175 case 'p':
1176 slub_debug |= SLAB_POISON; 1176 slub_debug |= SLAB_POISON;
1177 break; 1177 break;
1178 case 'u': 1178 case 'u':
1179 slub_debug |= SLAB_STORE_USER; 1179 slub_debug |= SLAB_STORE_USER;
1180 break; 1180 break;
1181 case 't': 1181 case 't':
1182 slub_debug |= SLAB_TRACE; 1182 slub_debug |= SLAB_TRACE;
1183 break; 1183 break;
1184 case 'a': 1184 case 'a':
1185 slub_debug |= SLAB_FAILSLAB; 1185 slub_debug |= SLAB_FAILSLAB;
1186 break; 1186 break;
1187 default: 1187 default:
1188 printk(KERN_ERR "slub_debug option '%c' " 1188 printk(KERN_ERR "slub_debug option '%c' "
1189 "unknown. skipped\n", *str); 1189 "unknown. skipped\n", *str);
1190 } 1190 }
1191 } 1191 }
1192 1192
1193 check_slabs: 1193 check_slabs:
1194 if (*str == ',') 1194 if (*str == ',')
1195 slub_debug_slabs = str + 1; 1195 slub_debug_slabs = str + 1;
1196 out: 1196 out:
1197 return 1; 1197 return 1;
1198 } 1198 }
1199 1199
1200 __setup("slub_debug", setup_slub_debug); 1200 __setup("slub_debug", setup_slub_debug);
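The parser above turns each letter after "slub_debug=" into one debug flag and treats anything after a comma as a cache-name pattern, which kmem_cache_flags() below matches as a prefix of the cache name. A rough user-space mirror of that option loop, useful for reasoning about what a given boot string enables (the flag names and values here are illustrative stand-ins, not the kernel's SLAB_* constants):

        #include <ctype.h>
        #include <stdio.h>

        #define F_DEBUG_FREE 0x01  /* 'f': consistency checks on free */
        #define F_RED_ZONE   0x02  /* 'z': red zoning around objects */
        #define F_POISON     0x04  /* 'p': poison freed objects */
        #define F_STORE_USER 0x08  /* 'u': record last alloc/free owner */
        #define F_TRACE      0x10  /* 't': trace every alloc/free */
        #define F_FAILSLAB   0x20  /* 'a': allow fault injection */

        /* Mirrors the letter loop in setup_slub_debug(): one flag per
         * option character, everything after ',' restricts the caches. */
        static unsigned parse_slub_debug(const char *str, const char **slabs)
        {
                unsigned flags = 0;

                *slabs = NULL;
                for (; *str && *str != ','; str++) {
                        switch (tolower((unsigned char)*str)) {
                        case 'f': flags |= F_DEBUG_FREE; break;
                        case 'z': flags |= F_RED_ZONE;   break;
                        case 'p': flags |= F_POISON;     break;
                        case 'u': flags |= F_STORE_USER; break;
                        case 't': flags |= F_TRACE;      break;
                        case 'a': flags |= F_FAILSLAB;   break;
                        default:
                                fprintf(stderr, "unknown option '%c'\n", *str);
                        }
                }
                if (*str == ',')
                        *slabs = str + 1;
                return flags;
        }

        int main(void)
        {
                const char *slabs;
                unsigned flags = parse_slub_debug("FZP,dentry", &slabs);

                printf("flags=0x%x slabs=%s\n", flags, slabs ? slabs : "(all)");
                return 0;
        }

So booting with slub_debug=FZP,dentry would, per the switch above, enable sanity checks, red zoning and poisoning only for caches whose names begin with "dentry", while a bare slub_debug switches on the default debug set for every cache.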
1201 1201
1202 static unsigned long kmem_cache_flags(unsigned long objsize, 1202 static unsigned long kmem_cache_flags(unsigned long objsize,
1203 unsigned long flags, const char *name, 1203 unsigned long flags, const char *name,
1204 void (*ctor)(void *)) 1204 void (*ctor)(void *))
1205 { 1205 {
1206 /* 1206 /*
1207 * Enable debugging if selected on the kernel commandline. 1207 * Enable debugging if selected on the kernel commandline.
1208 */ 1208 */
1209 if (slub_debug && (!slub_debug_slabs || 1209 if (slub_debug && (!slub_debug_slabs ||
1210 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))) 1210 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
1211 flags |= slub_debug; 1211 flags |= slub_debug;
1212 1212
1213 return flags; 1213 return flags;
1214 } 1214 }
1215 #else 1215 #else
1216 static inline void setup_object_debug(struct kmem_cache *s, 1216 static inline void setup_object_debug(struct kmem_cache *s,
1217 struct page *page, void *object) {} 1217 struct page *page, void *object) {}
1218 1218
1219 static inline int alloc_debug_processing(struct kmem_cache *s, 1219 static inline int alloc_debug_processing(struct kmem_cache *s,
1220 struct page *page, void *object, unsigned long addr) { return 0; } 1220 struct page *page, void *object, unsigned long addr) { return 0; }
1221 1221
1222 static inline int free_debug_processing(struct kmem_cache *s, 1222 static inline int free_debug_processing(struct kmem_cache *s,
1223 struct page *page, void *object, unsigned long addr) { return 0; } 1223 struct page *page, void *object, unsigned long addr) { return 0; }
1224 1224
1225 static inline int slab_pad_check(struct kmem_cache *s, struct page *page) 1225 static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1226 { return 1; } 1226 { return 1; }
1227 static inline int check_object(struct kmem_cache *s, struct page *page, 1227 static inline int check_object(struct kmem_cache *s, struct page *page,
1228 void *object, u8 val) { return 1; } 1228 void *object, u8 val) { return 1; }
1229 static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, 1229 static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1230 struct page *page) {} 1230 struct page *page) {}
1231 static inline void remove_full(struct kmem_cache *s, struct page *page) {} 1231 static inline void remove_full(struct kmem_cache *s, struct page *page) {}
1232 static inline unsigned long kmem_cache_flags(unsigned long objsize, 1232 static inline unsigned long kmem_cache_flags(unsigned long objsize,
1233 unsigned long flags, const char *name, 1233 unsigned long flags, const char *name,
1234 void (*ctor)(void *)) 1234 void (*ctor)(void *))
1235 { 1235 {
1236 return flags; 1236 return flags;
1237 } 1237 }
1238 #define slub_debug 0 1238 #define slub_debug 0
1239 1239
1240 #define disable_higher_order_debug 0 1240 #define disable_higher_order_debug 0
1241 1241
1242 static inline unsigned long slabs_node(struct kmem_cache *s, int node) 1242 static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1243 { return 0; } 1243 { return 0; }
1244 static inline unsigned long node_nr_slabs(struct kmem_cache_node *n) 1244 static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1245 { return 0; } 1245 { return 0; }
1246 static inline void inc_slabs_node(struct kmem_cache *s, int node, 1246 static inline void inc_slabs_node(struct kmem_cache *s, int node,
1247 int objects) {} 1247 int objects) {}
1248 static inline void dec_slabs_node(struct kmem_cache *s, int node, 1248 static inline void dec_slabs_node(struct kmem_cache *s, int node,
1249 int objects) {} 1249 int objects) {}
1250 1250
1251 static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) 1251 static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
1252 { return 0; } 1252 { return 0; }
1253 1253
1254 static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, 1254 static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
1255 void *object) {} 1255 void *object) {}
1256 1256
1257 static inline void slab_free_hook(struct kmem_cache *s, void *x) {} 1257 static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
1258 1258
1259 #endif /* CONFIG_SLUB_DEBUG */ 1259 #endif /* CONFIG_SLUB_DEBUG */
1260 1260
1261 /* 1261 /*
1262 * Slab allocation and freeing 1262 * Slab allocation and freeing
1263 */ 1263 */
1264 static inline struct page *alloc_slab_page(gfp_t flags, int node, 1264 static inline struct page *alloc_slab_page(gfp_t flags, int node,
1265 struct kmem_cache_order_objects oo) 1265 struct kmem_cache_order_objects oo)
1266 { 1266 {
1267 int order = oo_order(oo); 1267 int order = oo_order(oo);
1268 1268
1269 flags |= __GFP_NOTRACK; 1269 flags |= __GFP_NOTRACK;
1270 1270
1271 if (node == NUMA_NO_NODE) 1271 if (node == NUMA_NO_NODE)
1272 return alloc_pages(flags, order); 1272 return alloc_pages(flags, order);
1273 else 1273 else
1274 return alloc_pages_exact_node(node, flags, order); 1274 return alloc_pages_exact_node(node, flags, order);
1275 } 1275 }
1276 1276
1277 static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) 1277 static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1278 { 1278 {
1279 struct page *page; 1279 struct page *page;
1280 struct kmem_cache_order_objects oo = s->oo; 1280 struct kmem_cache_order_objects oo = s->oo;
1281 gfp_t alloc_gfp; 1281 gfp_t alloc_gfp;
1282 1282
1283 flags &= gfp_allowed_mask; 1283 flags &= gfp_allowed_mask;
1284 1284
1285 if (flags & __GFP_WAIT) 1285 if (flags & __GFP_WAIT)
1286 local_irq_enable(); 1286 local_irq_enable();
1287 1287
1288 flags |= s->allocflags; 1288 flags |= s->allocflags;
1289 1289
1290 /* 1290 /*
1291 * Let the initial higher-order allocation fail under memory pressure 1291 * Let the initial higher-order allocation fail under memory pressure
1292 * so we fall-back to the minimum order allocation. 1292 * so we fall-back to the minimum order allocation.
1293 */ 1293 */
1294 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL; 1294 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1295 1295
1296 page = alloc_slab_page(alloc_gfp, node, oo); 1296 page = alloc_slab_page(alloc_gfp, node, oo);
1297 if (unlikely(!page)) { 1297 if (unlikely(!page)) {
1298 oo = s->min; 1298 oo = s->min;
1299 /* 1299 /*
1300 * Allocation may have failed due to fragmentation. 1300 * Allocation may have failed due to fragmentation.
1301 * Try a lower order alloc if possible 1301 * Try a lower order alloc if possible
1302 */ 1302 */
1303 page = alloc_slab_page(flags, node, oo); 1303 page = alloc_slab_page(flags, node, oo);
1304 1304
1305 if (page) 1305 if (page)
1306 stat(s, ORDER_FALLBACK); 1306 stat(s, ORDER_FALLBACK);
1307 } 1307 }
1308 1308
1309 if (flags & __GFP_WAIT) 1309 if (flags & __GFP_WAIT)
1310 local_irq_disable(); 1310 local_irq_disable();
1311 1311
1312 if (!page) 1312 if (!page)
1313 return NULL; 1313 return NULL;
1314 1314
1315 if (kmemcheck_enabled 1315 if (kmemcheck_enabled
1316 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { 1316 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1317 int pages = 1 << oo_order(oo); 1317 int pages = 1 << oo_order(oo);
1318 1318
1319 kmemcheck_alloc_shadow(page, oo_order(oo), flags, node); 1319 kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
1320 1320
1321 /* 1321 /*
1322 * Objects from caches that have a constructor don't get 1322 * Objects from caches that have a constructor don't get
1323 * cleared when they're allocated, so we need to do it here. 1323 * cleared when they're allocated, so we need to do it here.
1324 */ 1324 */
1325 if (s->ctor) 1325 if (s->ctor)
1326 kmemcheck_mark_uninitialized_pages(page, pages); 1326 kmemcheck_mark_uninitialized_pages(page, pages);
1327 else 1327 else
1328 kmemcheck_mark_unallocated_pages(page, pages); 1328 kmemcheck_mark_unallocated_pages(page, pages);
1329 } 1329 }
1330 1330
1331 page->objects = oo_objects(oo); 1331 page->objects = oo_objects(oo);
1332 mod_zone_page_state(page_zone(page), 1332 mod_zone_page_state(page_zone(page),
1333 (s->flags & SLAB_RECLAIM_ACCOUNT) ? 1333 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1334 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, 1334 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1335 1 << oo_order(oo)); 1335 1 << oo_order(oo));
1336 1336
1337 return page; 1337 return page;
1338 } 1338 }
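To put numbers on the oo/min fallback above, assuming 4 KiB pages (PAGE_SIZE varies by architecture) and ignoring the metadata that debugging options add per object, the little calculation below shows what is at stake when the opportunistic higher-order allocation fails and the code retries at s->min:

        #include <stdio.h>

        #define PAGE_BYTES 4096u        /* assumed page size */

        /* Objects that fit a slab of 2^order pages, metadata ignored. */
        static unsigned objects_per_slab(unsigned order, unsigned object_size)
        {
                return (PAGE_BYTES << order) / object_size;
        }

        int main(void)
        {
                unsigned size = 256;    /* hypothetical object size */

                /* Preferred order 3: 32768 bytes, 128 objects per slab. */
                printf("order 3: %u objects\n", objects_per_slab(3, size));
                /* Fallback order 0: 4096 bytes, only 16 objects per slab. */
                printf("order 0: %u objects\n", objects_per_slab(0, size));
                return 0;
        }

The ORDER_FALLBACK statistic counts exactly those retries, so a growing value signals that fragmentation is forcing the cache down to its less efficient minimum order.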
1339 1339
1340 static void setup_object(struct kmem_cache *s, struct page *page, 1340 static void setup_object(struct kmem_cache *s, struct page *page,
1341 void *object) 1341 void *object)
1342 { 1342 {
1343 setup_object_debug(s, page, object); 1343 setup_object_debug(s, page, object);
1344 if (unlikely(s->ctor)) 1344 if (unlikely(s->ctor))
1345 s->ctor(object); 1345 s->ctor(object);
1346 } 1346 }
1347 1347
1348 static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) 1348 static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1349 { 1349 {
1350 struct page *page; 1350 struct page *page;
1351 void *start; 1351 void *start;
1352 void *last; 1352 void *last;
1353 void *p; 1353 void *p;
1354 1354
1355 BUG_ON(flags & GFP_SLAB_BUG_MASK); 1355 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1356 1356
1357 page = allocate_slab(s, 1357 page = allocate_slab(s,
1358 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node); 1358 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1359 if (!page) 1359 if (!page)
1360 goto out; 1360 goto out;
1361 1361
1362 inc_slabs_node(s, page_to_nid(page), page->objects); 1362 inc_slabs_node(s, page_to_nid(page), page->objects);
1363 page->slab = s; 1363 page->slab = s;
1364 page->flags |= 1 << PG_slab; 1364 page->flags |= 1 << PG_slab;
1365 1365
1366 start = page_address(page); 1366 start = page_address(page);
1367 1367
1368 if (unlikely(s->flags & SLAB_POISON)) 1368 if (unlikely(s->flags & SLAB_POISON))
1369 memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page)); 1369 memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
1370 1370
1371 last = start; 1371 last = start;
1372 for_each_object(p, s, start, page->objects) { 1372 for_each_object(p, s, start, page->objects) {
1373 setup_object(s, page, last); 1373 setup_object(s, page, last);
1374 set_freepointer(s, last, p); 1374 set_freepointer(s, last, p);
1375 last = p; 1375 last = p;
1376 } 1376 }
1377 setup_object(s, page, last); 1377 setup_object(s, page, last);
1378 set_freepointer(s, last, NULL); 1378 set_freepointer(s, last, NULL);
1379 1379
1380 page->freelist = start; 1380 page->freelist = start;
1381 page->inuse = page->objects; 1381 page->inuse = page->objects;
1382 page->frozen = 1; 1382 page->frozen = 1;
1383 out: 1383 out:
1384 return page; 1384 return page;
1385 } 1385 }
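The loop above threads the free pointer through every object so a fresh slab comes out as a single freelist terminated by NULL, with page->freelist pointing at the first object. A stripped-down user-space version of that threading, assuming the next-free pointer lives at offset 0 of each object (real SLUB stores it at s->offset, which may be elsewhere):

        #include <stdio.h>
        #include <stdlib.h>

        /* Thread a freelist through a buffer of equally sized objects
         * (assumes nr >= 1): each object's first word points at the next
         * free object and the last one points at NULL. */
        static void *build_freelist(void *buf, size_t size, size_t nr)
        {
                char *obj = buf;
                size_t i;

                for (i = 0; i + 1 < nr; i++, obj += size)
                        *(void **)obj = obj + size;     /* set_freepointer(obj, next) */
                *(void **)obj = NULL;                   /* last object ends the list */
                return buf;                             /* page->freelist = start */
        }

        int main(void)
        {
                enum { SIZE = 64, NR = 4 };
                void *slab = calloc(NR, SIZE);
                void *p;
                int n = 0;

                if (!slab)
                        return 1;
                for (p = build_freelist(slab, SIZE, NR); p; p = *(void **)p)
                        n++;
                printf("freelist holds %d objects\n", n);       /* prints 4 */
                free(slab);
                return 0;
        }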
1386 1386
1387 static void __free_slab(struct kmem_cache *s, struct page *page) 1387 static void __free_slab(struct kmem_cache *s, struct page *page)
1388 { 1388 {
1389 int order = compound_order(page); 1389 int order = compound_order(page);
1390 int pages = 1 << order; 1390 int pages = 1 << order;
1391 1391
1392 if (kmem_cache_debug(s)) { 1392 if (kmem_cache_debug(s)) {
1393 void *p; 1393 void *p;
1394 1394
1395 slab_pad_check(s, page); 1395 slab_pad_check(s, page);
1396 for_each_object(p, s, page_address(page), 1396 for_each_object(p, s, page_address(page),
1397 page->objects) 1397 page->objects)
1398 check_object(s, page, p, SLUB_RED_INACTIVE); 1398 check_object(s, page, p, SLUB_RED_INACTIVE);
1399 } 1399 }
1400 1400
1401 kmemcheck_free_shadow(page, compound_order(page)); 1401 kmemcheck_free_shadow(page, compound_order(page));
1402 1402
1403 mod_zone_page_state(page_zone(page), 1403 mod_zone_page_state(page_zone(page),
1404 (s->flags & SLAB_RECLAIM_ACCOUNT) ? 1404 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1405 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, 1405 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1406 -pages); 1406 -pages);
1407 1407
1408 __ClearPageSlab(page); 1408 __ClearPageSlab(page);
1409 reset_page_mapcount(page); 1409 reset_page_mapcount(page);
1410 if (current->reclaim_state) 1410 if (current->reclaim_state)
1411 current->reclaim_state->reclaimed_slab += pages; 1411 current->reclaim_state->reclaimed_slab += pages;
1412 __free_pages(page, order); 1412 __free_pages(page, order);
1413 } 1413 }
1414 1414
1415 #define need_reserve_slab_rcu \ 1415 #define need_reserve_slab_rcu \
1416 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head)) 1416 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
1417 1417
1418 static void rcu_free_slab(struct rcu_head *h) 1418 static void rcu_free_slab(struct rcu_head *h)
1419 { 1419 {
1420 struct page *page; 1420 struct page *page;
1421 1421
1422 if (need_reserve_slab_rcu) 1422 if (need_reserve_slab_rcu)
1423 page = virt_to_head_page(h); 1423 page = virt_to_head_page(h);
1424 else 1424 else
1425 page = container_of((struct list_head *)h, struct page, lru); 1425 page = container_of((struct list_head *)h, struct page, lru);
1426 1426
1427 __free_slab(page->slab, page); 1427 __free_slab(page->slab, page);
1428 } 1428 }
1429 1429
1430 static void free_slab(struct kmem_cache *s, struct page *page) 1430 static void free_slab(struct kmem_cache *s, struct page *page)
1431 { 1431 {
1432 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) { 1432 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1433 struct rcu_head *head; 1433 struct rcu_head *head;
1434 1434
1435 if (need_reserve_slab_rcu) { 1435 if (need_reserve_slab_rcu) {
1436 int order = compound_order(page); 1436 int order = compound_order(page);
1437 int offset = (PAGE_SIZE << order) - s->reserved; 1437 int offset = (PAGE_SIZE << order) - s->reserved;
1438 1438
1439 VM_BUG_ON(s->reserved != sizeof(*head)); 1439 VM_BUG_ON(s->reserved != sizeof(*head));
1440 head = page_address(page) + offset; 1440 head = page_address(page) + offset;
1441 } else { 1441 } else {
1442 /* 1442 /*
1443 * RCU free overloads the RCU head over the LRU 1443 * RCU free overloads the RCU head over the LRU
1444 */ 1444 */
1445 head = (void *)&page->lru; 1445 head = (void *)&page->lru;
1446 } 1446 }
1447 1447
1448 call_rcu(head, rcu_free_slab); 1448 call_rcu(head, rcu_free_slab);
1449 } else 1449 } else
1450 __free_slab(s, page); 1450 __free_slab(s, page);
1451 } 1451 }
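The need_reserve_slab_rcu case above hinges on whether an rcu_head fits into the space page->lru already provides; when it does not, s->reserved bytes are kept free at the very end of each slab and the head is parked there just before the deferred free. A hedged user-space sketch of that "borrow the tail of the buffer for deferred-free bookkeeping" idea, with made-up names and no real RCU involved:

        #include <stdio.h>
        #include <stdlib.h>

        /* Stand-in for rcu_head: just enough to carry a callback. */
        struct deferred_head {
                void (*func)(struct deferred_head *);
        };

        #define SLAB_BYTES 4096u                        /* plays PAGE_SIZE << order */
        #define RESERVED   sizeof(struct deferred_head) /* plays s->reserved */

        /* Recover the slab start from the head parked in its tail, the way
         * rcu_free_slab() relies on virt_to_head_page() in the kernel. */
        static void release_slab(struct deferred_head *h)
        {
                char *slab = (char *)(h + 1) - SLAB_BYTES;

                printf("freeing slab at %p\n", (void *)slab);
                free(slab);
        }

        int main(void)
        {
                char *slab = malloc(SLAB_BYTES);
                struct deferred_head *head;

                if (!slab)
                        return 1;
                /* Park the head in the reserved bytes at the end of the slab,
                 * as free_slab() does when need_reserve_slab_rcu is true. */
                head = (struct deferred_head *)(slab + SLAB_BYTES - RESERVED);
                head->func = release_slab;
                head->func(head);       /* the kernel hands this to call_rcu() instead */
                return 0;
        }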
1452 1452
1453 static void discard_slab(struct kmem_cache *s, struct page *page) 1453 static void discard_slab(struct kmem_cache *s, struct page *page)
1454 { 1454 {
1455 dec_slabs_node(s, page_to_nid(page), page->objects); 1455 dec_slabs_node(s, page_to_nid(page), page->objects);
1456 free_slab(s, page); 1456 free_slab(s, page);
1457 } 1457 }
1458 1458
1459 /* 1459 /*
1460 * Management of partially allocated slabs. 1460 * Management of partially allocated slabs.
1461 * 1461 *
1462 * list_lock must be held. 1462 * list_lock must be held.
1463 */ 1463 */
1464 static inline void add_partial(struct kmem_cache_node *n, 1464 static inline void add_partial(struct kmem_cache_node *n,
1465 struct page *page, int tail) 1465 struct page *page, int tail)
1466 { 1466 {
1467 n->nr_partial++; 1467 n->nr_partial++;
1468 if (tail == DEACTIVATE_TO_TAIL) 1468 if (tail == DEACTIVATE_TO_TAIL)
1469 list_add_tail(&page->lru, &n->partial); 1469 list_add_tail(&page->lru, &n->partial);
1470 else 1470 else
1471 list_add(&page->lru, &n->partial); 1471 list_add(&page->lru, &n->partial);
1472 } 1472 }
1473 1473
1474 /* 1474 /*
1475 * list_lock must be held. 1475 * list_lock must be held.
1476 */ 1476 */
1477 static inline void remove_partial(struct kmem_cache_node *n, 1477 static inline void remove_partial(struct kmem_cache_node *n,
1478 struct page *page) 1478 struct page *page)
1479 { 1479 {
1480 list_del(&page->lru); 1480 list_del(&page->lru);
1481 n->nr_partial--; 1481 n->nr_partial--;
1482 } 1482 }
1483 1483
1484 /* 1484 /*
1485 * Lock slab, remove from the partial list and put the object into the 1485 * Lock slab, remove from the partial list and put the object into the
1486 * per cpu freelist. 1486 * per cpu freelist.
1487 * 1487 *
1488 * Returns a list of objects or NULL if it fails. 1488 * Returns a list of objects or NULL if it fails.
1489 * 1489 *
1490 * Must hold list_lock. 1490 * Must hold list_lock.
1491 */ 1491 */
1492 static inline void *acquire_slab(struct kmem_cache *s, 1492 static inline void *acquire_slab(struct kmem_cache *s,
1493 struct kmem_cache_node *n, struct page *page, 1493 struct kmem_cache_node *n, struct page *page,
1494 int mode) 1494 int mode)
1495 { 1495 {
1496 void *freelist; 1496 void *freelist;
1497 unsigned long counters; 1497 unsigned long counters;
1498 struct page new; 1498 struct page new;
1499 1499
1500 /* 1500 /*
1501 * Zap the freelist and set the frozen bit. 1501 * Zap the freelist and set the frozen bit.
1502 * The old freelist is the list of objects for the 1502 * The old freelist is the list of objects for the
1503 * per cpu allocation list. 1503 * per cpu allocation list.
1504 */ 1504 */
1505 do { 1505 do {
1506 freelist = page->freelist; 1506 freelist = page->freelist;
1507 counters = page->counters; 1507 counters = page->counters;
1508 new.counters = counters; 1508 new.counters = counters;
1509 if (mode) 1509 if (mode)
1510 new.inuse = page->objects; 1510 new.inuse = page->objects;
1511 1511
1512 VM_BUG_ON(new.frozen); 1512 VM_BUG_ON(new.frozen);
1513 new.frozen = 1; 1513 new.frozen = 1;
1514 1514
1515 } while (!__cmpxchg_double_slab(s, page, 1515 } while (!__cmpxchg_double_slab(s, page,
1516 freelist, counters, 1516 freelist, counters,
1517 NULL, new.counters, 1517 NULL, new.counters,
1518 "lock and freeze")); 1518 "lock and freeze"));
1519 1519
1520 remove_partial(n, page); 1520 remove_partial(n, page);
1521 return freelist; 1521 return freelist;
1522 } 1522 }
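acquire_slab() is one instance of the optimistic update pattern used throughout this file: snapshot freelist and counters, build the desired state locally, then publish it with a compare-and-exchange and retry if another CPU won the race. A minimal single-word user-space rendering of that loop with C11 atomics (the kernel's __cmpxchg_double_slab swaps freelist and counters as a pair, which this sketch cannot reproduce):

        #include <stdatomic.h>
        #include <stdio.h>

        /* Optimistically take the whole freelist: read it, try to swap in
         * NULL, and retry with the freshly observed value if another
         * thread changed it in the meantime. */
        static void *grab_freelist(_Atomic(void *) *freelist)
        {
                void *old = atomic_load(freelist);

                while (!atomic_compare_exchange_weak(freelist, &old, NULL))
                        ;       /* 'old' was updated with the latest value */
                return old;
        }

        int main(void)
        {
                static int object;
                _Atomic(void *) freelist = &object;

                printf("grabbed %p, freelist now %p\n",
                       grab_freelist(&freelist), atomic_load(&freelist));
                return 0;
        }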
1523 1523
1524 static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain); 1524 static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1525 1525
1526 /* 1526 /*
1527 * Try to allocate a partial slab from a specific node. 1527 * Try to allocate a partial slab from a specific node.
1528 */ 1528 */
1529 static void *get_partial_node(struct kmem_cache *s, 1529 static void *get_partial_node(struct kmem_cache *s,
1530 struct kmem_cache_node *n, struct kmem_cache_cpu *c) 1530 struct kmem_cache_node *n, struct kmem_cache_cpu *c)
1531 { 1531 {
1532 struct page *page, *page2; 1532 struct page *page, *page2;
1533 void *object = NULL; 1533 void *object = NULL;
1534 1534
1535 /* 1535 /*
1536 * Racy check. If we mistakenly see no partial slabs then we 1536 * Racy check. If we mistakenly see no partial slabs then we
1537 * just allocate an empty slab. If we mistakenly try to get a 1537 * just allocate an empty slab. If we mistakenly try to get a
1538 * partial slab and there is none available then get_partials() 1538 * partial slab and there is none available then get_partials()
1539 * will return NULL. 1539 * will return NULL.
1540 */ 1540 */
1541 if (!n || !n->nr_partial) 1541 if (!n || !n->nr_partial)
1542 return NULL; 1542 return NULL;
1543 1543
1544 spin_lock(&n->list_lock); 1544 spin_lock(&n->list_lock);
1545 list_for_each_entry_safe(page, page2, &n->partial, lru) { 1545 list_for_each_entry_safe(page, page2, &n->partial, lru) {
1546 void *t = acquire_slab(s, n, page, object == NULL); 1546 void *t = acquire_slab(s, n, page, object == NULL);
1547 int available; 1547 int available;
1548 1548
1549 if (!t) 1549 if (!t)
1550 break; 1550 break;
1551 1551
1552 if (!object) { 1552 if (!object) {
1553 c->page = page; 1553 c->page = page;
1554 c->node = page_to_nid(page); 1554 c->node = page_to_nid(page);
1555 stat(s, ALLOC_FROM_PARTIAL); 1555 stat(s, ALLOC_FROM_PARTIAL);
1556 object = t; 1556 object = t;
1557 available = page->objects - page->inuse; 1557 available = page->objects - page->inuse;
1558 } else { 1558 } else {
1559 page->freelist = t; 1559 page->freelist = t;
1560 available = put_cpu_partial(s, page, 0); 1560 available = put_cpu_partial(s, page, 0);
1561 } 1561 }
1562 if (kmem_cache_debug(s) || available > s->cpu_partial / 2) 1562 if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
1563 break; 1563 break;
1564 1564
1565 } 1565 }
1566 spin_unlock(&n->list_lock); 1566 spin_unlock(&n->list_lock);
1567 return object; 1567 return object;
1568 } 1568 }
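To make the stopping rule concrete: say s->cpu_partial is 30 (a made-up value), so the threshold in the check above is 15. If the first partial slab acquired becomes the cpu slab but only has 4 free objects, the walk continues; each further slab is parked on the per-cpu partial list by put_cpu_partial(), and the loop stops as soon as the running total of free objects it returns exceeds 15, or immediately after the first slab when the cache has debugging enabled.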
1569 1569
1570 /* 1570 /*
1571 * Get a page from somewhere. Search in increasing NUMA distances. 1571 * Get a page from somewhere. Search in increasing NUMA distances.
1572 */ 1572 */
1573 static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags, 1573 static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags,
1574 struct kmem_cache_cpu *c) 1574 struct kmem_cache_cpu *c)
1575 { 1575 {
1576 #ifdef CONFIG_NUMA 1576 #ifdef CONFIG_NUMA
1577 struct zonelist *zonelist; 1577 struct zonelist *zonelist;
1578 struct zoneref *z; 1578 struct zoneref *z;
1579 struct zone *zone; 1579 struct zone *zone;
1580 enum zone_type high_zoneidx = gfp_zone(flags); 1580 enum zone_type high_zoneidx = gfp_zone(flags);
1581 void *object; 1581 void *object;
1582 1582
1583 /* 1583 /*
1584 * The defrag ratio allows a configuration of the tradeoffs between 1584 * The defrag ratio allows a configuration of the tradeoffs between
1585 * inter node defragmentation and node local allocations. A lower 1585 * inter node defragmentation and node local allocations. A lower
1586 * defrag_ratio increases the tendency to do local allocations 1586 * defrag_ratio increases the tendency to do local allocations
1587 * instead of attempting to obtain partial slabs from other nodes. 1587 * instead of attempting to obtain partial slabs from other nodes.
1588 * 1588 *
1589 * If the defrag_ratio is set to 0 then kmalloc() always 1589 * If the defrag_ratio is set to 0 then kmalloc() always
1590 * returns node local objects. If the ratio is higher then kmalloc() 1590 * returns node local objects. If the ratio is higher then kmalloc()
1591 * may return off node objects because partial slabs are obtained 1591 * may return off node objects because partial slabs are obtained
1592 * from other nodes and filled up. 1592 * from other nodes and filled up.
1593 * 1593 *
1594 * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes 1594 * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes
1595 * defrag_ratio = 1000) then every (well almost) allocation will 1595 * defrag_ratio = 1000) then every (well almost) allocation will
1596 * first attempt to defrag slab caches on other nodes. This means 1596 * first attempt to defrag slab caches on other nodes. This means
1597 * scanning over all nodes to look for partial slabs which may be 1597 * scanning over all nodes to look for partial slabs which may be
1598 * expensive if we do it every time we are trying to find a slab 1598 * expensive if we do it every time we are trying to find a slab
1599 * with available objects. 1599 * with available objects.
1600 */ 1600 */
1601 if (!s->remote_node_defrag_ratio || 1601 if (!s->remote_node_defrag_ratio ||
1602 get_cycles() % 1024 > s->remote_node_defrag_ratio) 1602 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1603 return NULL; 1603 return NULL;
1604 1604
1605 get_mems_allowed(); 1605 get_mems_allowed();
1606 zonelist = node_zonelist(slab_node(current->mempolicy), flags); 1606 zonelist = node_zonelist(slab_node(current->mempolicy), flags);
1607 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { 1607 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1608 struct kmem_cache_node *n; 1608 struct kmem_cache_node *n;
1609 1609
1610 n = get_node(s, zone_to_nid(zone)); 1610 n = get_node(s, zone_to_nid(zone));
1611 1611
1612 if (n && cpuset_zone_allowed_hardwall(zone, flags) && 1612 if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
1613 n->nr_partial > s->min_partial) { 1613 n->nr_partial > s->min_partial) {
1614 object = get_partial_node(s, n, c); 1614 object = get_partial_node(s, n, c);
1615 if (object) { 1615 if (object) {
1616 put_mems_allowed(); 1616 put_mems_allowed();
1617 return object; 1617 return object;
1618 } 1618 }
1619 } 1619 }
1620 } 1620 }
1621 put_mems_allowed(); 1621 put_mems_allowed();
1622 #endif 1622 #endif
1623 return NULL; 1623 return NULL;
1624 } 1624 }
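Taking the comment's own numbers, and assuming the low bits of get_cycles() are roughly uniform: with the sysfs file at its maximum of 100 the stored ratio is 1000, so the gate get_cycles() % 1024 > 1000 skips the cross-node search only for the 23 residues 1001..1023, i.e. on about 2% of calls; a stored ratio of 100 lets the search run only about 10% of the time (101 of 1024 residues), and a ratio of 0 never runs it, which is the always-node-local behaviour described above.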
1625 1625
1626 /* 1626 /*
1627 * Get a partial page, lock it and return it. 1627 * Get a partial page, lock it and return it.
1628 */ 1628 */
1629 static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, 1629 static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1630 struct kmem_cache_cpu *c) 1630 struct kmem_cache_cpu *c)
1631 { 1631 {
1632 void *object; 1632 void *object;
1633 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node; 1633 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
1634 1634
1635 object = get_partial_node(s, get_node(s, searchnode), c); 1635 object = get_partial_node(s, get_node(s, searchnode), c);
1636 if (object || node != NUMA_NO_NODE) 1636 if (object || node != NUMA_NO_NODE)
1637 return object; 1637 return object;
1638 1638
1639 return get_any_partial(s, flags, c); 1639 return get_any_partial(s, flags, c);
1640 } 1640 }
1641 1641
1642 #ifdef CONFIG_PREEMPT 1642 #ifdef CONFIG_PREEMPT
1643 /* 1643 /*
1644 * Calculate the next globally unique transaction for disambiguation 1644 * Calculate the next globally unique transaction for disambiguation
1645 * during cmpxchg. The transactions start with the cpu number and are then 1645 * during cmpxchg. The transactions start with the cpu number and are then
1646 * incremented by CONFIG_NR_CPUS. 1646 * incremented by CONFIG_NR_CPUS.
1647 */ 1647 */
1648 #define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS) 1648 #define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
1649 #else 1649 #else
1650 /* 1650 /*
1651 * No preemption supported therefore also no need to check for 1651 * No preemption supported therefore also no need to check for
1652 * different cpus. 1652 * different cpus.
1653 */ 1653 */
1654 #define TID_STEP 1 1654 #define TID_STEP 1
1655 #endif 1655 #endif
1656 1656
1657 static inline unsigned long next_tid(unsigned long tid) 1657 static inline unsigned long next_tid(unsigned long tid)
1658 { 1658 {
1659 return tid + TID_STEP; 1659 return tid + TID_STEP;
1660 } 1660 }
1661 1661
1662 static inline unsigned int tid_to_cpu(unsigned long tid) 1662 static inline unsigned int tid_to_cpu(unsigned long tid)
1663 { 1663 {
1664 return tid % TID_STEP; 1664 return tid % TID_STEP;
1665 } 1665 }
1666 1666
1667 static inline unsigned long tid_to_event(unsigned long tid) 1667 static inline unsigned long tid_to_event(unsigned long tid)
1668 { 1668 {
1669 return tid / TID_STEP; 1669 return tid / TID_STEP;
1670 } 1670 }
1671 1671
1672 static inline unsigned int init_tid(int cpu) 1672 static inline unsigned int init_tid(int cpu)
1673 { 1673 {
1674 return cpu; 1674 return cpu;
1675 } 1675 }
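A small standalone illustration of the tid encoding, assuming a preemptible configuration where CONFIG_NR_CPUS rounds up to 4 so TID_STEP is 4 (the real step depends on the kernel config):

        #include <stdio.h>

        #define TID_STEP 4      /* roundup_pow_of_two(CONFIG_NR_CPUS) for 3-4 CPUs */

        static unsigned long next_tid(unsigned long tid)     { return tid + TID_STEP; }
        static unsigned int  tid_to_cpu(unsigned long tid)   { return tid % TID_STEP; }
        static unsigned long tid_to_event(unsigned long tid) { return tid / TID_STEP; }
        static unsigned long init_tid(int cpu)               { return cpu; }

        int main(void)
        {
                unsigned long tid = init_tid(2);        /* cpu 2 starts at tid 2 */
                int i;

                for (i = 0; i < 3; i++)
                        tid = next_tid(tid);            /* three completed operations */

                /* cpu is still 2, the event counter is 3: tid = 3 * 4 + 2 = 14 */
                printf("tid=%lu cpu=%u event=%lu\n",
                       tid, tid_to_cpu(tid), tid_to_event(tid));
                return 0;
        }

Because the cpu number never leaves the low bits, note_cmpxchg_failure() below can tell a migration to another cpu apart from a plain interleaving of operations on the same cpu.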
1676 1676
1677 static inline void note_cmpxchg_failure(const char *n, 1677 static inline void note_cmpxchg_failure(const char *n,
1678 const struct kmem_cache *s, unsigned long tid) 1678 const struct kmem_cache *s, unsigned long tid)
1679 { 1679 {
1680 #ifdef SLUB_DEBUG_CMPXCHG 1680 #ifdef SLUB_DEBUG_CMPXCHG
1681 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid); 1681 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
1682 1682
1683 printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name); 1683 printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);
1684 1684
1685 #ifdef CONFIG_PREEMPT 1685 #ifdef CONFIG_PREEMPT
1686 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid)) 1686 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
1687 printk("due to cpu change %d -> %d\n", 1687 printk("due to cpu change %d -> %d\n",
1688 tid_to_cpu(tid), tid_to_cpu(actual_tid)); 1688 tid_to_cpu(tid), tid_to_cpu(actual_tid));
1689 else 1689 else
1690 #endif 1690 #endif
1691 if (tid_to_event(tid) != tid_to_event(actual_tid)) 1691 if (tid_to_event(tid) != tid_to_event(actual_tid))
1692 printk("due to cpu running other code. Event %ld->%ld\n", 1692 printk("due to cpu running other code. Event %ld->%ld\n",
1693 tid_to_event(tid), tid_to_event(actual_tid)); 1693 tid_to_event(tid), tid_to_event(actual_tid));
1694 else 1694 else
1695 printk("for unknown reason: actual=%lx was=%lx target=%lx\n", 1695 printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
1696 actual_tid, tid, next_tid(tid)); 1696 actual_tid, tid, next_tid(tid));
1697 #endif 1697 #endif
1698 stat(s, CMPXCHG_DOUBLE_CPU_FAIL); 1698 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
1699 } 1699 }
1700 1700
1701 void init_kmem_cache_cpus(struct kmem_cache *s) 1701 void init_kmem_cache_cpus(struct kmem_cache *s)
1702 { 1702 {
1703 int cpu; 1703 int cpu;
1704 1704
1705 for_each_possible_cpu(cpu) 1705 for_each_possible_cpu(cpu)
1706 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu); 1706 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
1707 } 1707 }
1708 1708
1709 /* 1709 /*
1710 * Remove the cpu slab 1710 * Remove the cpu slab
1711 */ 1711 */
1712 static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1712 static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1713 { 1713 {
1714 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; 1714 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
1715 struct page *page = c->page; 1715 struct page *page = c->page;
1716 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 1716 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1717 int lock = 0; 1717 int lock = 0;
1718 enum slab_modes l = M_NONE, m = M_NONE; 1718 enum slab_modes l = M_NONE, m = M_NONE;
1719 void *freelist; 1719 void *freelist;
1720 void *nextfree; 1720 void *nextfree;
1721 int tail = DEACTIVATE_TO_HEAD; 1721 int tail = DEACTIVATE_TO_HEAD;
1722 struct page new; 1722 struct page new;
1723 struct page old; 1723 struct page old;
1724 1724
1725 if (page->freelist) { 1725 if (page->freelist) {
1726 stat(s, DEACTIVATE_REMOTE_FREES); 1726 stat(s, DEACTIVATE_REMOTE_FREES);
1727 tail = DEACTIVATE_TO_TAIL; 1727 tail = DEACTIVATE_TO_TAIL;
1728 } 1728 }
1729 1729
1730 c->tid = next_tid(c->tid); 1730 c->tid = next_tid(c->tid);
1731 c->page = NULL; 1731 c->page = NULL;
1732 freelist = c->freelist; 1732 freelist = c->freelist;
1733 c->freelist = NULL; 1733 c->freelist = NULL;
1734 1734
1735 /* 1735 /*
1736 * Stage one: Free all available per cpu objects back 1736 * Stage one: Free all available per cpu objects back
1737 * to the page freelist while it is still frozen. Leave the 1737 * to the page freelist while it is still frozen. Leave the
1738 * last one. 1738 * last one.
1739 * 1739 *
1740 * There is no need to take the list->lock because the page 1740 * There is no need to take the list->lock because the page
1741 * is still frozen. 1741 * is still frozen.
1742 */ 1742 */
1743 while (freelist && (nextfree = get_freepointer(s, freelist))) { 1743 while (freelist && (nextfree = get_freepointer(s, freelist))) {
1744 void *prior; 1744 void *prior;
1745 unsigned long counters; 1745 unsigned long counters;
1746 1746
1747 do { 1747 do {
1748 prior = page->freelist; 1748 prior = page->freelist;
1749 counters = page->counters; 1749 counters = page->counters;
1750 set_freepointer(s, freelist, prior); 1750 set_freepointer(s, freelist, prior);
1751 new.counters = counters; 1751 new.counters = counters;
1752 new.inuse--; 1752 new.inuse--;
1753 VM_BUG_ON(!new.frozen); 1753 VM_BUG_ON(!new.frozen);
1754 1754
1755 } while (!__cmpxchg_double_slab(s, page, 1755 } while (!__cmpxchg_double_slab(s, page,
1756 prior, counters, 1756 prior, counters,
1757 freelist, new.counters, 1757 freelist, new.counters,
1758 "drain percpu freelist")); 1758 "drain percpu freelist"));
1759 1759
1760 freelist = nextfree; 1760 freelist = nextfree;
1761 } 1761 }
1762 1762
1763 /* 1763 /*
1764 * Stage two: Ensure that the page is unfrozen while the 1764 * Stage two: Ensure that the page is unfrozen while the
1765 * list presence reflects the actual number of objects 1765 * list presence reflects the actual number of objects
1766 * during unfreeze. 1766 * during unfreeze.
1767 * 1767 *
1768 * We setup the list membership and then perform a cmpxchg 1768 * We setup the list membership and then perform a cmpxchg
1769 * with the count. If there is a mismatch then the page 1769 * with the count. If there is a mismatch then the page
1770 * is not unfrozen but the page is on the wrong list. 1770 * is not unfrozen but the page is on the wrong list.
1771 * 1771 *
1772 * Then we restart the process which may have to remove 1772 * Then we restart the process which may have to remove
1773 * the page from the list that we just put it on again 1773 * the page from the list that we just put it on again
1774 * because the number of objects in the slab may have 1774 * because the number of objects in the slab may have
1775 * changed. 1775 * changed.
1776 */ 1776 */
1777 redo: 1777 redo:
1778 1778
1779 old.freelist = page->freelist; 1779 old.freelist = page->freelist;
1780 old.counters = page->counters; 1780 old.counters = page->counters;
1781 VM_BUG_ON(!old.frozen); 1781 VM_BUG_ON(!old.frozen);
1782 1782
1783 /* Determine target state of the slab */ 1783 /* Determine target state of the slab */
1784 new.counters = old.counters; 1784 new.counters = old.counters;
1785 if (freelist) { 1785 if (freelist) {
1786 new.inuse--; 1786 new.inuse--;
1787 set_freepointer(s, freelist, old.freelist); 1787 set_freepointer(s, freelist, old.freelist);
1788 new.freelist = freelist; 1788 new.freelist = freelist;
1789 } else 1789 } else
1790 new.freelist = old.freelist; 1790 new.freelist = old.freelist;
1791 1791
1792 new.frozen = 0; 1792 new.frozen = 0;
1793 1793
1794 if (!new.inuse && n->nr_partial > s->min_partial) 1794 if (!new.inuse && n->nr_partial > s->min_partial)
1795 m = M_FREE; 1795 m = M_FREE;
1796 else if (new.freelist) { 1796 else if (new.freelist) {
1797 m = M_PARTIAL; 1797 m = M_PARTIAL;
1798 if (!lock) { 1798 if (!lock) {
1799 lock = 1; 1799 lock = 1;
1800 /* 1800 /*
1801 * Taking the spinlock removes the possibility 1801 * Taking the spinlock removes the possibility
1802 * that acquire_slab() will see a slab page that 1802 * that acquire_slab() will see a slab page that
1803 * is frozen 1803 * is frozen
1804 */ 1804 */
1805 spin_lock(&n->list_lock); 1805 spin_lock(&n->list_lock);
1806 } 1806 }
1807 } else { 1807 } else {
1808 m = M_FULL; 1808 m = M_FULL;
1809 if (kmem_cache_debug(s) && !lock) { 1809 if (kmem_cache_debug(s) && !lock) {
1810 lock = 1; 1810 lock = 1;
1811 /* 1811 /*
1812 * This also ensures that the scanning of full 1812 * This also ensures that the scanning of full
1813 * slabs from diagnostic functions will not see 1813 * slabs from diagnostic functions will not see
1814 * any frozen slabs. 1814 * any frozen slabs.
1815 */ 1815 */
1816 spin_lock(&n->list_lock); 1816 spin_lock(&n->list_lock);
1817 } 1817 }
1818 } 1818 }
1819 1819
1820 if (l != m) { 1820 if (l != m) {
1821 1821
1822 if (l == M_PARTIAL) 1822 if (l == M_PARTIAL)
1823 1823
1824 remove_partial(n, page); 1824 remove_partial(n, page);
1825 1825
1826 else if (l == M_FULL) 1826 else if (l == M_FULL)
1827 1827
1828 remove_full(s, page); 1828 remove_full(s, page);
1829 1829
1830 if (m == M_PARTIAL) { 1830 if (m == M_PARTIAL) {
1831 1831
1832 add_partial(n, page, tail); 1832 add_partial(n, page, tail);
1833 stat(s, tail); 1833 stat(s, tail);
1834 1834
1835 } else if (m == M_FULL) { 1835 } else if (m == M_FULL) {
1836 1836
1837 stat(s, DEACTIVATE_FULL); 1837 stat(s, DEACTIVATE_FULL);
1838 add_full(s, n, page); 1838 add_full(s, n, page);
1839 1839
1840 } 1840 }
1841 } 1841 }
1842 1842
1843 l = m; 1843 l = m;
1844 if (!__cmpxchg_double_slab(s, page, 1844 if (!__cmpxchg_double_slab(s, page,
1845 old.freelist, old.counters, 1845 old.freelist, old.counters,
1846 new.freelist, new.counters, 1846 new.freelist, new.counters,
1847 "unfreezing slab")) 1847 "unfreezing slab"))
1848 goto redo; 1848 goto redo;
1849 1849
1850 if (lock) 1850 if (lock)
1851 spin_unlock(&n->list_lock); 1851 spin_unlock(&n->list_lock);
1852 1852
1853 if (m == M_FREE) { 1853 if (m == M_FREE) {
1854 stat(s, DEACTIVATE_EMPTY); 1854 stat(s, DEACTIVATE_EMPTY);
1855 discard_slab(s, page); 1855 discard_slab(s, page);
1856 stat(s, FREE_SLAB); 1856 stat(s, FREE_SLAB);
1857 } 1857 }
1858 } 1858 }
1859 1859
1860 /* Unfreeze all the cpu partial slabs */ 1860 /* Unfreeze all the cpu partial slabs */
1861 static void unfreeze_partials(struct kmem_cache *s) 1861 static void unfreeze_partials(struct kmem_cache *s)
1862 { 1862 {
1863 struct kmem_cache_node *n = NULL; 1863 struct kmem_cache_node *n = NULL;
1864 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); 1864 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
1865 struct page *page, *discard_page = NULL; 1865 struct page *page, *discard_page = NULL;
1866 1866
1867 while ((page = c->partial)) { 1867 while ((page = c->partial)) {
1868 enum slab_modes { M_PARTIAL, M_FREE }; 1868 enum slab_modes { M_PARTIAL, M_FREE };
1869 enum slab_modes l, m; 1869 enum slab_modes l, m;
1870 struct page new; 1870 struct page new;
1871 struct page old; 1871 struct page old;
1872 1872
1873 c->partial = page->next; 1873 c->partial = page->next;
1874 l = M_FREE; 1874 l = M_FREE;
1875 1875
1876 do { 1876 do {
1877 1877
1878 old.freelist = page->freelist; 1878 old.freelist = page->freelist;
1879 old.counters = page->counters; 1879 old.counters = page->counters;
1880 VM_BUG_ON(!old.frozen); 1880 VM_BUG_ON(!old.frozen);
1881 1881
1882 new.counters = old.counters; 1882 new.counters = old.counters;
1883 new.freelist = old.freelist; 1883 new.freelist = old.freelist;
1884 1884
1885 new.frozen = 0; 1885 new.frozen = 0;
1886 1886
1887 if (!new.inuse && (!n || n->nr_partial > s->min_partial)) 1887 if (!new.inuse && (!n || n->nr_partial > s->min_partial))
1888 m = M_FREE; 1888 m = M_FREE;
1889 else { 1889 else {
1890 struct kmem_cache_node *n2 = get_node(s, 1890 struct kmem_cache_node *n2 = get_node(s,
1891 page_to_nid(page)); 1891 page_to_nid(page));
1892 1892
1893 m = M_PARTIAL; 1893 m = M_PARTIAL;
1894 if (n != n2) { 1894 if (n != n2) {
1895 if (n) 1895 if (n)
1896 spin_unlock(&n->list_lock); 1896 spin_unlock(&n->list_lock);
1897 1897
1898 n = n2; 1898 n = n2;
1899 spin_lock(&n->list_lock); 1899 spin_lock(&n->list_lock);
1900 } 1900 }
1901 } 1901 }
1902 1902
1903 if (l != m) { 1903 if (l != m) {
1904 if (l == M_PARTIAL) 1904 if (l == M_PARTIAL)
1905 remove_partial(n, page); 1905 remove_partial(n, page);
1906 else 1906 else
1907 add_partial(n, page, 1907 add_partial(n, page,
1908 DEACTIVATE_TO_TAIL); 1908 DEACTIVATE_TO_TAIL);
1909 1909
1910 l = m; 1910 l = m;
1911 } 1911 }
1912 1912
1913 } while (!cmpxchg_double_slab(s, page, 1913 } while (!cmpxchg_double_slab(s, page,
1914 old.freelist, old.counters, 1914 old.freelist, old.counters,
1915 new.freelist, new.counters, 1915 new.freelist, new.counters,
1916 "unfreezing slab")); 1916 "unfreezing slab"));
1917 1917
1918 if (m == M_FREE) { 1918 if (m == M_FREE) {
1919 page->next = discard_page; 1919 page->next = discard_page;
1920 discard_page = page; 1920 discard_page = page;
1921 } 1921 }
1922 } 1922 }
1923 1923
1924 if (n) 1924 if (n)
1925 spin_unlock(&n->list_lock); 1925 spin_unlock(&n->list_lock);
1926 1926
1927 while (discard_page) { 1927 while (discard_page) {
1928 page = discard_page; 1928 page = discard_page;
1929 discard_page = discard_page->next; 1929 discard_page = discard_page->next;
1930 1930
1931 stat(s, DEACTIVATE_EMPTY); 1931 stat(s, DEACTIVATE_EMPTY);
1932 discard_slab(s, page); 1932 discard_slab(s, page);
1933 stat(s, FREE_SLAB); 1933 stat(s, FREE_SLAB);
1934 } 1934 }
1935 } 1935 }
1936 1936
1937 /* 1937 /*
1938 * Put a page that was just frozen (in __slab_free) into a partial page 1938 * Put a page that was just frozen (in __slab_free) into a partial page
1939 * slot if available. This is done without interrupts disabled and without 1939 * slot if available. This is done without interrupts disabled and without
1940 * preemption disabled. The cmpxchg is racy and may put the partial page 1940 * preemption disabled. The cmpxchg is racy and may put the partial page
1941 * onto a random cpus partial slot. 1941 * onto a random cpus partial slot.
1942 * 1942 *
1943 * If we did not find a slot then simply move all the partials to the 1943 * If we did not find a slot then simply move all the partials to the
1944 * per node partial list. 1944 * per node partial list.
1945 */ 1945 */
1946 int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) 1946 int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
1947 { 1947 {
1948 struct page *oldpage; 1948 struct page *oldpage;
1949 int pages; 1949 int pages;
1950 int pobjects; 1950 int pobjects;
1951 1951
1952 do { 1952 do {
1953 pages = 0; 1953 pages = 0;
1954 pobjects = 0; 1954 pobjects = 0;
1955 oldpage = this_cpu_read(s->cpu_slab->partial); 1955 oldpage = this_cpu_read(s->cpu_slab->partial);
1956 1956
1957 if (oldpage) { 1957 if (oldpage) {
1958 pobjects = oldpage->pobjects; 1958 pobjects = oldpage->pobjects;
1959 pages = oldpage->pages; 1959 pages = oldpage->pages;
1960 if (drain && pobjects > s->cpu_partial) { 1960 if (drain && pobjects > s->cpu_partial) {
1961 unsigned long flags; 1961 unsigned long flags;
1962 /* 1962 /*
1963 * partial array is full. Move the existing 1963 * partial array is full. Move the existing
1964 * set to the per node partial list. 1964 * set to the per node partial list.
1965 */ 1965 */
1966 local_irq_save(flags); 1966 local_irq_save(flags);
1967 unfreeze_partials(s); 1967 unfreeze_partials(s);
1968 local_irq_restore(flags); 1968 local_irq_restore(flags);
1969 pobjects = 0; 1969 pobjects = 0;
1970 pages = 0; 1970 pages = 0;
1971 } 1971 }
1972 } 1972 }
1973 1973
1974 pages++; 1974 pages++;
1975 pobjects += page->objects - page->inuse; 1975 pobjects += page->objects - page->inuse;
1976 1976
1977 page->pages = pages; 1977 page->pages = pages;
1978 page->pobjects = pobjects; 1978 page->pobjects = pobjects;
1979 page->next = oldpage; 1979 page->next = oldpage;
1980 1980
1981 } while (irqsafe_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage); 1981 } while (irqsafe_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
1982 stat(s, CPU_PARTIAL_FREE); 1982 stat(s, CPU_PARTIAL_FREE);
1983 return pobjects; 1983 return pobjects;
1984 } 1984 }
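To make the accounting concrete, suppose s->cpu_partial is 30 (a made-up value) and the cpu's current partial list head reports pobjects = 28. Freezing a page with 16 objects, 10 of them still allocated, does not hit the drain condition (28 is not above 30); the page goes on as the new head with pages incremented and pobjects = 28 + 6 = 34. The next call that arrives with drain set finds 34 > 30, moves the whole set to the per-node partial lists under local_irq_save(), and the per-cpu list starts over from the newly frozen page.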
1985 1985
1986 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1986 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1987 { 1987 {
1988 stat(s, CPUSLAB_FLUSH); 1988 stat(s, CPUSLAB_FLUSH);
1989 deactivate_slab(s, c); 1989 deactivate_slab(s, c);
1990 } 1990 }
1991 1991
1992 /* 1992 /*
1993 * Flush cpu slab. 1993 * Flush cpu slab.
1994 * 1994 *
1995 * Called from IPI handler with interrupts disabled. 1995 * Called from IPI handler with interrupts disabled.
1996 */ 1996 */
1997 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) 1997 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
1998 { 1998 {
1999 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); 1999 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2000 2000
2001 if (likely(c)) { 2001 if (likely(c)) {
2002 if (c->page) 2002 if (c->page)
2003 flush_slab(s, c); 2003 flush_slab(s, c);
2004 2004
2005 unfreeze_partials(s); 2005 unfreeze_partials(s);
2006 } 2006 }
2007 } 2007 }
2008 2008
2009 static void flush_cpu_slab(void *d) 2009 static void flush_cpu_slab(void *d)
2010 { 2010 {
2011 struct kmem_cache *s = d; 2011 struct kmem_cache *s = d;
2012 2012
2013 __flush_cpu_slab(s, smp_processor_id()); 2013 __flush_cpu_slab(s, smp_processor_id());
2014 } 2014 }
2015 2015
2016 static void flush_all(struct kmem_cache *s) 2016 static void flush_all(struct kmem_cache *s)
2017 { 2017 {
2018 on_each_cpu(flush_cpu_slab, s, 1); 2018 on_each_cpu(flush_cpu_slab, s, 1);
2019 } 2019 }
2020 2020
2021 /* 2021 /*
2022 * Check if the objects in a per cpu structure fit numa 2022 * Check if the objects in a per cpu structure fit numa
2023 * locality expectations. 2023 * locality expectations.
2024 */ 2024 */
2025 static inline int node_match(struct kmem_cache_cpu *c, int node) 2025 static inline int node_match(struct kmem_cache_cpu *c, int node)
2026 { 2026 {
2027 #ifdef CONFIG_NUMA 2027 #ifdef CONFIG_NUMA
2028 if (node != NUMA_NO_NODE && c->node != node) 2028 if (node != NUMA_NO_NODE && c->node != node)
2029 return 0; 2029 return 0;
2030 #endif 2030 #endif
2031 return 1; 2031 return 1;
2032 } 2032 }
2033 2033
2034 static int count_free(struct page *page) 2034 static int count_free(struct page *page)
2035 { 2035 {
2036 return page->objects - page->inuse; 2036 return page->objects - page->inuse;
2037 } 2037 }
2038 2038
2039 static unsigned long count_partial(struct kmem_cache_node *n, 2039 static unsigned long count_partial(struct kmem_cache_node *n,
2040 int (*get_count)(struct page *)) 2040 int (*get_count)(struct page *))
2041 { 2041 {
2042 unsigned long flags; 2042 unsigned long flags;
2043 unsigned long x = 0; 2043 unsigned long x = 0;
2044 struct page *page; 2044 struct page *page;
2045 2045
2046 spin_lock_irqsave(&n->list_lock, flags); 2046 spin_lock_irqsave(&n->list_lock, flags);
2047 list_for_each_entry(page, &n->partial, lru) 2047 list_for_each_entry(page, &n->partial, lru)
2048 x += get_count(page); 2048 x += get_count(page);
2049 spin_unlock_irqrestore(&n->list_lock, flags); 2049 spin_unlock_irqrestore(&n->list_lock, flags);
2050 return x; 2050 return x;
2051 } 2051 }
2052 2052
2053 static inline unsigned long node_nr_objs(struct kmem_cache_node *n) 2053 static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2054 { 2054 {
2055 #ifdef CONFIG_SLUB_DEBUG 2055 #ifdef CONFIG_SLUB_DEBUG
2056 return atomic_long_read(&n->total_objects); 2056 return atomic_long_read(&n->total_objects);
2057 #else 2057 #else
2058 return 0; 2058 return 0;
2059 #endif 2059 #endif
2060 } 2060 }
2061 2061
2062 static noinline void 2062 static noinline void
2063 slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) 2063 slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2064 { 2064 {
2065 int node; 2065 int node;
2066 2066
2067 printk(KERN_WARNING 2067 printk(KERN_WARNING
2068 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n", 2068 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
2069 nid, gfpflags); 2069 nid, gfpflags);
2070 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, " 2070 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
2071 "default order: %d, min order: %d\n", s->name, s->objsize, 2071 "default order: %d, min order: %d\n", s->name, s->objsize,
2072 s->size, oo_order(s->oo), oo_order(s->min)); 2072 s->size, oo_order(s->oo), oo_order(s->min));
2073 2073
2074 if (oo_order(s->min) > get_order(s->objsize)) 2074 if (oo_order(s->min) > get_order(s->objsize))
2075 printk(KERN_WARNING " %s debugging increased min order, use " 2075 printk(KERN_WARNING " %s debugging increased min order, use "
2076 "slub_debug=O to disable.\n", s->name); 2076 "slub_debug=O to disable.\n", s->name);
2077 2077
2078 for_each_online_node(node) { 2078 for_each_online_node(node) {
2079 struct kmem_cache_node *n = get_node(s, node); 2079 struct kmem_cache_node *n = get_node(s, node);
2080 unsigned long nr_slabs; 2080 unsigned long nr_slabs;
2081 unsigned long nr_objs; 2081 unsigned long nr_objs;
2082 unsigned long nr_free; 2082 unsigned long nr_free;
2083 2083
2084 if (!n) 2084 if (!n)
2085 continue; 2085 continue;
2086 2086
2087 nr_free = count_partial(n, count_free); 2087 nr_free = count_partial(n, count_free);
2088 nr_slabs = node_nr_slabs(n); 2088 nr_slabs = node_nr_slabs(n);
2089 nr_objs = node_nr_objs(n); 2089 nr_objs = node_nr_objs(n);
2090 2090
2091 printk(KERN_WARNING 2091 printk(KERN_WARNING
2092 " node %d: slabs: %ld, objs: %ld, free: %ld\n", 2092 " node %d: slabs: %ld, objs: %ld, free: %ld\n",
2093 node, nr_slabs, nr_objs, nr_free); 2093 node, nr_slabs, nr_objs, nr_free);
2094 } 2094 }
2095 } 2095 }
2096 2096
2097 static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, 2097 static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2098 int node, struct kmem_cache_cpu **pc) 2098 int node, struct kmem_cache_cpu **pc)
2099 { 2099 {
2100 void *object; 2100 void *object;
2101 struct kmem_cache_cpu *c; 2101 struct kmem_cache_cpu *c;
2102 struct page *page = new_slab(s, flags, node); 2102 struct page *page = new_slab(s, flags, node);
2103 2103
2104 if (page) { 2104 if (page) {
2105 c = __this_cpu_ptr(s->cpu_slab); 2105 c = __this_cpu_ptr(s->cpu_slab);
2106 if (c->page) 2106 if (c->page)
2107 flush_slab(s, c); 2107 flush_slab(s, c);
2108 2108
2109 /* 2109 /*
2110 * No other reference to the page yet so we can 2110 * No other reference to the page yet so we can
2111 * muck around with it freely without cmpxchg 2111 * muck around with it freely without cmpxchg
2112 */ 2112 */
2113 object = page->freelist; 2113 object = page->freelist;
2114 page->freelist = NULL; 2114 page->freelist = NULL;
2115 2115
2116 stat(s, ALLOC_SLAB); 2116 stat(s, ALLOC_SLAB);
2117 c->node = page_to_nid(page); 2117 c->node = page_to_nid(page);
2118 c->page = page; 2118 c->page = page;
2119 *pc = c; 2119 *pc = c;
2120 } else 2120 } else
2121 object = NULL; 2121 object = NULL;
2122 2122
2123 return object; 2123 return object;
2124 } 2124 }
2125 2125
2126 /* 2126 /*
2127 * Slow path. The lockless freelist is empty or we need to perform 2127 * Slow path. The lockless freelist is empty or we need to perform
2128 * debugging duties. 2128 * debugging duties.
2129 * 2129 *
2130 * Processing is still very fast if new objects have been freed to the 2130 * Processing is still very fast if new objects have been freed to the
2131 * regular freelist. In that case we simply take over the regular freelist 2131 * regular freelist. In that case we simply take over the regular freelist
2132 * as the lockless freelist and zap the regular freelist. 2132 * as the lockless freelist and zap the regular freelist.
2133 * 2133 *
2134 * If that is not working then we fall back to the partial lists. We take the 2134 * If that is not working then we fall back to the partial lists. We take the
2135 * first element of the freelist as the object to allocate now and move the 2135 * first element of the freelist as the object to allocate now and move the
2136 * rest of the freelist to the lockless freelist. 2136 * rest of the freelist to the lockless freelist.
2137 * 2137 *
2138 * And if we were unable to get a new slab from the partial slab lists then 2138 * And if we were unable to get a new slab from the partial slab lists then
2139 * we need to allocate a new slab. This is the slowest path since it involves 2139 * we need to allocate a new slab. This is the slowest path since it involves
2140 * a call to the page allocator and the setup of a new slab. 2140 * a call to the page allocator and the setup of a new slab.
2141 */ 2141 */
2142 static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, 2142 static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2143 unsigned long addr, struct kmem_cache_cpu *c) 2143 unsigned long addr, struct kmem_cache_cpu *c)
2144 { 2144 {
2145 void **object; 2145 void **object;
2146 unsigned long flags; 2146 unsigned long flags;
2147 struct page new; 2147 struct page new;
2148 unsigned long counters; 2148 unsigned long counters;
2149 2149
2150 local_irq_save(flags); 2150 local_irq_save(flags);
2151 #ifdef CONFIG_PREEMPT 2151 #ifdef CONFIG_PREEMPT
2152 /* 2152 /*
2153 * We may have been preempted and rescheduled on a different 2153 * We may have been preempted and rescheduled on a different
2154 * cpu before disabling interrupts. Need to reload cpu area 2154 * cpu before disabling interrupts. Need to reload cpu area
2155 * pointer. 2155 * pointer.
2156 */ 2156 */
2157 c = this_cpu_ptr(s->cpu_slab); 2157 c = this_cpu_ptr(s->cpu_slab);
2158 #endif 2158 #endif
2159 2159
2160 if (!c->page) 2160 if (!c->page)
2161 goto new_slab; 2161 goto new_slab;
2162 redo: 2162 redo:
2163 if (unlikely(!node_match(c, node))) { 2163 if (unlikely(!node_match(c, node))) {
2164 stat(s, ALLOC_NODE_MISMATCH); 2164 stat(s, ALLOC_NODE_MISMATCH);
2165 deactivate_slab(s, c); 2165 deactivate_slab(s, c);
2166 goto new_slab; 2166 goto new_slab;
2167 } 2167 }
2168 2168
2169 /* must check again c->freelist in case of cpu migration or IRQ */ 2169 /* must check again c->freelist in case of cpu migration or IRQ */
2170 object = c->freelist; 2170 object = c->freelist;
2171 if (object) 2171 if (object)
2172 goto load_freelist; 2172 goto load_freelist;
2173 2173
2174 stat(s, ALLOC_SLOWPATH); 2174 stat(s, ALLOC_SLOWPATH);
2175 2175
2176 do { 2176 do {
2177 object = c->page->freelist; 2177 object = c->page->freelist;
2178 counters = c->page->counters; 2178 counters = c->page->counters;
2179 new.counters = counters; 2179 new.counters = counters;
2180 VM_BUG_ON(!new.frozen); 2180 VM_BUG_ON(!new.frozen);
2181 2181
2182 /* 2182 /*
2183 * If there is no object left then we use this loop to 2183 * If there is no object left then we use this loop to
2184 * deactivate the slab which is simple since no objects 2184 * deactivate the slab which is simple since no objects
2185 * are left in the slab and therefore we do not need to 2185 * are left in the slab and therefore we do not need to
2186 * put the page back onto the partial list. 2186 * put the page back onto the partial list.
2187 * 2187 *
2188 * If there are objects left then we retrieve them 2188 * If there are objects left then we retrieve them
2189 * and use them to refill the per cpu queue. 2189 * and use them to refill the per cpu queue.
2190 */ 2190 */
2191 2191
2192 new.inuse = c->page->objects; 2192 new.inuse = c->page->objects;
2193 new.frozen = object != NULL; 2193 new.frozen = object != NULL;
2194 2194
2195 } while (!__cmpxchg_double_slab(s, c->page, 2195 } while (!__cmpxchg_double_slab(s, c->page,
2196 object, counters, 2196 object, counters,
2197 NULL, new.counters, 2197 NULL, new.counters,
2198 "__slab_alloc")); 2198 "__slab_alloc"));
2199 2199
2200 if (!object) { 2200 if (!object) {
2201 c->page = NULL; 2201 c->page = NULL;
2202 stat(s, DEACTIVATE_BYPASS); 2202 stat(s, DEACTIVATE_BYPASS);
2203 goto new_slab; 2203 goto new_slab;
2204 } 2204 }
2205 2205
2206 stat(s, ALLOC_REFILL); 2206 stat(s, ALLOC_REFILL);
2207 2207
2208 load_freelist: 2208 load_freelist:
2209 c->freelist = get_freepointer(s, object); 2209 c->freelist = get_freepointer(s, object);
2210 c->tid = next_tid(c->tid); 2210 c->tid = next_tid(c->tid);
2211 local_irq_restore(flags); 2211 local_irq_restore(flags);
2212 return object; 2212 return object;
2213 2213
2214 new_slab: 2214 new_slab:
2215 2215
2216 if (c->partial) { 2216 if (c->partial) {
2217 c->page = c->partial; 2217 c->page = c->partial;
2218 c->partial = c->page->next; 2218 c->partial = c->page->next;
2219 c->node = page_to_nid(c->page); 2219 c->node = page_to_nid(c->page);
2220 stat(s, CPU_PARTIAL_ALLOC); 2220 stat(s, CPU_PARTIAL_ALLOC);
2221 c->freelist = NULL; 2221 c->freelist = NULL;
2222 goto redo; 2222 goto redo;
2223 } 2223 }
2224 2224
2225 /* Then do expensive stuff like retrieving pages from the partial lists */ 2225 /* Then do expensive stuff like retrieving pages from the partial lists */
2226 object = get_partial(s, gfpflags, node, c); 2226 object = get_partial(s, gfpflags, node, c);
2227 2227
2228 if (unlikely(!object)) { 2228 if (unlikely(!object)) {
2229 2229
2230 object = new_slab_objects(s, gfpflags, node, &c); 2230 object = new_slab_objects(s, gfpflags, node, &c);
2231 2231
2232 if (unlikely(!object)) { 2232 if (unlikely(!object)) {
2233 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) 2233 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
2234 slab_out_of_memory(s, gfpflags, node); 2234 slab_out_of_memory(s, gfpflags, node);
2235 2235
2236 local_irq_restore(flags); 2236 local_irq_restore(flags);
2237 return NULL; 2237 return NULL;
2238 } 2238 }
2239 } 2239 }
2240 2240
2241 if (likely(!kmem_cache_debug(s))) 2241 if (likely(!kmem_cache_debug(s)))
2242 goto load_freelist; 2242 goto load_freelist;
2243 2243
2244 /* Only entered in the debug case */ 2244 /* Only entered in the debug case */
2245 if (!alloc_debug_processing(s, c->page, object, addr)) 2245 if (!alloc_debug_processing(s, c->page, object, addr))
2246 goto new_slab; /* Slab failed checks. Next slab needed */ 2246 goto new_slab; /* Slab failed checks. Next slab needed */
2247 2247
2248 c->freelist = get_freepointer(s, object); 2248 c->freelist = get_freepointer(s, object);
2249 deactivate_slab(s, c); 2249 deactivate_slab(s, c);
2250 c->node = NUMA_NO_NODE; 2250 c->node = NUMA_NO_NODE;
2251 local_irq_restore(flags); 2251 local_irq_restore(flags);
2252 return object; 2252 return object;
2253 } 2253 }
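
Stripped of the locking details, the slow path above is a chain of fallbacks: take over the current page's freelist, then try the cpu partial list, then the node partial lists, and only then go to the page allocator. The stub below only illustrates that ordering; every helper is a hypothetical placeholder, not a kernel function.

/*
 * Order of fallbacks in the allocation slow path, as a stub model.
 * Each helper is a placeholder returning "empty" or an object.
 */
#include <stdio.h>
#include <stddef.h>

static void *take_over_page_freelist(void) { return NULL; }  /* cmpxchg of page->freelist */
static void *take_cpu_partial(void)        { return NULL; }  /* c->partial list */
static void *take_node_partial(void)       { return NULL; }  /* get_partial() */
static void *allocate_new_slab(void)       { static int obj; return &obj; }

static void *slow_alloc_model(void)
{
	void *object;

	if ((object = take_over_page_freelist()))
		return object;          /* refill from the current cpu slab */
	if ((object = take_cpu_partial()))
		return object;          /* cheap: no list_lock needed */
	if ((object = take_node_partial()))
		return object;          /* needs node->list_lock */
	return allocate_new_slab();     /* slowest: page allocator + slab setup */
}

int main(void)
{
	printf("got object at %p\n", slow_alloc_model());
	return 0;
}
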
2254 2254
2255 /* 2255 /*
2256 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) 2256 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
2257 * have the fastpath folded into their functions. So no function call 2257 * have the fastpath folded into their functions. So no function call
2258 * overhead for requests that can be satisfied on the fastpath. 2258 * overhead for requests that can be satisfied on the fastpath.
2259 * 2259 *
2260 * The fastpath works by first checking if the lockless freelist can be used. 2260 * The fastpath works by first checking if the lockless freelist can be used.
2261 * If not then __slab_alloc is called for slow processing. 2261 * If not then __slab_alloc is called for slow processing.
2262 * 2262 *
2263 * Otherwise we can simply pick the next object from the lockless free list. 2263 * Otherwise we can simply pick the next object from the lockless free list.
2264 */ 2264 */
2265 static __always_inline void *slab_alloc(struct kmem_cache *s, 2265 static __always_inline void *slab_alloc(struct kmem_cache *s,
2266 gfp_t gfpflags, int node, unsigned long addr) 2266 gfp_t gfpflags, int node, unsigned long addr)
2267 { 2267 {
2268 void **object; 2268 void **object;
2269 struct kmem_cache_cpu *c; 2269 struct kmem_cache_cpu *c;
2270 unsigned long tid; 2270 unsigned long tid;
2271 2271
2272 if (slab_pre_alloc_hook(s, gfpflags)) 2272 if (slab_pre_alloc_hook(s, gfpflags))
2273 return NULL; 2273 return NULL;
2274 2274
2275 redo: 2275 redo:
2276 2276
2277 /* 2277 /*
2278 * Must read kmem_cache cpu data via this cpu ptr. Preemption is 2278 * Must read kmem_cache cpu data via this cpu ptr. Preemption is
2279 * enabled. We may switch back and forth between cpus while 2279 * enabled. We may switch back and forth between cpus while
2280 * reading from one cpu area. That does not matter as long 2280 * reading from one cpu area. That does not matter as long
2281 * as we end up on the original cpu again when doing the cmpxchg. 2281 * as we end up on the original cpu again when doing the cmpxchg.
2282 */ 2282 */
2283 c = __this_cpu_ptr(s->cpu_slab); 2283 c = __this_cpu_ptr(s->cpu_slab);
2284 2284
2285 /* 2285 /*
2286 * The transaction ids are globally unique per cpu and per operation on 2286 * The transaction ids are globally unique per cpu and per operation on
2287 * a per cpu queue. Thus they guarantee that the cmpxchg_double 2287 * a per cpu queue. Thus they guarantee that the cmpxchg_double

2288 * occurs on the right processor and that there was no operation on the 2288 * occurs on the right processor and that there was no operation on the
2289 * linked list in between. 2289 * linked list in between.
2290 */ 2290 */
2291 tid = c->tid; 2291 tid = c->tid;
2292 barrier(); 2292 barrier();
2293 2293
2294 object = c->freelist; 2294 object = c->freelist;
2295 if (unlikely(!object || !node_match(c, node))) 2295 if (unlikely(!object || !node_match(c, node)))
2296 2296
2297 object = __slab_alloc(s, gfpflags, node, addr, c); 2297 object = __slab_alloc(s, gfpflags, node, addr, c);
2298 2298
2299 else { 2299 else {
2300 /* 2300 /*
2301 * The cmpxchg will only match if there was no additional 2301 * The cmpxchg will only match if there was no additional
2302 * operation and if we are on the right processor. 2302 * operation and if we are on the right processor.
2303 * 2303 *
2304 * The cmpxchg does the following atomically (without lock semantics!) 2304 * The cmpxchg does the following atomically (without lock semantics!)
2305 * 1. Relocate first pointer to the current per cpu area. 2305 * 1. Relocate first pointer to the current per cpu area.
2306 * 2. Verify that tid and freelist have not been changed 2306 * 2. Verify that tid and freelist have not been changed
2307 * 3. If they were not changed replace tid and freelist 2307 * 3. If they were not changed replace tid and freelist
2308 * 2308 *
2309 * Since this is without lock semantics the protection is only against 2309 * Since this is without lock semantics the protection is only against
2310 * code executing on this cpu *not* from access by other cpus. 2310 * code executing on this cpu *not* from access by other cpus.
2311 */ 2311 */
2312 if (unlikely(!irqsafe_cpu_cmpxchg_double( 2312 if (unlikely(!irqsafe_cpu_cmpxchg_double(
2313 s->cpu_slab->freelist, s->cpu_slab->tid, 2313 s->cpu_slab->freelist, s->cpu_slab->tid,
2314 object, tid, 2314 object, tid,
2315 get_freepointer_safe(s, object), next_tid(tid)))) { 2315 get_freepointer_safe(s, object), next_tid(tid)))) {
2316 2316
2317 note_cmpxchg_failure("slab_alloc", s, tid); 2317 note_cmpxchg_failure("slab_alloc", s, tid);
2318 goto redo; 2318 goto redo;
2319 } 2319 }
2320 stat(s, ALLOC_FASTPATH); 2320 stat(s, ALLOC_FASTPATH);
2321 } 2321 }
2322 2322
2323 if (unlikely(gfpflags & __GFP_ZERO) && object) 2323 if (unlikely(gfpflags & __GFP_ZERO) && object)
2324 memset(object, 0, s->objsize); 2324 memset(object, 0, s->objsize);
2325 2325
2326 slab_post_alloc_hook(s, gfpflags, object); 2326 slab_post_alloc_hook(s, gfpflags, object);
2327 2327
2328 return object; 2328 return object;
2329 } 2329 }
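
The fastpath above (and the matching free fastpath further down) hinges on pairing the per-cpu freelist pointer with a transaction id and swapping both in a single double-width cmpxchg, so that any intervening operation, or a migration to another cpu, makes the cmpxchg fail and the operation is retried. The sketch below is a portable user-space model of the same idea: a 32-bit object index and a 32-bit tid packed into one 64-bit word so plain C11 atomics suffice, whereas the kernel uses a real this_cpu cmpxchg_double on {pointer, tid}.

/*
 * Model of the lockless freelist fastpath: head pointer (here an index)
 * and a transaction id are swapped together in one compare-and-exchange.
 * Illustrative only; not the kernel implementation.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdatomic.h>

#define NOBJ 16
#define NONE 0xffffffffu

static uint32_t next_idx[NOBJ];		/* per-object free pointer */
static _Atomic uint64_t freelist_head;	/* (index << 32) | tid */

static uint64_t pack(uint32_t idx, uint32_t tid)
{
	return ((uint64_t)idx << 32) | tid;
}

static int pop(void)			/* models the slab_alloc() fastpath */
{
	uint64_t old = atomic_load(&freelist_head);

	for (;;) {
		uint32_t idx = old >> 32;
		uint32_t tid = (uint32_t)old;
		uint64_t new;

		if (idx == NONE)
			return -1;	/* empty: would fall back to the slow path */

		new = pack(next_idx[idx], tid + 1);
		/* on failure 'old' is refreshed and we retry, like "goto redo" */
		if (atomic_compare_exchange_weak(&freelist_head, &old, new))
			return (int)idx;
	}
}

static void push(uint32_t idx)		/* models the slab_free() fastpath */
{
	uint64_t old = atomic_load(&freelist_head);

	for (;;) {
		uint64_t new;

		next_idx[idx] = (uint32_t)(old >> 32);	/* set_freepointer() */
		new = pack(idx, (uint32_t)old + 1);
		if (atomic_compare_exchange_weak(&freelist_head, &old, new))
			return;
	}
}

int main(void)
{
	for (uint32_t i = 0; i < NOBJ; i++)	/* freelist 0 -> 1 -> ... -> 15 */
		next_idx[i] = (i + 1 < NOBJ) ? i + 1 : NONE;
	atomic_store(&freelist_head, pack(0, 0));

	int a = pop(), b = pop();
	printf("allocated %d and %d\n", a, b);
	push((uint32_t)a);
	printf("allocated %d again after freeing it\n", pop());
	return 0;
}

Bumping the tid on every successful swap is what defeats the ABA problem: even if the head ends up holding the same value again, the tid differs and the stale cmpxchg fails.
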
2330 2330
2331 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) 2331 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2332 { 2332 {
2333 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); 2333 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
2334 2334
2335 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags); 2335 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);
2336 2336
2337 return ret; 2337 return ret;
2338 } 2338 }
2339 EXPORT_SYMBOL(kmem_cache_alloc); 2339 EXPORT_SYMBOL(kmem_cache_alloc);
2340 2340
2341 #ifdef CONFIG_TRACING 2341 #ifdef CONFIG_TRACING
2342 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) 2342 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2343 { 2343 {
2344 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); 2344 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
2345 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags); 2345 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2346 return ret; 2346 return ret;
2347 } 2347 }
2348 EXPORT_SYMBOL(kmem_cache_alloc_trace); 2348 EXPORT_SYMBOL(kmem_cache_alloc_trace);
2349 2349
2350 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) 2350 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
2351 { 2351 {
2352 void *ret = kmalloc_order(size, flags, order); 2352 void *ret = kmalloc_order(size, flags, order);
2353 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags); 2353 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
2354 return ret; 2354 return ret;
2355 } 2355 }
2356 EXPORT_SYMBOL(kmalloc_order_trace); 2356 EXPORT_SYMBOL(kmalloc_order_trace);
2357 #endif 2357 #endif
2358 2358
2359 #ifdef CONFIG_NUMA 2359 #ifdef CONFIG_NUMA
2360 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) 2360 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2361 { 2361 {
2362 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); 2362 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2363 2363
2364 trace_kmem_cache_alloc_node(_RET_IP_, ret, 2364 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2365 s->objsize, s->size, gfpflags, node); 2365 s->objsize, s->size, gfpflags, node);
2366 2366
2367 return ret; 2367 return ret;
2368 } 2368 }
2369 EXPORT_SYMBOL(kmem_cache_alloc_node); 2369 EXPORT_SYMBOL(kmem_cache_alloc_node);
2370 2370
2371 #ifdef CONFIG_TRACING 2371 #ifdef CONFIG_TRACING
2372 void *kmem_cache_alloc_node_trace(struct kmem_cache *s, 2372 void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2373 gfp_t gfpflags, 2373 gfp_t gfpflags,
2374 int node, size_t size) 2374 int node, size_t size)
2375 { 2375 {
2376 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); 2376 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2377 2377
2378 trace_kmalloc_node(_RET_IP_, ret, 2378 trace_kmalloc_node(_RET_IP_, ret,
2379 size, s->size, gfpflags, node); 2379 size, s->size, gfpflags, node);
2380 return ret; 2380 return ret;
2381 } 2381 }
2382 EXPORT_SYMBOL(kmem_cache_alloc_node_trace); 2382 EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2383 #endif 2383 #endif
2384 #endif 2384 #endif
2385 2385
2386 /* 2386 /*
2387 * Slow path handling. This may still be called frequently since objects 2387 * Slow path handling. This may still be called frequently since objects
2388 * have a longer lifetime than the cpu slabs in most processing loads. 2388 * have a longer lifetime than the cpu slabs in most processing loads.
2389 * 2389 *
2390 * So we still attempt to reduce cache line usage. Just take the slab 2390 * So we still attempt to reduce cache line usage. Just take the slab
2391 * lock and free the item. If there is no additional partial page 2391 * lock and free the item. If there is no additional partial page
2392 * handling required then we can return immediately. 2392 * handling required then we can return immediately.
2393 */ 2393 */
2394 static void __slab_free(struct kmem_cache *s, struct page *page, 2394 static void __slab_free(struct kmem_cache *s, struct page *page,
2395 void *x, unsigned long addr) 2395 void *x, unsigned long addr)
2396 { 2396 {
2397 void *prior; 2397 void *prior;
2398 void **object = (void *)x; 2398 void **object = (void *)x;
2399 int was_frozen; 2399 int was_frozen;
2400 int inuse; 2400 int inuse;
2401 struct page new; 2401 struct page new;
2402 unsigned long counters; 2402 unsigned long counters;
2403 struct kmem_cache_node *n = NULL; 2403 struct kmem_cache_node *n = NULL;
2404 unsigned long uninitialized_var(flags); 2404 unsigned long uninitialized_var(flags);
2405 2405
2406 stat(s, FREE_SLOWPATH); 2406 stat(s, FREE_SLOWPATH);
2407 2407
2408 if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr)) 2408 if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr))
2409 return; 2409 return;
2410 2410
2411 do { 2411 do {
2412 prior = page->freelist; 2412 prior = page->freelist;
2413 counters = page->counters; 2413 counters = page->counters;
2414 set_freepointer(s, object, prior); 2414 set_freepointer(s, object, prior);
2415 new.counters = counters; 2415 new.counters = counters;
2416 was_frozen = new.frozen; 2416 was_frozen = new.frozen;
2417 new.inuse--; 2417 new.inuse--;
2418 if ((!new.inuse || !prior) && !was_frozen && !n) { 2418 if ((!new.inuse || !prior) && !was_frozen && !n) {
2419 2419
2420 if (!kmem_cache_debug(s) && !prior) 2420 if (!kmem_cache_debug(s) && !prior)
2421 2421
2422 /* 2422 /*
2423 * Slab was on no list before and will be partially empty 2423 * Slab was on no list before and will be partially empty
2424 * We can defer the list move and instead freeze it. 2424 * We can defer the list move and instead freeze it.
2425 */ 2425 */
2426 new.frozen = 1; 2426 new.frozen = 1;
2427 2427
2428 else { /* Needs to be taken off a list */ 2428 else { /* Needs to be taken off a list */
2429 2429
2430 n = get_node(s, page_to_nid(page)); 2430 n = get_node(s, page_to_nid(page));
2431 /* 2431 /*
2432 * Speculatively acquire the list_lock. 2432 * Speculatively acquire the list_lock.
2433 * If the cmpxchg does not succeed then we may 2433 * If the cmpxchg does not succeed then we may
2434 * drop the list_lock without any processing. 2434 * drop the list_lock without any processing.
2435 * 2435 *
2436 * Otherwise the list_lock will synchronize with 2436 * Otherwise the list_lock will synchronize with
2437 * other processors updating the list of slabs. 2437 * other processors updating the list of slabs.
2438 */ 2438 */
2439 spin_lock_irqsave(&n->list_lock, flags); 2439 spin_lock_irqsave(&n->list_lock, flags);
2440 2440
2441 } 2441 }
2442 } 2442 }
2443 inuse = new.inuse; 2443 inuse = new.inuse;
2444 2444
2445 } while (!cmpxchg_double_slab(s, page, 2445 } while (!cmpxchg_double_slab(s, page,
2446 prior, counters, 2446 prior, counters,
2447 object, new.counters, 2447 object, new.counters,
2448 "__slab_free")); 2448 "__slab_free"));
2449 2449
2450 if (likely(!n)) { 2450 if (likely(!n)) {
2451 2451
2452 /* 2452 /*
2453 * If we just froze the page then put it onto the 2453 * If we just froze the page then put it onto the
2454 * per cpu partial list. 2454 * per cpu partial list.
2455 */ 2455 */
2456 if (new.frozen && !was_frozen) 2456 if (new.frozen && !was_frozen)
2457 put_cpu_partial(s, page, 1); 2457 put_cpu_partial(s, page, 1);
2458 2458
2459 /* 2459 /*
2460 * The list lock was not taken therefore no list 2460 * The list lock was not taken therefore no list
2461 * activity can be necessary. 2461 * activity can be necessary.
2462 */ 2462 */
2463 if (was_frozen) 2463 if (was_frozen)
2464 stat(s, FREE_FROZEN); 2464 stat(s, FREE_FROZEN);
2465 return; 2465 return;
2466 } 2466 }
2467 2467
2468 /* 2468 /*
2469 * was_frozen may have been set after we acquired the list_lock in 2469 * was_frozen may have been set after we acquired the list_lock in
2470 * an earlier loop. So we need to check it here again. 2470 * an earlier loop. So we need to check it here again.
2471 */ 2471 */
2472 if (was_frozen) 2472 if (was_frozen)
2473 stat(s, FREE_FROZEN); 2473 stat(s, FREE_FROZEN);
2474 else { 2474 else {
2475 if (unlikely(!inuse && n->nr_partial > s->min_partial)) 2475 if (unlikely(!inuse && n->nr_partial > s->min_partial))
2476 goto slab_empty; 2476 goto slab_empty;
2477 2477
2478 /* 2478 /*
2479 * Objects left in the slab. If it was not on the partial list before 2479 * Objects left in the slab. If it was not on the partial list before
2480 * then add it. 2480 * then add it.
2481 */ 2481 */
2482 if (unlikely(!prior)) { 2482 if (unlikely(!prior)) {
2483 remove_full(s, page); 2483 remove_full(s, page);
2484 add_partial(n, page, DEACTIVATE_TO_TAIL); 2484 add_partial(n, page, DEACTIVATE_TO_TAIL);
2485 stat(s, FREE_ADD_PARTIAL); 2485 stat(s, FREE_ADD_PARTIAL);
2486 } 2486 }
2487 } 2487 }
2488 spin_unlock_irqrestore(&n->list_lock, flags); 2488 spin_unlock_irqrestore(&n->list_lock, flags);
2489 return; 2489 return;
2490 2490
2491 slab_empty: 2491 slab_empty:
2492 if (prior) { 2492 if (prior) {
2493 /* 2493 /*
2494 * Slab on the partial list. 2494 * Slab on the partial list.
2495 */ 2495 */
2496 remove_partial(n, page); 2496 remove_partial(n, page);
2497 stat(s, FREE_REMOVE_PARTIAL); 2497 stat(s, FREE_REMOVE_PARTIAL);
2498 } else 2498 } else
2499 /* Slab must be on the full list */ 2499 /* Slab must be on the full list */
2500 remove_full(s, page); 2500 remove_full(s, page);
2501 2501
2502 spin_unlock_irqrestore(&n->list_lock, flags); 2502 spin_unlock_irqrestore(&n->list_lock, flags);
2503 stat(s, FREE_SLAB); 2503 stat(s, FREE_SLAB);
2504 discard_slab(s, page); 2504 discard_slab(s, page);
2505 } 2505 }
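
The free slow path above folds a lock acquisition into its cmpxchg retry loop: the node's list_lock is taken speculatively before the cmpxchg that decides the page's fate, and is simply dropped again if the final state needs no list manipulation. Below is a compact stand-alone illustration of that pattern, with a counter playing the role of page->inuse; everything here is an illustrative stand-in, not kernel code.

/*
 * "Speculative lock" pattern: take a lock that only some transitions
 * need before the compare-and-exchange that decides the transition,
 * and release it unused if the final state did not need it.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <pthread.h>

static _Atomic int inuse = 3;		/* objects still allocated */
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void free_one(void)
{
	bool locked = false;
	int old = atomic_load(&inuse);
	int new;

	do {
		new = old - 1;
		if (new == 0 && !locked) {
			/* Transition to "empty" may require list work:
			 * acquire the lock speculatively before the cmpxchg. */
			pthread_mutex_lock(&list_lock);
			locked = true;
		}
		/* on failure, 'old' is refreshed and the loop re-decides */
	} while (!atomic_compare_exchange_weak(&inuse, &old, new));

	if (!locked)
		return;			/* fast case: no list work needed */

	if (new == 0)
		printf("page empty: would be removed from its list\n");
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	free_one();
	free_one();
	free_one();			/* this one takes the lock */
	return 0;
}

Keeping the lock across a failed cmpxchg lets the retry re-evaluate the transition without a second lock round-trip; releasing it untouched is the "drop the list_lock without any processing" case described in the comment above.
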
2506 2506
2507 /* 2507 /*
2508 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that 2508 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
2509 * can perform fastpath freeing without additional function calls. 2509 * can perform fastpath freeing without additional function calls.
2510 * 2510 *
2511 * The fastpath is only possible if we are freeing to the current cpu slab 2511 * The fastpath is only possible if we are freeing to the current cpu slab
2512 * of this processor. This is typically the case if we have just allocated 2512 * of this processor. This is typically the case if we have just allocated
2513 * the item before. 2513 * the item before.
2514 * 2514 *
2515 * If fastpath is not possible then fall back to __slab_free where we deal 2515 * If fastpath is not possible then fall back to __slab_free where we deal
2516 * with all sorts of special processing. 2516 * with all sorts of special processing.
2517 */ 2517 */
2518 static __always_inline void slab_free(struct kmem_cache *s, 2518 static __always_inline void slab_free(struct kmem_cache *s,
2519 struct page *page, void *x, unsigned long addr) 2519 struct page *page, void *x, unsigned long addr)
2520 { 2520 {
2521 void **object = (void *)x; 2521 void **object = (void *)x;
2522 struct kmem_cache_cpu *c; 2522 struct kmem_cache_cpu *c;
2523 unsigned long tid; 2523 unsigned long tid;
2524 2524
2525 slab_free_hook(s, x); 2525 slab_free_hook(s, x);
2526 2526
2527 redo: 2527 redo:
2528 /* 2528 /*
2529 * Determine the current cpu's per cpu slab. 2529 * Determine the current cpu's per cpu slab.
2530 * The cpu may change afterward. However that does not matter since 2530 * The cpu may change afterward. However that does not matter since
2531 * data is retrieved via this pointer. If we are on the same cpu 2531 * data is retrieved via this pointer. If we are on the same cpu
2532 * during the cmpxchg then the free will succeed. 2532 * during the cmpxchg then the free will succeed.
2533 */ 2533 */
2534 c = __this_cpu_ptr(s->cpu_slab); 2534 c = __this_cpu_ptr(s->cpu_slab);
2535 2535
2536 tid = c->tid; 2536 tid = c->tid;
2537 barrier(); 2537 barrier();
2538 2538
2539 if (likely(page == c->page)) { 2539 if (likely(page == c->page)) {
2540 set_freepointer(s, object, c->freelist); 2540 set_freepointer(s, object, c->freelist);
2541 2541
2542 if (unlikely(!irqsafe_cpu_cmpxchg_double( 2542 if (unlikely(!irqsafe_cpu_cmpxchg_double(
2543 s->cpu_slab->freelist, s->cpu_slab->tid, 2543 s->cpu_slab->freelist, s->cpu_slab->tid,
2544 c->freelist, tid, 2544 c->freelist, tid,
2545 object, next_tid(tid)))) { 2545 object, next_tid(tid)))) {
2546 2546
2547 note_cmpxchg_failure("slab_free", s, tid); 2547 note_cmpxchg_failure("slab_free", s, tid);
2548 goto redo; 2548 goto redo;
2549 } 2549 }
2550 stat(s, FREE_FASTPATH); 2550 stat(s, FREE_FASTPATH);
2551 } else 2551 } else
2552 __slab_free(s, page, x, addr); 2552 __slab_free(s, page, x, addr);
2553 2553
2554 } 2554 }
2555 2555
2556 void kmem_cache_free(struct kmem_cache *s, void *x) 2556 void kmem_cache_free(struct kmem_cache *s, void *x)
2557 { 2557 {
2558 struct page *page; 2558 struct page *page;
2559 2559
2560 page = virt_to_head_page(x); 2560 page = virt_to_head_page(x);
2561 2561
2562 slab_free(s, page, x, _RET_IP_); 2562 slab_free(s, page, x, _RET_IP_);
2563 2563
2564 trace_kmem_cache_free(_RET_IP_, x); 2564 trace_kmem_cache_free(_RET_IP_, x);
2565 } 2565 }
2566 EXPORT_SYMBOL(kmem_cache_free); 2566 EXPORT_SYMBOL(kmem_cache_free);
2567 2567
2568 /* 2568 /*
2569 * Object placement in a slab is made very easy because we always start at 2569 * Object placement in a slab is made very easy because we always start at
2570 * offset 0. If we tune the size of the object to the alignment then we can 2570 * offset 0. If we tune the size of the object to the alignment then we can
2571 * get the required alignment by putting one properly sized object after 2571 * get the required alignment by putting one properly sized object after
2572 * another. 2572 * another.
2573 * 2573 *
2574 * Notice that the allocation order determines the sizes of the per cpu 2574 * Notice that the allocation order determines the sizes of the per cpu
2575 * caches. Each processor always has one slab available for allocations. 2575 * caches. Each processor always has one slab available for allocations.
2576 * Increasing the allocation order reduces the number of times that slabs 2576 * Increasing the allocation order reduces the number of times that slabs
2577 * must be moved on and off the partial lists and is therefore a factor in 2577 * must be moved on and off the partial lists and is therefore a factor in
2578 * locking overhead. 2578 * locking overhead.
2579 */ 2579 */
2580 2580
2581 /* 2581 /*
2582 * Minimum / Maximum order of slab pages. This influences locking overhead 2582 * Minimum / Maximum order of slab pages. This influences locking overhead
2583 * and slab fragmentation. A higher order reduces the number of partial slabs 2583 * and slab fragmentation. A higher order reduces the number of partial slabs
2584 * and increases the number of allocations possible without having to 2584 * and increases the number of allocations possible without having to
2585 * take the list_lock. 2585 * take the list_lock.
2586 */ 2586 */
2587 static int slub_min_order; 2587 static int slub_min_order;
2588 static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; 2588 static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
2589 static int slub_min_objects; 2589 static int slub_min_objects;
2590 2590
2591 /* 2591 /*
2592 * Merge control. If this is set then no merging of slab caches will occur. 2592 * Merge control. If this is set then no merging of slab caches will occur.
2593 * (Could be removed. This was introduced to pacify the merge skeptics.) 2593 * (Could be removed. This was introduced to pacify the merge skeptics.)
2594 */ 2594 */
2595 static int slub_nomerge; 2595 static int slub_nomerge;
2596 2596
2597 /* 2597 /*
2598 * Calculate the order of allocation given a slab object size. 2598 * Calculate the order of allocation given a slab object size.
2599 * 2599 *
2600 * The order of allocation has significant impact on performance and other 2600 * The order of allocation has significant impact on performance and other
2601 * system components. Generally order 0 allocations should be preferred since 2601 * system components. Generally order 0 allocations should be preferred since
2602 * order 0 does not cause fragmentation in the page allocator. Larger objects 2602 * order 0 does not cause fragmentation in the page allocator. Larger objects
2603 * be problematic to put into order 0 slabs because there may be too much 2603 * be problematic to put into order 0 slabs because there may be too much
2604 * unused space left. We go to a higher order if more than 1/16th of the slab 2604 * unused space left. We go to a higher order if more than 1/16th of the slab
2605 * would be wasted. 2605 * would be wasted.
2606 * 2606 *
2607 * In order to reach satisfactory performance we must ensure that a minimum 2607 * In order to reach satisfactory performance we must ensure that a minimum
2608 * number of objects is in one slab. Otherwise we may generate too much 2608 * number of objects is in one slab. Otherwise we may generate too much
2609 * activity on the partial lists which requires taking the list_lock. This is 2609 * activity on the partial lists which requires taking the list_lock. This is
2610 * less a concern for large slabs though which are rarely used. 2610 * less a concern for large slabs though which are rarely used.
2611 * 2611 *
2612 * slub_max_order specifies the order where we begin to stop considering the 2612 * slub_max_order specifies the order where we begin to stop considering the
2613 * number of objects in a slab as critical. If we reach slub_max_order then 2613 * number of objects in a slab as critical. If we reach slub_max_order then
2614 * we try to keep the page order as low as possible. So we accept more waste 2614 * we try to keep the page order as low as possible. So we accept more waste
2615 * of space in favor of a small page order. 2615 * of space in favor of a small page order.
2616 * 2616 *
2617 * Higher order allocations also allow the placement of more objects in a 2617 * Higher order allocations also allow the placement of more objects in a
2618 * slab and thereby reduce object handling overhead. If the user has 2618 * slab and thereby reduce object handling overhead. If the user has
2619 * requested a higher minimum order then we start with that one instead of 2619 * requested a higher minimum order then we start with that one instead of
2620 * the smallest order which will fit the object. 2620 * the smallest order which will fit the object.
2621 */ 2621 */
2622 static inline int slab_order(int size, int min_objects, 2622 static inline int slab_order(int size, int min_objects,
2623 int max_order, int fract_leftover, int reserved) 2623 int max_order, int fract_leftover, int reserved)
2624 { 2624 {
2625 int order; 2625 int order;
2626 int rem; 2626 int rem;
2627 int min_order = slub_min_order; 2627 int min_order = slub_min_order;
2628 2628
2629 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE) 2629 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
2630 return get_order(size * MAX_OBJS_PER_PAGE) - 1; 2630 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
2631 2631
2632 for (order = max(min_order, 2632 for (order = max(min_order,
2633 fls(min_objects * size - 1) - PAGE_SHIFT); 2633 fls(min_objects * size - 1) - PAGE_SHIFT);
2634 order <= max_order; order++) { 2634 order <= max_order; order++) {
2635 2635
2636 unsigned long slab_size = PAGE_SIZE << order; 2636 unsigned long slab_size = PAGE_SIZE << order;
2637 2637
2638 if (slab_size < min_objects * size + reserved) 2638 if (slab_size < min_objects * size + reserved)
2639 continue; 2639 continue;
2640 2640
2641 rem = (slab_size - reserved) % size; 2641 rem = (slab_size - reserved) % size;
2642 2642
2643 if (rem <= slab_size / fract_leftover) 2643 if (rem <= slab_size / fract_leftover)
2644 break; 2644 break;
2645 2645
2646 } 2646 }
2647 2647
2648 return order; 2648 return order;
2649 } 2649 }
2650 2650
2651 static inline int calculate_order(int size, int reserved) 2651 static inline int calculate_order(int size, int reserved)
2652 { 2652 {
2653 int order; 2653 int order;
2654 int min_objects; 2654 int min_objects;
2655 int fraction; 2655 int fraction;
2656 int max_objects; 2656 int max_objects;
2657 2657
2658 /* 2658 /*
2659 * Attempt to find best configuration for a slab. This 2659 * Attempt to find best configuration for a slab. This
2660 * works by first attempting to generate a layout with 2660 * works by first attempting to generate a layout with
2661 * the best configuration and backing off gradually. 2661 * the best configuration and backing off gradually.
2662 * 2662 *
2663 * First we reduce the acceptable waste in a slab. Then 2663 * First we reduce the acceptable waste in a slab. Then
2664 * we reduce the minimum objects required in a slab. 2664 * we reduce the minimum objects required in a slab.
2665 */ 2665 */
2666 min_objects = slub_min_objects; 2666 min_objects = slub_min_objects;
2667 if (!min_objects) 2667 if (!min_objects)
2668 min_objects = 4 * (fls(nr_cpu_ids) + 1); 2668 min_objects = 4 * (fls(nr_cpu_ids) + 1);
2669 max_objects = order_objects(slub_max_order, size, reserved); 2669 max_objects = order_objects(slub_max_order, size, reserved);
2670 min_objects = min(min_objects, max_objects); 2670 min_objects = min(min_objects, max_objects);
2671 2671
2672 while (min_objects > 1) { 2672 while (min_objects > 1) {
2673 fraction = 16; 2673 fraction = 16;
2674 while (fraction >= 4) { 2674 while (fraction >= 4) {
2675 order = slab_order(size, min_objects, 2675 order = slab_order(size, min_objects,
2676 slub_max_order, fraction, reserved); 2676 slub_max_order, fraction, reserved);
2677 if (order <= slub_max_order) 2677 if (order <= slub_max_order)
2678 return order; 2678 return order;
2679 fraction /= 2; 2679 fraction /= 2;
2680 } 2680 }
2681 min_objects--; 2681 min_objects--;
2682 } 2682 }
2683 2683
2684 /* 2684 /*
2685 * We were unable to place multiple objects in a slab. Now 2685 * We were unable to place multiple objects in a slab. Now
2686 * lets see if we can place a single object there. 2686 * lets see if we can place a single object there.
2687 */ 2687 */
2688 order = slab_order(size, 1, slub_max_order, 1, reserved); 2688 order = slab_order(size, 1, slub_max_order, 1, reserved);
2689 if (order <= slub_max_order) 2689 if (order <= slub_max_order)
2690 return order; 2690 return order;
2691 2691
2692 /* 2692 /*
2693 * Doh this slab cannot be placed using slub_max_order. 2693 * Doh this slab cannot be placed using slub_max_order.
2694 */ 2694 */
2695 order = slab_order(size, 1, MAX_ORDER, 1, reserved); 2695 order = slab_order(size, 1, MAX_ORDER, 1, reserved);
2696 if (order < MAX_ORDER) 2696 if (order < MAX_ORDER)
2697 return order; 2697 return order;
2698 return -ENOSYS; 2698 return -ENOSYS;
2699 } 2699 }
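
The two routines above boil down to simple arithmetic: for each candidate order, check how many objects fit into PAGE_SIZE << order and whether the leftover stays under slab_size / fract_leftover. The model below replays that arithmetic outside the kernel for one hypothetical object size; it drops the reserved bytes and the fls()-based starting order, and the constants are picked for the example, not taken from a real cache.

/*
 * Replay of the slab_order() waste calculation: pick the lowest order
 * whose leftover space is at most slab_size / fract_leftover.
 * Constants are illustrative.
 */
#include <stdio.h>

#define PAGE_SIZE_MODEL 4096UL

static int model_slab_order(unsigned long size, int min_objects,
			    int max_order, int fract_leftover)
{
	for (int order = 0; order <= max_order; order++) {
		unsigned long slab_size = PAGE_SIZE_MODEL << order;
		unsigned long rem;

		if (slab_size < (unsigned long)min_objects * size)
			continue;		/* too few objects would fit */

		rem = slab_size % size;
		printf("order %d: %lu objects, %lu bytes wasted\n",
		       order, slab_size / size, rem);
		if (rem <= slab_size / fract_leftover)
			return order;
	}
	return -1;
}

int main(void)
{
	/* e.g. a 700-byte object, at least 8 per slab, waste <= 1/16 */
	int order = model_slab_order(700, 8, 3, 16);
	printf("chosen order: %d\n", order);
	return 0;
}

For a 700-byte object this settles on order 1: eleven objects per 8 KiB slab with 492 wasted bytes (under 1/16), whereas an order-0 slab cannot even hold the requested minimum of eight objects.
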
2700 2700
2701 /* 2701 /*
2702 * Figure out what the alignment of the objects will be. 2702 * Figure out what the alignment of the objects will be.
2703 */ 2703 */
2704 static unsigned long calculate_alignment(unsigned long flags, 2704 static unsigned long calculate_alignment(unsigned long flags,
2705 unsigned long align, unsigned long size) 2705 unsigned long align, unsigned long size)
2706 { 2706 {
2707 /* 2707 /*
2708 * If the user wants hardware cache aligned objects then follow that 2708 * If the user wants hardware cache aligned objects then follow that
2709 * suggestion if the object is sufficiently large. 2709 * suggestion if the object is sufficiently large.
2710 * 2710 *
2711 * The hardware cache alignment cannot override the specified 2711 * The hardware cache alignment cannot override the specified
2712 * alignment though. If that is greater then use it. 2712 * alignment though. If that is greater then use it.
2713 */ 2713 */
2714 if (flags & SLAB_HWCACHE_ALIGN) { 2714 if (flags & SLAB_HWCACHE_ALIGN) {
2715 unsigned long ralign = cache_line_size(); 2715 unsigned long ralign = cache_line_size();
2716 while (size <= ralign / 2) 2716 while (size <= ralign / 2)
2717 ralign /= 2; 2717 ralign /= 2;
2718 align = max(align, ralign); 2718 align = max(align, ralign);
2719 } 2719 }
2720 2720
2721 if (align < ARCH_SLAB_MINALIGN) 2721 if (align < ARCH_SLAB_MINALIGN)
2722 align = ARCH_SLAB_MINALIGN; 2722 align = ARCH_SLAB_MINALIGN;
2723 2723
2724 return ALIGN(align, sizeof(void *)); 2724 return ALIGN(align, sizeof(void *));
2725 } 2725 }
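
As a worked example of the loop above: with 64-byte cache lines and SLAB_HWCACHE_ALIGN, a 24-byte object halves ralign from 64 to 32 (24 fits in half of 64 but not in half of 32), so such objects end up 32-byte aligned instead of each burning a whole cache line. The snippet below just replays that arithmetic; the cache line size and minimum alignment are assumed values.

/*
 * Replay of the SLAB_HWCACHE_ALIGN arithmetic: shrink the cache-line
 * alignment while the object still fits in half of it, then clamp to a
 * minimum and to word alignment.  Values are illustrative.
 */
#include <stdio.h>

#define CACHE_LINE_MODEL 64UL
#define MINALIGN_MODEL   8UL

static unsigned long model_alignment(unsigned long align, unsigned long size)
{
	unsigned long ralign = CACHE_LINE_MODEL;

	while (size <= ralign / 2)
		ralign /= 2;
	if (ralign > align)
		align = ralign;
	if (align < MINALIGN_MODEL)
		align = MINALIGN_MODEL;
	return (align + sizeof(void *) - 1) & ~(sizeof(void *) - 1);
}

int main(void)
{
	for (unsigned long size = 8; size <= 64; size *= 2)
		printf("object %3lu bytes -> align %lu\n",
		       size, model_alignment(sizeof(void *), size));
	return 0;
}

Because the final alignment always divides the cache line size and the object is no larger than it, several small objects can share a line while no object straddles two lines.
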
2726 2726
2727 static void 2727 static void
2728 init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) 2728 init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
2729 { 2729 {
2730 n->nr_partial = 0; 2730 n->nr_partial = 0;
2731 spin_lock_init(&n->list_lock); 2731 spin_lock_init(&n->list_lock);
2732 INIT_LIST_HEAD(&n->partial); 2732 INIT_LIST_HEAD(&n->partial);
2733 #ifdef CONFIG_SLUB_DEBUG 2733 #ifdef CONFIG_SLUB_DEBUG
2734 atomic_long_set(&n->nr_slabs, 0); 2734 atomic_long_set(&n->nr_slabs, 0);
2735 atomic_long_set(&n->total_objects, 0); 2735 atomic_long_set(&n->total_objects, 0);
2736 INIT_LIST_HEAD(&n->full); 2736 INIT_LIST_HEAD(&n->full);
2737 #endif 2737 #endif
2738 } 2738 }
2739 2739
2740 static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) 2740 static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
2741 { 2741 {
2742 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < 2742 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
2743 SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu)); 2743 SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
2744 2744
2745 /* 2745 /*
2746 * Must align to double word boundary for the double cmpxchg 2746 * Must align to double word boundary for the double cmpxchg
2747 * instructions to work; see __pcpu_double_call_return_bool(). 2747 * instructions to work; see __pcpu_double_call_return_bool().
2748 */ 2748 */
2749 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu), 2749 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
2750 2 * sizeof(void *)); 2750 2 * sizeof(void *));
2751 2751
2752 if (!s->cpu_slab) 2752 if (!s->cpu_slab)
2753 return 0; 2753 return 0;
2754 2754
2755 init_kmem_cache_cpus(s); 2755 init_kmem_cache_cpus(s);
2756 2756
2757 return 1; 2757 return 1;
2758 } 2758 }
2759 2759
2760 static struct kmem_cache *kmem_cache_node; 2760 static struct kmem_cache *kmem_cache_node;
2761 2761
2762 /* 2762 /*
2763 * No kmalloc_node yet so do it by hand. We know that this is the first 2763 * No kmalloc_node yet so do it by hand. We know that this is the first
2764 * slab on the node for this slabcache. There are no concurrent accesses 2764 * slab on the node for this slabcache. There are no concurrent accesses
2765 * possible. 2765 * possible.
2766 * 2766 *
2767 * Note that this function only works on the kmalloc_node_cache 2767 * Note that this function only works on the kmalloc_node_cache
2768 * when allocating for the kmalloc_node_cache. This is used for bootstrapping 2768 * when allocating for the kmalloc_node_cache. This is used for bootstrapping
2769 * memory on a fresh node that has no slab structures yet. 2769 * memory on a fresh node that has no slab structures yet.
2770 */ 2770 */
2771 static void early_kmem_cache_node_alloc(int node) 2771 static void early_kmem_cache_node_alloc(int node)
2772 { 2772 {
2773 struct page *page; 2773 struct page *page;
2774 struct kmem_cache_node *n; 2774 struct kmem_cache_node *n;
2775 2775
2776 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node)); 2776 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
2777 2777
2778 page = new_slab(kmem_cache_node, GFP_NOWAIT, node); 2778 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
2779 2779
2780 BUG_ON(!page); 2780 BUG_ON(!page);
2781 if (page_to_nid(page) != node) { 2781 if (page_to_nid(page) != node) {
2782 printk(KERN_ERR "SLUB: Unable to allocate memory from " 2782 printk(KERN_ERR "SLUB: Unable to allocate memory from "
2783 "node %d\n", node); 2783 "node %d\n", node);
2784 printk(KERN_ERR "SLUB: Allocating a useless per node structure " 2784 printk(KERN_ERR "SLUB: Allocating a useless per node structure "
2785 "in order to be able to continue\n"); 2785 "in order to be able to continue\n");
2786 } 2786 }
2787 2787
2788 n = page->freelist; 2788 n = page->freelist;
2789 BUG_ON(!n); 2789 BUG_ON(!n);
2790 page->freelist = get_freepointer(kmem_cache_node, n); 2790 page->freelist = get_freepointer(kmem_cache_node, n);
2791 page->inuse = 1; 2791 page->inuse = 1;
2792 page->frozen = 0; 2792 page->frozen = 0;
2793 kmem_cache_node->node[node] = n; 2793 kmem_cache_node->node[node] = n;
2794 #ifdef CONFIG_SLUB_DEBUG 2794 #ifdef CONFIG_SLUB_DEBUG
2795 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); 2795 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
2796 init_tracking(kmem_cache_node, n); 2796 init_tracking(kmem_cache_node, n);
2797 #endif 2797 #endif
2798 init_kmem_cache_node(n, kmem_cache_node); 2798 init_kmem_cache_node(n, kmem_cache_node);
2799 inc_slabs_node(kmem_cache_node, node, page->objects); 2799 inc_slabs_node(kmem_cache_node, node, page->objects);
2800 2800
2801 add_partial(n, page, DEACTIVATE_TO_HEAD); 2801 add_partial(n, page, DEACTIVATE_TO_HEAD);
2802 } 2802 }
2803 2803
2804 static void free_kmem_cache_nodes(struct kmem_cache *s) 2804 static void free_kmem_cache_nodes(struct kmem_cache *s)
2805 { 2805 {
2806 int node; 2806 int node;
2807 2807
2808 for_each_node_state(node, N_NORMAL_MEMORY) { 2808 for_each_node_state(node, N_NORMAL_MEMORY) {
2809 struct kmem_cache_node *n = s->node[node]; 2809 struct kmem_cache_node *n = s->node[node];
2810 2810
2811 if (n) 2811 if (n)
2812 kmem_cache_free(kmem_cache_node, n); 2812 kmem_cache_free(kmem_cache_node, n);
2813 2813
2814 s->node[node] = NULL; 2814 s->node[node] = NULL;
2815 } 2815 }
2816 } 2816 }
2817 2817
2818 static int init_kmem_cache_nodes(struct kmem_cache *s) 2818 static int init_kmem_cache_nodes(struct kmem_cache *s)
2819 { 2819 {
2820 int node; 2820 int node;
2821 2821
2822 for_each_node_state(node, N_NORMAL_MEMORY) { 2822 for_each_node_state(node, N_NORMAL_MEMORY) {
2823 struct kmem_cache_node *n; 2823 struct kmem_cache_node *n;
2824 2824
2825 if (slab_state == DOWN) { 2825 if (slab_state == DOWN) {
2826 early_kmem_cache_node_alloc(node); 2826 early_kmem_cache_node_alloc(node);
2827 continue; 2827 continue;
2828 } 2828 }
2829 n = kmem_cache_alloc_node(kmem_cache_node, 2829 n = kmem_cache_alloc_node(kmem_cache_node,
2830 GFP_KERNEL, node); 2830 GFP_KERNEL, node);
2831 2831
2832 if (!n) { 2832 if (!n) {
2833 free_kmem_cache_nodes(s); 2833 free_kmem_cache_nodes(s);
2834 return 0; 2834 return 0;
2835 } 2835 }
2836 2836
2837 s->node[node] = n; 2837 s->node[node] = n;
2838 init_kmem_cache_node(n, s); 2838 init_kmem_cache_node(n, s);
2839 } 2839 }
2840 return 1; 2840 return 1;
2841 } 2841 }
2842 2842
2843 static void set_min_partial(struct kmem_cache *s, unsigned long min) 2843 static void set_min_partial(struct kmem_cache *s, unsigned long min)
2844 { 2844 {
2845 if (min < MIN_PARTIAL) 2845 if (min < MIN_PARTIAL)
2846 min = MIN_PARTIAL; 2846 min = MIN_PARTIAL;
2847 else if (min > MAX_PARTIAL) 2847 else if (min > MAX_PARTIAL)
2848 min = MAX_PARTIAL; 2848 min = MAX_PARTIAL;
2849 s->min_partial = min; 2849 s->min_partial = min;
2850 } 2850 }
2851 2851
2852 /* 2852 /*
2853 * calculate_sizes() determines the order and the distribution of data within 2853 * calculate_sizes() determines the order and the distribution of data within
2854 * a slab object. 2854 * a slab object.
2855 */ 2855 */
2856 static int calculate_sizes(struct kmem_cache *s, int forced_order) 2856 static int calculate_sizes(struct kmem_cache *s, int forced_order)
2857 { 2857 {
2858 unsigned long flags = s->flags; 2858 unsigned long flags = s->flags;
2859 unsigned long size = s->objsize; 2859 unsigned long size = s->objsize;
2860 unsigned long align = s->align; 2860 unsigned long align = s->align;
2861 int order; 2861 int order;
2862 2862
2863 /* 2863 /*
2864 * Round up object size to the next word boundary. We can only 2864 * Round up object size to the next word boundary. We can only
2865 * place the free pointer at word boundaries and this determines 2865 * place the free pointer at word boundaries and this determines
2866 * the possible location of the free pointer. 2866 * the possible location of the free pointer.
2867 */ 2867 */
2868 size = ALIGN(size, sizeof(void *)); 2868 size = ALIGN(size, sizeof(void *));
2869 2869
2870 #ifdef CONFIG_SLUB_DEBUG 2870 #ifdef CONFIG_SLUB_DEBUG
2871 /* 2871 /*
2872 * Determine if we can poison the object itself. If the user of 2872 * Determine if we can poison the object itself. If the user of
2873 * the slab may touch the object after free or before allocation 2873 * the slab may touch the object after free or before allocation
2874 * then we should never poison the object itself. 2874 * then we should never poison the object itself.
2875 */ 2875 */
2876 if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) && 2876 if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
2877 !s->ctor) 2877 !s->ctor)
2878 s->flags |= __OBJECT_POISON; 2878 s->flags |= __OBJECT_POISON;
2879 else 2879 else
2880 s->flags &= ~__OBJECT_POISON; 2880 s->flags &= ~__OBJECT_POISON;
2881 2881
2882 2882
2883 /* 2883 /*
2884 * If we are Redzoning then check if there is some space between the 2884 * If we are Redzoning then check if there is some space between the
2885 * end of the object and the free pointer. If not then add an 2885 * end of the object and the free pointer. If not then add an
2886 * additional word to have some bytes to store Redzone information. 2886 * additional word to have some bytes to store Redzone information.
2887 */ 2887 */
2888 if ((flags & SLAB_RED_ZONE) && size == s->objsize) 2888 if ((flags & SLAB_RED_ZONE) && size == s->objsize)
2889 size += sizeof(void *); 2889 size += sizeof(void *);
2890 #endif 2890 #endif
2891 2891
2892 /* 2892 /*
2893 * With that we have determined the number of bytes in actual use 2893 * With that we have determined the number of bytes in actual use
2894 * by the object. This is the potential offset to the free pointer. 2894 * by the object. This is the potential offset to the free pointer.
2895 */ 2895 */
2896 s->inuse = size; 2896 s->inuse = size;
2897 2897
2898 if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) || 2898 if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
2899 s->ctor)) { 2899 s->ctor)) {
2900 /* 2900 /*
2901 * Relocate free pointer after the object if it is not 2901 * Relocate free pointer after the object if it is not
2902 * permitted to overwrite the first word of the object on 2902 * permitted to overwrite the first word of the object on
2903 * kmem_cache_free. 2903 * kmem_cache_free.
2904 * 2904 *
2905 * This is the case if we do RCU, have a constructor or 2905 * This is the case if we do RCU, have a constructor or
2906 * destructor or are poisoning the objects. 2906 * destructor or are poisoning the objects.
2907 */ 2907 */
2908 s->offset = size; 2908 s->offset = size;
2909 size += sizeof(void *); 2909 size += sizeof(void *);
2910 } 2910 }
2911 2911
2912 #ifdef CONFIG_SLUB_DEBUG 2912 #ifdef CONFIG_SLUB_DEBUG
2913 if (flags & SLAB_STORE_USER) 2913 if (flags & SLAB_STORE_USER)
2914 /* 2914 /*
2915 * Need to store information about allocs and frees after 2915 * Need to store information about allocs and frees after
2916 * the object. 2916 * the object.
2917 */ 2917 */
2918 size += 2 * sizeof(struct track); 2918 size += 2 * sizeof(struct track);
2919 2919
2920 if (flags & SLAB_RED_ZONE) 2920 if (flags & SLAB_RED_ZONE)
2921 /* 2921 /*
2922 * Add some empty padding so that we can catch 2922 * Add some empty padding so that we can catch
2923 * overwrites from earlier objects rather than let 2923 * overwrites from earlier objects rather than let
2924 * tracking information or the free pointer be 2924 * tracking information or the free pointer be
2925 * corrupted if a user writes before the start 2925 * corrupted if a user writes before the start
2926 * of the object. 2926 * of the object.
2927 */ 2927 */
2928 size += sizeof(void *); 2928 size += sizeof(void *);
2929 #endif 2929 #endif
2930 2930
2931 /* 2931 /*
2932 * Determine the alignment based on various parameters that the 2932 * Determine the alignment based on various parameters that the
2933 * user specified and the dynamic determination of cache line size 2933 * user specified and the dynamic determination of cache line size
2934 * on bootup. 2934 * on bootup.
2935 */ 2935 */
2936 align = calculate_alignment(flags, align, s->objsize); 2936 align = calculate_alignment(flags, align, s->objsize);
2937 s->align = align; 2937 s->align = align;
2938 2938
2939 /* 2939 /*
2940 * SLUB stores one object immediately after another beginning from 2940 * SLUB stores one object immediately after another beginning from
2941 * offset 0. In order to align the objects we have to simply size 2941 * offset 0. In order to align the objects we have to simply size
2942 * each object to conform to the alignment. 2942 * each object to conform to the alignment.
2943 */ 2943 */
2944 size = ALIGN(size, align); 2944 size = ALIGN(size, align);
2945 s->size = size; 2945 s->size = size;
2946 if (forced_order >= 0) 2946 if (forced_order >= 0)
2947 order = forced_order; 2947 order = forced_order;
2948 else 2948 else
2949 order = calculate_order(size, s->reserved); 2949 order = calculate_order(size, s->reserved);
2950 2950
2951 if (order < 0) 2951 if (order < 0)
2952 return 0; 2952 return 0;
2953 2953
2954 s->allocflags = 0; 2954 s->allocflags = 0;
2955 if (order) 2955 if (order)
2956 s->allocflags |= __GFP_COMP; 2956 s->allocflags |= __GFP_COMP;
2957 2957
2958 if (s->flags & SLAB_CACHE_DMA) 2958 if (s->flags & SLAB_CACHE_DMA)
2959 s->allocflags |= SLUB_DMA; 2959 s->allocflags |= SLUB_DMA;
2960 2960
2961 if (s->flags & SLAB_RECLAIM_ACCOUNT) 2961 if (s->flags & SLAB_RECLAIM_ACCOUNT)
2962 s->allocflags |= __GFP_RECLAIMABLE; 2962 s->allocflags |= __GFP_RECLAIMABLE;
2963 2963
2964 /* 2964 /*
2965 * Determine the number of objects per slab 2965 * Determine the number of objects per slab
2966 */ 2966 */
2967 s->oo = oo_make(order, size, s->reserved); 2967 s->oo = oo_make(order, size, s->reserved);
2968 s->min = oo_make(get_order(size), size, s->reserved); 2968 s->min = oo_make(get_order(size), size, s->reserved);
2969 if (oo_objects(s->oo) > oo_objects(s->max)) 2969 if (oo_objects(s->oo) > oo_objects(s->max))
2970 s->max = s->oo; 2970 s->max = s->oo;
2971 2971
2972 return !!oo_objects(s->oo); 2972 return !!oo_objects(s->oo);
2973 2973
2974 } 2974 }
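
To make the size accounting in calculate_sizes() concrete, here is one hypothetical debug layout done by hand for a 56-byte object with poisoning, red zoning and allocation tracking enabled. The sizeof(struct track) value and the 64-byte alignment are assumptions for the example, not values computed by the real code.

/*
 * Hand-rolled layout mirroring the steps above: word-align the object,
 * add a red-zone word, relocate the free pointer, add two tracking
 * records and a trailing pad word, then align the whole object.
 * sizeof(struct track) is a guess here.
 */
#include <stdio.h>

#define WORD		sizeof(void *)
#define ALIGN_UP(x, a)	(((x) + (a) - 1) / (a) * (a))

int main(void)
{
	unsigned long objsize = 56;		/* what the caller asked for */
	unsigned long track   = 24;		/* assumed sizeof(struct track) */
	unsigned long align   = 64;		/* e.g. SLAB_HWCACHE_ALIGN */
	unsigned long size    = ALIGN_UP(objsize, WORD);	/* 56 */
	unsigned long offset;

	size += WORD;		/* red zone word after the object	-> 64  */
	offset = size;		/* free pointer relocated here		       */
	size += WORD;		/*					-> 72  */
	size += 2 * track;	/* alloc + free tracking		-> 120 */
	size += WORD;		/* red-zone padding before the object	-> 128 */
	size  = ALIGN_UP(size, align);				/* 128 */

	printf("objsize %lu -> slab object size %lu, free pointer at %lu\n",
	       objsize, size, offset);
	return 0;
}

Without the debug options the same object would stay at 64 bytes (56 rounded up to the cache line), which is the kind of growth the disable_higher_order_debug check in kmem_cache_open() below is watching for.
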
2975 2975
2976 static int kmem_cache_open(struct kmem_cache *s, 2976 static int kmem_cache_open(struct kmem_cache *s,
2977 const char *name, size_t size, 2977 const char *name, size_t size,
2978 size_t align, unsigned long flags, 2978 size_t align, unsigned long flags,
2979 void (*ctor)(void *)) 2979 void (*ctor)(void *))
2980 { 2980 {
2981 memset(s, 0, kmem_size); 2981 memset(s, 0, kmem_size);
2982 s->name = name; 2982 s->name = name;
2983 s->ctor = ctor; 2983 s->ctor = ctor;
2984 s->objsize = size; 2984 s->objsize = size;
2985 s->align = align; 2985 s->align = align;
2986 s->flags = kmem_cache_flags(size, flags, name, ctor); 2986 s->flags = kmem_cache_flags(size, flags, name, ctor);
2987 s->reserved = 0; 2987 s->reserved = 0;
2988 2988
2989 if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU)) 2989 if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
2990 s->reserved = sizeof(struct rcu_head); 2990 s->reserved = sizeof(struct rcu_head);
2991 2991
2992 if (!calculate_sizes(s, -1)) 2992 if (!calculate_sizes(s, -1))
2993 goto error; 2993 goto error;
2994 if (disable_higher_order_debug) { 2994 if (disable_higher_order_debug) {
2995 /* 2995 /*
2996 * Disable debugging flags that store metadata if the min slab 2996 * Disable debugging flags that store metadata if the min slab
2997 * order increased. 2997 * order increased.
2998 */ 2998 */
2999 if (get_order(s->size) > get_order(s->objsize)) { 2999 if (get_order(s->size) > get_order(s->objsize)) {
3000 s->flags &= ~DEBUG_METADATA_FLAGS; 3000 s->flags &= ~DEBUG_METADATA_FLAGS;
3001 s->offset = 0; 3001 s->offset = 0;
3002 if (!calculate_sizes(s, -1)) 3002 if (!calculate_sizes(s, -1))
3003 goto error; 3003 goto error;
3004 } 3004 }
3005 } 3005 }
3006 3006
3007 #ifdef CONFIG_CMPXCHG_DOUBLE 3007 #ifdef CONFIG_CMPXCHG_DOUBLE
3008 if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0) 3008 if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
3009 /* Enable fast mode */ 3009 /* Enable fast mode */
3010 s->flags |= __CMPXCHG_DOUBLE; 3010 s->flags |= __CMPXCHG_DOUBLE;
3011 #endif 3011 #endif
3012 3012
3013 /* 3013 /*
3014 * The larger the object size is, the more pages we want on the partial 3014 * The larger the object size is, the more pages we want on the partial
3015 * list to avoid pounding the page allocator excessively. 3015 * list to avoid pounding the page allocator excessively.
3016 */ 3016 */
3017 set_min_partial(s, ilog2(s->size) / 2); 3017 set_min_partial(s, ilog2(s->size) / 2);
3018 3018
3019 /* 3019 /*
3020 * cpu_partial determines the maximum number of objects kept in the 3020 * cpu_partial determines the maximum number of objects kept in the
3021 * per cpu partial lists of a processor. 3021 * per cpu partial lists of a processor.
3022 * 3022 *
3023 * Per cpu partial lists mainly contain slabs that just have one 3023 * Per cpu partial lists mainly contain slabs that just have one
3024 * object freed. If they are used for allocation then they can be 3024 * object freed. If they are used for allocation then they can be
3025 * filled up again with minimal effort. The slab will never hit the 3025 * filled up again with minimal effort. The slab will never hit the
3026 * per node partial lists and therefore no locking will be required. 3026 * per node partial lists and therefore no locking will be required.
3027 * 3027 *
3028 * This setting also determines 3028 * This setting also determines
3029 * 3029 *
3030 * A) The number of objects from per cpu partial slabs dumped to the 3030 * A) The number of objects from per cpu partial slabs dumped to the
3031 * per node list when we reach the limit. 3031 * per node list when we reach the limit.
3032 * B) The number of objects in cpu partial slabs to extract from the 3032 * B) The number of objects in cpu partial slabs to extract from the
3033 * per node list when we run out of per cpu objects. We only fetch 50% 3033 * per node list when we run out of per cpu objects. We only fetch 50%
3034 * to keep some capacity around for frees. 3034 * to keep some capacity around for frees.
3035 */ 3035 */
3036 if (s->size >= PAGE_SIZE) 3036 if (s->size >= PAGE_SIZE)
3037 s->cpu_partial = 2; 3037 s->cpu_partial = 2;
3038 else if (s->size >= 1024) 3038 else if (s->size >= 1024)
3039 s->cpu_partial = 6; 3039 s->cpu_partial = 6;
3040 else if (s->size >= 256) 3040 else if (s->size >= 256)
3041 s->cpu_partial = 13; 3041 s->cpu_partial = 13;
3042 else 3042 else
3043 s->cpu_partial = 30; 3043 s->cpu_partial = 30;
3044 3044
3045 s->refcount = 1; 3045 s->refcount = 1;
3046 #ifdef CONFIG_NUMA 3046 #ifdef CONFIG_NUMA
3047 s->remote_node_defrag_ratio = 1000; 3047 s->remote_node_defrag_ratio = 1000;
3048 #endif 3048 #endif
3049 if (!init_kmem_cache_nodes(s)) 3049 if (!init_kmem_cache_nodes(s))
3050 goto error; 3050 goto error;
3051 3051
3052 if (alloc_kmem_cache_cpus(s)) 3052 if (alloc_kmem_cache_cpus(s))
3053 return 1; 3053 return 1;
3054 3054
3055 free_kmem_cache_nodes(s); 3055 free_kmem_cache_nodes(s);
3056 error: 3056 error:
3057 if (flags & SLAB_PANIC) 3057 if (flags & SLAB_PANIC)
3058 panic("Cannot create slab %s size=%lu realsize=%u " 3058 panic("Cannot create slab %s size=%lu realsize=%u "
3059 "order=%u offset=%u flags=%lx\n", 3059 "order=%u offset=%u flags=%lx\n",
3060 s->name, (unsigned long)size, s->size, oo_order(s->oo), 3060 s->name, (unsigned long)size, s->size, oo_order(s->oo),
3061 s->offset, flags); 3061 s->offset, flags);
3062 return 0; 3062 return 0;
3063 } 3063 }
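To make the cpu_partial policy in kmem_cache_open() above concrete, here is a small userspace sketch that reproduces the thresholds from the code: the larger the object, the fewer objects are allowed to sit on a processor's partial lists. PAGE_SIZE is assumed to be 4096 here and the helper name is invented for the example.

/* Illustrative only: thresholds copied from kmem_cache_open() above. */
#include <stdio.h>

#define EX_PAGE_SIZE 4096UL

static unsigned int ex_cpu_partial(unsigned long size)
{
	if (size >= EX_PAGE_SIZE)
		return 2;
	else if (size >= 1024)
		return 6;
	else if (size >= 256)
		return 13;
	else
		return 30;
}

int main(void)
{
	unsigned long sizes[] = { 64, 512, 2048, 8192 };

	for (int i = 0; i < 4; i++)
		printf("object size %5lu -> cpu_partial %u\n",
		       sizes[i], ex_cpu_partial(sizes[i]));
	return 0;
}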
3064 3064
3065 /* 3065 /*
3066 * Determine the size of a slab object 3066 * Determine the size of a slab object
3067 */ 3067 */
3068 unsigned int kmem_cache_size(struct kmem_cache *s) 3068 unsigned int kmem_cache_size(struct kmem_cache *s)
3069 { 3069 {
3070 return s->objsize; 3070 return s->objsize;
3071 } 3071 }
3072 EXPORT_SYMBOL(kmem_cache_size); 3072 EXPORT_SYMBOL(kmem_cache_size);
3073 3073
3074 static void list_slab_objects(struct kmem_cache *s, struct page *page, 3074 static void list_slab_objects(struct kmem_cache *s, struct page *page,
3075 const char *text) 3075 const char *text)
3076 { 3076 {
3077 #ifdef CONFIG_SLUB_DEBUG 3077 #ifdef CONFIG_SLUB_DEBUG
3078 void *addr = page_address(page); 3078 void *addr = page_address(page);
3079 void *p; 3079 void *p;
3080 unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) * 3080 unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) *
3081 sizeof(long), GFP_ATOMIC); 3081 sizeof(long), GFP_ATOMIC);
3082 if (!map) 3082 if (!map)
3083 return; 3083 return;
3084 slab_err(s, page, "%s", text); 3084 slab_err(s, page, "%s", text);
3085 slab_lock(page); 3085 slab_lock(page);
3086 3086
3087 get_map(s, page, map); 3087 get_map(s, page, map);
3088 for_each_object(p, s, addr, page->objects) { 3088 for_each_object(p, s, addr, page->objects) {
3089 3089
3090 if (!test_bit(slab_index(p, s, addr), map)) { 3090 if (!test_bit(slab_index(p, s, addr), map)) {
3091 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n", 3091 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n",
3092 p, p - addr); 3092 p, p - addr);
3093 print_tracking(s, p); 3093 print_tracking(s, p);
3094 } 3094 }
3095 } 3095 }
3096 slab_unlock(page); 3096 slab_unlock(page);
3097 kfree(map); 3097 kfree(map);
3098 #endif 3098 #endif
3099 } 3099 }
3100 3100
3101 /* 3101 /*
3102 * Attempt to free all partial slabs on a node. 3102 * Attempt to free all partial slabs on a node.
3103 * This is called from kmem_cache_close(). We must be the last thread 3103 * This is called from kmem_cache_close(). We must be the last thread
3104 * using the cache and therefore we do not need to lock anymore. 3104 * using the cache and therefore we do not need to lock anymore.
3105 */ 3105 */
3106 static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) 3106 static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3107 { 3107 {
3108 struct page *page, *h; 3108 struct page *page, *h;
3109 3109
3110 list_for_each_entry_safe(page, h, &n->partial, lru) { 3110 list_for_each_entry_safe(page, h, &n->partial, lru) {
3111 if (!page->inuse) { 3111 if (!page->inuse) {
3112 remove_partial(n, page); 3112 remove_partial(n, page);
3113 discard_slab(s, page); 3113 discard_slab(s, page);
3114 } else { 3114 } else {
3115 list_slab_objects(s, page, 3115 list_slab_objects(s, page,
3116 "Objects remaining on kmem_cache_close()"); 3116 "Objects remaining on kmem_cache_close()");
3117 } 3117 }
3118 } 3118 }
3119 } 3119 }
3120 3120
3121 /* 3121 /*
3122 * Release all resources used by a slab cache. 3122 * Release all resources used by a slab cache.
3123 */ 3123 */
3124 static inline int kmem_cache_close(struct kmem_cache *s) 3124 static inline int kmem_cache_close(struct kmem_cache *s)
3125 { 3125 {
3126 int node; 3126 int node;
3127 3127
3128 flush_all(s); 3128 flush_all(s);
3129 free_percpu(s->cpu_slab); 3129 free_percpu(s->cpu_slab);
3130 /* Attempt to free all objects */ 3130 /* Attempt to free all objects */
3131 for_each_node_state(node, N_NORMAL_MEMORY) { 3131 for_each_node_state(node, N_NORMAL_MEMORY) {
3132 struct kmem_cache_node *n = get_node(s, node); 3132 struct kmem_cache_node *n = get_node(s, node);
3133 3133
3134 free_partial(s, n); 3134 free_partial(s, n);
3135 if (n->nr_partial || slabs_node(s, node)) 3135 if (n->nr_partial || slabs_node(s, node))
3136 return 1; 3136 return 1;
3137 } 3137 }
3138 free_kmem_cache_nodes(s); 3138 free_kmem_cache_nodes(s);
3139 return 0; 3139 return 0;
3140 } 3140 }
3141 3141
3142 /* 3142 /*
3143 * Close a cache and release the kmem_cache structure 3143 * Close a cache and release the kmem_cache structure
3144 * (must be used for caches created using kmem_cache_create) 3144 * (must be used for caches created using kmem_cache_create)
3145 */ 3145 */
3146 void kmem_cache_destroy(struct kmem_cache *s) 3146 void kmem_cache_destroy(struct kmem_cache *s)
3147 { 3147 {
3148 down_write(&slub_lock); 3148 down_write(&slub_lock);
3149 s->refcount--; 3149 s->refcount--;
3150 if (!s->refcount) { 3150 if (!s->refcount) {
3151 list_del(&s->list); 3151 list_del(&s->list);
3152 up_write(&slub_lock); 3152 up_write(&slub_lock);
3153 if (kmem_cache_close(s)) { 3153 if (kmem_cache_close(s)) {
3154 printk(KERN_ERR "SLUB %s: %s called for cache that " 3154 printk(KERN_ERR "SLUB %s: %s called for cache that "
3155 "still has objects.\n", s->name, __func__); 3155 "still has objects.\n", s->name, __func__);
3156 dump_stack(); 3156 dump_stack();
3157 } 3157 }
3158 if (s->flags & SLAB_DESTROY_BY_RCU) 3158 if (s->flags & SLAB_DESTROY_BY_RCU)
3159 rcu_barrier(); 3159 rcu_barrier();
3160 sysfs_slab_remove(s); 3160 sysfs_slab_remove(s);
3161 } else 3161 } else
3162 up_write(&slub_lock); 3162 up_write(&slub_lock);
3163 } 3163 }
3164 EXPORT_SYMBOL(kmem_cache_destroy); 3164 EXPORT_SYMBOL(kmem_cache_destroy);
3165 3165
3166 /******************************************************************** 3166 /********************************************************************
3167 * Kmalloc subsystem 3167 * Kmalloc subsystem
3168 *******************************************************************/ 3168 *******************************************************************/
3169 3169
3170 struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; 3170 struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
3171 EXPORT_SYMBOL(kmalloc_caches); 3171 EXPORT_SYMBOL(kmalloc_caches);
3172 3172
3173 static struct kmem_cache *kmem_cache; 3173 static struct kmem_cache *kmem_cache;
3174 3174
3175 #ifdef CONFIG_ZONE_DMA 3175 #ifdef CONFIG_ZONE_DMA
3176 static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT]; 3176 static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT];
3177 #endif 3177 #endif
3178 3178
3179 static int __init setup_slub_min_order(char *str) 3179 static int __init setup_slub_min_order(char *str)
3180 { 3180 {
3181 get_option(&str, &slub_min_order); 3181 get_option(&str, &slub_min_order);
3182 3182
3183 return 1; 3183 return 1;
3184 } 3184 }
3185 3185
3186 __setup("slub_min_order=", setup_slub_min_order); 3186 __setup("slub_min_order=", setup_slub_min_order);
3187 3187
3188 static int __init setup_slub_max_order(char *str) 3188 static int __init setup_slub_max_order(char *str)
3189 { 3189 {
3190 get_option(&str, &slub_max_order); 3190 get_option(&str, &slub_max_order);
3191 slub_max_order = min(slub_max_order, MAX_ORDER - 1); 3191 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
3192 3192
3193 return 1; 3193 return 1;
3194 } 3194 }
3195 3195
3196 __setup("slub_max_order=", setup_slub_max_order); 3196 __setup("slub_max_order=", setup_slub_max_order);
3197 3197
3198 static int __init setup_slub_min_objects(char *str) 3198 static int __init setup_slub_min_objects(char *str)
3199 { 3199 {
3200 get_option(&str, &slub_min_objects); 3200 get_option(&str, &slub_min_objects);
3201 3201
3202 return 1; 3202 return 1;
3203 } 3203 }
3204 3204
3205 __setup("slub_min_objects=", setup_slub_min_objects); 3205 __setup("slub_min_objects=", setup_slub_min_objects);
3206 3206
3207 static int __init setup_slub_nomerge(char *str) 3207 static int __init setup_slub_nomerge(char *str)
3208 { 3208 {
3209 slub_nomerge = 1; 3209 slub_nomerge = 1;
3210 return 1; 3210 return 1;
3211 } 3211 }
3212 3212
3213 __setup("slub_nomerge", setup_slub_nomerge); 3213 __setup("slub_nomerge", setup_slub_nomerge);
3214 3214
3215 static struct kmem_cache *__init create_kmalloc_cache(const char *name, 3215 static struct kmem_cache *__init create_kmalloc_cache(const char *name,
3216 int size, unsigned int flags) 3216 int size, unsigned int flags)
3217 { 3217 {
3218 struct kmem_cache *s; 3218 struct kmem_cache *s;
3219 3219
3220 s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); 3220 s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3221 3221
3222 /* 3222 /*
3223 * This function is called with IRQs disabled during early-boot on 3223 * This function is called with IRQs disabled during early-boot on
3224 * single CPU so there's no need to take slub_lock here. 3224 * single CPU so there's no need to take slub_lock here.
3225 */ 3225 */
3226 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN, 3226 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN,
3227 flags, NULL)) 3227 flags, NULL))
3228 goto panic; 3228 goto panic;
3229 3229
3230 list_add(&s->list, &slab_caches); 3230 list_add(&s->list, &slab_caches);
3231 return s; 3231 return s;
3232 3232
3233 panic: 3233 panic:
3234 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size); 3234 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
3235 return NULL; 3235 return NULL;
3236 } 3236 }
3237 3237
3238 /* 3238 /*
3239 * Conversion table for small slab sizes / 8 to the index in the 3239 * Conversion table for small slab sizes / 8 to the index in the
3240 * kmalloc array. This is necessary for slabs < 192 since we have non power 3240 * kmalloc array. This is necessary for slabs < 192 since we have non power
3241 * of two cache sizes there. The size of larger slabs can be determined using 3241 * of two cache sizes there. The size of larger slabs can be determined using
3242 * fls. 3242 * fls.
3243 */ 3243 */
3244 static s8 size_index[24] = { 3244 static s8 size_index[24] = {
3245 3, /* 8 */ 3245 3, /* 8 */
3246 4, /* 16 */ 3246 4, /* 16 */
3247 5, /* 24 */ 3247 5, /* 24 */
3248 5, /* 32 */ 3248 5, /* 32 */
3249 6, /* 40 */ 3249 6, /* 40 */
3250 6, /* 48 */ 3250 6, /* 48 */
3251 6, /* 56 */ 3251 6, /* 56 */
3252 6, /* 64 */ 3252 6, /* 64 */
3253 1, /* 72 */ 3253 1, /* 72 */
3254 1, /* 80 */ 3254 1, /* 80 */
3255 1, /* 88 */ 3255 1, /* 88 */
3256 1, /* 96 */ 3256 1, /* 96 */
3257 7, /* 104 */ 3257 7, /* 104 */
3258 7, /* 112 */ 3258 7, /* 112 */
3259 7, /* 120 */ 3259 7, /* 120 */
3260 7, /* 128 */ 3260 7, /* 128 */
3261 2, /* 136 */ 3261 2, /* 136 */
3262 2, /* 144 */ 3262 2, /* 144 */
3263 2, /* 152 */ 3263 2, /* 152 */
3264 2, /* 160 */ 3264 2, /* 160 */
3265 2, /* 168 */ 3265 2, /* 168 */
3266 2, /* 176 */ 3266 2, /* 176 */
3267 2, /* 184 */ 3267 2, /* 184 */
3268 2 /* 192 */ 3268 2 /* 192 */
3269 }; 3269 };
3270 3270
3271 static inline int size_index_elem(size_t bytes) 3271 static inline int size_index_elem(size_t bytes)
3272 { 3272 {
3273 return (bytes - 1) / 8; 3273 return (bytes - 1) / 8;
3274 } 3274 }
3275 3275
3276 static struct kmem_cache *get_slab(size_t size, gfp_t flags) 3276 static struct kmem_cache *get_slab(size_t size, gfp_t flags)
3277 { 3277 {
3278 int index; 3278 int index;
3279 3279
3280 if (size <= 192) { 3280 if (size <= 192) {
3281 if (!size) 3281 if (!size)
3282 return ZERO_SIZE_PTR; 3282 return ZERO_SIZE_PTR;
3283 3283
3284 index = size_index[size_index_elem(size)]; 3284 index = size_index[size_index_elem(size)];
3285 } else 3285 } else
3286 index = fls(size - 1); 3286 index = fls(size - 1);
3287 3287
3288 #ifdef CONFIG_ZONE_DMA 3288 #ifdef CONFIG_ZONE_DMA
3289 if (unlikely((flags & SLUB_DMA))) 3289 if (unlikely((flags & SLUB_DMA)))
3290 return kmalloc_dma_caches[index]; 3290 return kmalloc_dma_caches[index];
3291 3291
3292 #endif 3292 #endif
3293 return kmalloc_caches[index]; 3293 return kmalloc_caches[index];
3294 } 3294 }
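The table-plus-fls lookup in get_slab() above can be exercised in isolation. The sketch below is a hedged userspace rendering with the size_index[] contents copied from the listing and a simple stand-in for the kernel's fls(); it assumes the default table, i.e. none of the minimum-alignment patch-ups applied later in kmem_cache_init().

#include <stdio.h>

/* Copied from size_index[] above: kmalloc array index for sizes 8..192. */
static const signed char ex_size_index[24] = {
	3, 4, 5, 5, 6, 6, 6, 6,		/*   8 ..  64 */
	1, 1, 1, 1, 7, 7, 7, 7,		/*  72 .. 128 */
	2, 2, 2, 2, 2, 2, 2, 2		/* 136 .. 192 */
};

static int ex_fls(unsigned long x)	/* stand-in for the kernel's fls() */
{
	int r = 0;

	while (x) {
		r++;
		x >>= 1;
	}
	return r;
}

static int ex_kmalloc_index(unsigned long size)
{
	if (!size)
		return -1;		/* the kernel returns ZERO_SIZE_PTR here */
	if (size <= 192)
		return ex_size_index[(size - 1) / 8];
	return ex_fls(size - 1);
}

int main(void)
{
	unsigned long sizes[] = { 8, 100, 192, 200, 1000, 4096 };

	for (int i = 0; i < 6; i++)
		printf("kmalloc(%4lu) -> cache index %d\n",
		       sizes[i], ex_kmalloc_index(sizes[i]));
	return 0;
}

For example, a 200-byte request falls through to fls(199) = 8, i.e. the 256-byte cache, while a 100-byte request is served from the 128-byte cache via the table.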
3295 3295
3296 void *__kmalloc(size_t size, gfp_t flags) 3296 void *__kmalloc(size_t size, gfp_t flags)
3297 { 3297 {
3298 struct kmem_cache *s; 3298 struct kmem_cache *s;
3299 void *ret; 3299 void *ret;
3300 3300
3301 if (unlikely(size > SLUB_MAX_SIZE)) 3301 if (unlikely(size > SLUB_MAX_SIZE))
3302 return kmalloc_large(size, flags); 3302 return kmalloc_large(size, flags);
3303 3303
3304 s = get_slab(size, flags); 3304 s = get_slab(size, flags);
3305 3305
3306 if (unlikely(ZERO_OR_NULL_PTR(s))) 3306 if (unlikely(ZERO_OR_NULL_PTR(s)))
3307 return s; 3307 return s;
3308 3308
3309 ret = slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_); 3309 ret = slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_);
3310 3310
3311 trace_kmalloc(_RET_IP_, ret, size, s->size, flags); 3311 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3312 3312
3313 return ret; 3313 return ret;
3314 } 3314 }
3315 EXPORT_SYMBOL(__kmalloc); 3315 EXPORT_SYMBOL(__kmalloc);
3316 3316
3317 #ifdef CONFIG_NUMA 3317 #ifdef CONFIG_NUMA
3318 static void *kmalloc_large_node(size_t size, gfp_t flags, int node) 3318 static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3319 { 3319 {
3320 struct page *page; 3320 struct page *page;
3321 void *ptr = NULL; 3321 void *ptr = NULL;
3322 3322
3323 flags |= __GFP_COMP | __GFP_NOTRACK; 3323 flags |= __GFP_COMP | __GFP_NOTRACK;
3324 page = alloc_pages_node(node, flags, get_order(size)); 3324 page = alloc_pages_node(node, flags, get_order(size));
3325 if (page) 3325 if (page)
3326 ptr = page_address(page); 3326 ptr = page_address(page);
3327 3327
3328 kmemleak_alloc(ptr, size, 1, flags); 3328 kmemleak_alloc(ptr, size, 1, flags);
3329 return ptr; 3329 return ptr;
3330 } 3330 }
3331 3331
3332 void *__kmalloc_node(size_t size, gfp_t flags, int node) 3332 void *__kmalloc_node(size_t size, gfp_t flags, int node)
3333 { 3333 {
3334 struct kmem_cache *s; 3334 struct kmem_cache *s;
3335 void *ret; 3335 void *ret;
3336 3336
3337 if (unlikely(size > SLUB_MAX_SIZE)) { 3337 if (unlikely(size > SLUB_MAX_SIZE)) {
3338 ret = kmalloc_large_node(size, flags, node); 3338 ret = kmalloc_large_node(size, flags, node);
3339 3339
3340 trace_kmalloc_node(_RET_IP_, ret, 3340 trace_kmalloc_node(_RET_IP_, ret,
3341 size, PAGE_SIZE << get_order(size), 3341 size, PAGE_SIZE << get_order(size),
3342 flags, node); 3342 flags, node);
3343 3343
3344 return ret; 3344 return ret;
3345 } 3345 }
3346 3346
3347 s = get_slab(size, flags); 3347 s = get_slab(size, flags);
3348 3348
3349 if (unlikely(ZERO_OR_NULL_PTR(s))) 3349 if (unlikely(ZERO_OR_NULL_PTR(s)))
3350 return s; 3350 return s;
3351 3351
3352 ret = slab_alloc(s, flags, node, _RET_IP_); 3352 ret = slab_alloc(s, flags, node, _RET_IP_);
3353 3353
3354 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node); 3354 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3355 3355
3356 return ret; 3356 return ret;
3357 } 3357 }
3358 EXPORT_SYMBOL(__kmalloc_node); 3358 EXPORT_SYMBOL(__kmalloc_node);
3359 #endif 3359 #endif
3360 3360
3361 size_t ksize(const void *object) 3361 size_t ksize(const void *object)
3362 { 3362 {
3363 struct page *page; 3363 struct page *page;
3364 3364
3365 if (unlikely(object == ZERO_SIZE_PTR)) 3365 if (unlikely(object == ZERO_SIZE_PTR))
3366 return 0; 3366 return 0;
3367 3367
3368 page = virt_to_head_page(object); 3368 page = virt_to_head_page(object);
3369 3369
3370 if (unlikely(!PageSlab(page))) { 3370 if (unlikely(!PageSlab(page))) {
3371 WARN_ON(!PageCompound(page)); 3371 WARN_ON(!PageCompound(page));
3372 return PAGE_SIZE << compound_order(page); 3372 return PAGE_SIZE << compound_order(page);
3373 } 3373 }
3374 3374
3375 return slab_ksize(page->slab); 3375 return slab_ksize(page->slab);
3376 } 3376 }
3377 EXPORT_SYMBOL(ksize); 3377 EXPORT_SYMBOL(ksize);
3378 3378
3379 #ifdef CONFIG_SLUB_DEBUG 3379 #ifdef CONFIG_SLUB_DEBUG
3380 bool verify_mem_not_deleted(const void *x) 3380 bool verify_mem_not_deleted(const void *x)
3381 { 3381 {
3382 struct page *page; 3382 struct page *page;
3383 void *object = (void *)x; 3383 void *object = (void *)x;
3384 unsigned long flags; 3384 unsigned long flags;
3385 bool rv; 3385 bool rv;
3386 3386
3387 if (unlikely(ZERO_OR_NULL_PTR(x))) 3387 if (unlikely(ZERO_OR_NULL_PTR(x)))
3388 return false; 3388 return false;
3389 3389
3390 local_irq_save(flags); 3390 local_irq_save(flags);
3391 3391
3392 page = virt_to_head_page(x); 3392 page = virt_to_head_page(x);
3393 if (unlikely(!PageSlab(page))) { 3393 if (unlikely(!PageSlab(page))) {
3394 /* maybe it was from stack? */ 3394 /* maybe it was from stack? */
3395 rv = true; 3395 rv = true;
3396 goto out_unlock; 3396 goto out_unlock;
3397 } 3397 }
3398 3398
3399 slab_lock(page); 3399 slab_lock(page);
3400 if (on_freelist(page->slab, page, object)) { 3400 if (on_freelist(page->slab, page, object)) {
3401 object_err(page->slab, page, object, "Object is on free-list"); 3401 object_err(page->slab, page, object, "Object is on free-list");
3402 rv = false; 3402 rv = false;
3403 } else { 3403 } else {
3404 rv = true; 3404 rv = true;
3405 } 3405 }
3406 slab_unlock(page); 3406 slab_unlock(page);
3407 3407
3408 out_unlock: 3408 out_unlock:
3409 local_irq_restore(flags); 3409 local_irq_restore(flags);
3410 return rv; 3410 return rv;
3411 } 3411 }
3412 EXPORT_SYMBOL(verify_mem_not_deleted); 3412 EXPORT_SYMBOL(verify_mem_not_deleted);
3413 #endif 3413 #endif
3414 3414
3415 void kfree(const void *x) 3415 void kfree(const void *x)
3416 { 3416 {
3417 struct page *page; 3417 struct page *page;
3418 void *object = (void *)x; 3418 void *object = (void *)x;
3419 3419
3420 trace_kfree(_RET_IP_, x); 3420 trace_kfree(_RET_IP_, x);
3421 3421
3422 if (unlikely(ZERO_OR_NULL_PTR(x))) 3422 if (unlikely(ZERO_OR_NULL_PTR(x)))
3423 return; 3423 return;
3424 3424
3425 page = virt_to_head_page(x); 3425 page = virt_to_head_page(x);
3426 if (unlikely(!PageSlab(page))) { 3426 if (unlikely(!PageSlab(page))) {
3427 BUG_ON(!PageCompound(page)); 3427 BUG_ON(!PageCompound(page));
3428 kmemleak_free(x); 3428 kmemleak_free(x);
3429 put_page(page); 3429 put_page(page);
3430 return; 3430 return;
3431 } 3431 }
3432 slab_free(page->slab, page, object, _RET_IP_); 3432 slab_free(page->slab, page, object, _RET_IP_);
3433 } 3433 }
3434 EXPORT_SYMBOL(kfree); 3434 EXPORT_SYMBOL(kfree);
3435 3435
3436 /* 3436 /*
3437 * kmem_cache_shrink removes empty slabs from the partial lists and sorts 3437 * kmem_cache_shrink removes empty slabs from the partial lists and sorts
3438 * the remaining slabs by the number of items in use. The slabs with the 3438 * the remaining slabs by the number of items in use. The slabs with the
3439 * most items in use come first. New allocations will then fill those up 3439 * most items in use come first. New allocations will then fill those up
3440 * and thus they can be removed from the partial lists. 3440 * and thus they can be removed from the partial lists.
3441 * 3441 *
3442 * The slabs with the fewest items are placed last. This results in them 3442 * The slabs with the fewest items are placed last. This results in them
3443 * being allocated from last, increasing the chance that the last objects 3443 * being allocated from last, increasing the chance that the last objects
3444 * are freed in them. 3444 * are freed in them.
3445 */ 3445 */
3446 int kmem_cache_shrink(struct kmem_cache *s) 3446 int kmem_cache_shrink(struct kmem_cache *s)
3447 { 3447 {
3448 int node; 3448 int node;
3449 int i; 3449 int i;
3450 struct kmem_cache_node *n; 3450 struct kmem_cache_node *n;
3451 struct page *page; 3451 struct page *page;
3452 struct page *t; 3452 struct page *t;
3453 int objects = oo_objects(s->max); 3453 int objects = oo_objects(s->max);
3454 struct list_head *slabs_by_inuse = 3454 struct list_head *slabs_by_inuse =
3455 kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL); 3455 kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
3456 unsigned long flags; 3456 unsigned long flags;
3457 3457
3458 if (!slabs_by_inuse) 3458 if (!slabs_by_inuse)
3459 return -ENOMEM; 3459 return -ENOMEM;
3460 3460
3461 flush_all(s); 3461 flush_all(s);
3462 for_each_node_state(node, N_NORMAL_MEMORY) { 3462 for_each_node_state(node, N_NORMAL_MEMORY) {
3463 n = get_node(s, node); 3463 n = get_node(s, node);
3464 3464
3465 if (!n->nr_partial) 3465 if (!n->nr_partial)
3466 continue; 3466 continue;
3467 3467
3468 for (i = 0; i < objects; i++) 3468 for (i = 0; i < objects; i++)
3469 INIT_LIST_HEAD(slabs_by_inuse + i); 3469 INIT_LIST_HEAD(slabs_by_inuse + i);
3470 3470
3471 spin_lock_irqsave(&n->list_lock, flags); 3471 spin_lock_irqsave(&n->list_lock, flags);
3472 3472
3473 /* 3473 /*
3474 * Build lists indexed by the items in use in each slab. 3474 * Build lists indexed by the items in use in each slab.
3475 * 3475 *
3476 * Note that concurrent frees may occur while we hold the 3476 * Note that concurrent frees may occur while we hold the
3477 * list_lock. page->inuse here is the upper limit. 3477 * list_lock. page->inuse here is the upper limit.
3478 */ 3478 */
3479 list_for_each_entry_safe(page, t, &n->partial, lru) { 3479 list_for_each_entry_safe(page, t, &n->partial, lru) {
3480 list_move(&page->lru, slabs_by_inuse + page->inuse); 3480 list_move(&page->lru, slabs_by_inuse + page->inuse);
3481 if (!page->inuse) 3481 if (!page->inuse)
3482 n->nr_partial--; 3482 n->nr_partial--;
3483 } 3483 }
3484 3484
3485 /* 3485 /*
3486 * Rebuild the partial list with the slabs filled up most 3486 * Rebuild the partial list with the slabs filled up most
3487 * first and the least used slabs at the end. 3487 * first and the least used slabs at the end.
3488 */ 3488 */
3489 for (i = objects - 1; i > 0; i--) 3489 for (i = objects - 1; i > 0; i--)
3490 list_splice(slabs_by_inuse + i, n->partial.prev); 3490 list_splice(slabs_by_inuse + i, n->partial.prev);
3491 3491
3492 spin_unlock_irqrestore(&n->list_lock, flags); 3492 spin_unlock_irqrestore(&n->list_lock, flags);
3493 3493
3494 /* Release empty slabs */ 3494 /* Release empty slabs */
3495 list_for_each_entry_safe(page, t, slabs_by_inuse, lru) 3495 list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
3496 discard_slab(s, page); 3496 discard_slab(s, page);
3497 } 3497 }
3498 3498
3499 kfree(slabs_by_inuse); 3499 kfree(slabs_by_inuse);
3500 return 0; 3500 return 0;
3501 } 3501 }
3502 EXPORT_SYMBOL(kmem_cache_shrink); 3502 EXPORT_SYMBOL(kmem_cache_shrink);
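The reordering done by kmem_cache_shrink() above is essentially a bucket sort keyed on page->inuse. The sketch below shows the same idea in plain userspace C with integers standing in for struct page; the slab counts and array bounds are made up for the example.

#include <stdio.h>

#define EX_MAX_OBJECTS 8	/* assumed objects-per-slab for the example */

int main(void)
{
	int inuse[] = { 3, 0, 7, 2, 7, 1 };	/* hypothetical partial slabs */
	int nslabs = 6;
	int buckets[EX_MAX_OBJECTS][8];
	int count[EX_MAX_OBJECTS] = { 0 };

	/* Bucket each slab by how many objects it still has in use. */
	for (int i = 0; i < nslabs; i++)
		buckets[inuse[i]][count[inuse[i]]++] = i;

	/* Rebuild "fullest first"; bucket 0 (empty slabs) would be discarded. */
	printf("new partial list order:\n");
	for (int b = EX_MAX_OBJECTS - 1; b > 0; b--)
		for (int i = 0; i < count[b]; i++)
			printf("  slab %d (inuse=%d)\n", buckets[b][i], b);
	return 0;
}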
3503 3503
3504 #if defined(CONFIG_MEMORY_HOTPLUG) 3504 #if defined(CONFIG_MEMORY_HOTPLUG)
3505 static int slab_mem_going_offline_callback(void *arg) 3505 static int slab_mem_going_offline_callback(void *arg)
3506 { 3506 {
3507 struct kmem_cache *s; 3507 struct kmem_cache *s;
3508 3508
3509 down_read(&slub_lock); 3509 down_read(&slub_lock);
3510 list_for_each_entry(s, &slab_caches, list) 3510 list_for_each_entry(s, &slab_caches, list)
3511 kmem_cache_shrink(s); 3511 kmem_cache_shrink(s);
3512 up_read(&slub_lock); 3512 up_read(&slub_lock);
3513 3513
3514 return 0; 3514 return 0;
3515 } 3515 }
3516 3516
3517 static void slab_mem_offline_callback(void *arg) 3517 static void slab_mem_offline_callback(void *arg)
3518 { 3518 {
3519 struct kmem_cache_node *n; 3519 struct kmem_cache_node *n;
3520 struct kmem_cache *s; 3520 struct kmem_cache *s;
3521 struct memory_notify *marg = arg; 3521 struct memory_notify *marg = arg;
3522 int offline_node; 3522 int offline_node;
3523 3523
3524 offline_node = marg->status_change_nid; 3524 offline_node = marg->status_change_nid;
3525 3525
3526 /* 3526 /*
3527 * If the node still has available memory, we still need its 3527 * If the node still has available memory, we still need its
3528 * kmem_cache_node structure. 3528 * kmem_cache_node structure.
3529 */ 3529 */
3530 if (offline_node < 0) 3530 if (offline_node < 0)
3531 return; 3531 return;
3532 3532
3533 down_read(&slub_lock); 3533 down_read(&slub_lock);
3534 list_for_each_entry(s, &slab_caches, list) { 3534 list_for_each_entry(s, &slab_caches, list) {
3535 n = get_node(s, offline_node); 3535 n = get_node(s, offline_node);
3536 if (n) { 3536 if (n) {
3537 /* 3537 /*
3538 * if n->nr_slabs > 0, slabs still exist on the node 3538 * if n->nr_slabs > 0, slabs still exist on the node
3539 * that is going down. We were unable to free them, 3539 * that is going down. We were unable to free them,
3540 * and offline_pages() function shouldn't call this 3540 * and offline_pages() function shouldn't call this
3541 * callback. So, we must fail. 3541 * callback. So, we must fail.
3542 */ 3542 */
3543 BUG_ON(slabs_node(s, offline_node)); 3543 BUG_ON(slabs_node(s, offline_node));
3544 3544
3545 s->node[offline_node] = NULL; 3545 s->node[offline_node] = NULL;
3546 kmem_cache_free(kmem_cache_node, n); 3546 kmem_cache_free(kmem_cache_node, n);
3547 } 3547 }
3548 } 3548 }
3549 up_read(&slub_lock); 3549 up_read(&slub_lock);
3550 } 3550 }
3551 3551
3552 static int slab_mem_going_online_callback(void *arg) 3552 static int slab_mem_going_online_callback(void *arg)
3553 { 3553 {
3554 struct kmem_cache_node *n; 3554 struct kmem_cache_node *n;
3555 struct kmem_cache *s; 3555 struct kmem_cache *s;
3556 struct memory_notify *marg = arg; 3556 struct memory_notify *marg = arg;
3557 int nid = marg->status_change_nid; 3557 int nid = marg->status_change_nid;
3558 int ret = 0; 3558 int ret = 0;
3559 3559
3560 /* 3560 /*
3561 * If the node's memory is already available, then kmem_cache_node is 3561 * If the node's memory is already available, then kmem_cache_node is
3562 * already created. Nothing to do. 3562 * already created. Nothing to do.
3563 */ 3563 */
3564 if (nid < 0) 3564 if (nid < 0)
3565 return 0; 3565 return 0;
3566 3566
3567 /* 3567 /*
3568 * We are bringing a node online. No memory is available yet. We must 3568 * We are bringing a node online. No memory is available yet. We must
3569 * allocate a kmem_cache_node structure in order to bring the node 3569 * allocate a kmem_cache_node structure in order to bring the node
3570 * online. 3570 * online.
3571 */ 3571 */
3572 down_read(&slub_lock); 3572 down_read(&slub_lock);
3573 list_for_each_entry(s, &slab_caches, list) { 3573 list_for_each_entry(s, &slab_caches, list) {
3574 /* 3574 /*
3575 * XXX: kmem_cache_alloc_node will fall back to other nodes 3575 * XXX: kmem_cache_alloc_node will fall back to other nodes
3576 * since memory is not yet available from the node that 3576 * since memory is not yet available from the node that
3577 * is brought up. 3577 * is brought up.
3578 */ 3578 */
3579 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL); 3579 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
3580 if (!n) { 3580 if (!n) {
3581 ret = -ENOMEM; 3581 ret = -ENOMEM;
3582 goto out; 3582 goto out;
3583 } 3583 }
3584 init_kmem_cache_node(n, s); 3584 init_kmem_cache_node(n, s);
3585 s->node[nid] = n; 3585 s->node[nid] = n;
3586 } 3586 }
3587 out: 3587 out:
3588 up_read(&slub_lock); 3588 up_read(&slub_lock);
3589 return ret; 3589 return ret;
3590 } 3590 }
3591 3591
3592 static int slab_memory_callback(struct notifier_block *self, 3592 static int slab_memory_callback(struct notifier_block *self,
3593 unsigned long action, void *arg) 3593 unsigned long action, void *arg)
3594 { 3594 {
3595 int ret = 0; 3595 int ret = 0;
3596 3596
3597 switch (action) { 3597 switch (action) {
3598 case MEM_GOING_ONLINE: 3598 case MEM_GOING_ONLINE:
3599 ret = slab_mem_going_online_callback(arg); 3599 ret = slab_mem_going_online_callback(arg);
3600 break; 3600 break;
3601 case MEM_GOING_OFFLINE: 3601 case MEM_GOING_OFFLINE:
3602 ret = slab_mem_going_offline_callback(arg); 3602 ret = slab_mem_going_offline_callback(arg);
3603 break; 3603 break;
3604 case MEM_OFFLINE: 3604 case MEM_OFFLINE:
3605 case MEM_CANCEL_ONLINE: 3605 case MEM_CANCEL_ONLINE:
3606 slab_mem_offline_callback(arg); 3606 slab_mem_offline_callback(arg);
3607 break; 3607 break;
3608 case MEM_ONLINE: 3608 case MEM_ONLINE:
3609 case MEM_CANCEL_OFFLINE: 3609 case MEM_CANCEL_OFFLINE:
3610 break; 3610 break;
3611 } 3611 }
3612 if (ret) 3612 if (ret)
3613 ret = notifier_from_errno(ret); 3613 ret = notifier_from_errno(ret);
3614 else 3614 else
3615 ret = NOTIFY_OK; 3615 ret = NOTIFY_OK;
3616 return ret; 3616 return ret;
3617 } 3617 }
3618 3618
3619 #endif /* CONFIG_MEMORY_HOTPLUG */ 3619 #endif /* CONFIG_MEMORY_HOTPLUG */
3620 3620
3621 /******************************************************************** 3621 /********************************************************************
3622 * Basic setup of slabs 3622 * Basic setup of slabs
3623 *******************************************************************/ 3623 *******************************************************************/
3624 3624
3625 /* 3625 /*
3626 * Used for early kmem_cache structures that were allocated using 3626 * Used for early kmem_cache structures that were allocated using
3627 * the page allocator 3627 * the page allocator
3628 */ 3628 */
3629 3629
3630 static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s) 3630 static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s)
3631 { 3631 {
3632 int node; 3632 int node;
3633 3633
3634 list_add(&s->list, &slab_caches); 3634 list_add(&s->list, &slab_caches);
3635 s->refcount = -1; 3635 s->refcount = -1;
3636 3636
3637 for_each_node_state(node, N_NORMAL_MEMORY) { 3637 for_each_node_state(node, N_NORMAL_MEMORY) {
3638 struct kmem_cache_node *n = get_node(s, node); 3638 struct kmem_cache_node *n = get_node(s, node);
3639 struct page *p; 3639 struct page *p;
3640 3640
3641 if (n) { 3641 if (n) {
3642 list_for_each_entry(p, &n->partial, lru) 3642 list_for_each_entry(p, &n->partial, lru)
3643 p->slab = s; 3643 p->slab = s;
3644 3644
3645 #ifdef CONFIG_SLUB_DEBUG 3645 #ifdef CONFIG_SLUB_DEBUG
3646 list_for_each_entry(p, &n->full, lru) 3646 list_for_each_entry(p, &n->full, lru)
3647 p->slab = s; 3647 p->slab = s;
3648 #endif 3648 #endif
3649 } 3649 }
3650 } 3650 }
3651 } 3651 }
3652 3652
3653 void __init kmem_cache_init(void) 3653 void __init kmem_cache_init(void)
3654 { 3654 {
3655 int i; 3655 int i;
3656 int caches = 0; 3656 int caches = 0;
3657 struct kmem_cache *temp_kmem_cache; 3657 struct kmem_cache *temp_kmem_cache;
3658 int order; 3658 int order;
3659 struct kmem_cache *temp_kmem_cache_node; 3659 struct kmem_cache *temp_kmem_cache_node;
3660 unsigned long kmalloc_size; 3660 unsigned long kmalloc_size;
3661 3661
3662 kmem_size = offsetof(struct kmem_cache, node) + 3662 kmem_size = offsetof(struct kmem_cache, node) +
3663 nr_node_ids * sizeof(struct kmem_cache_node *); 3663 nr_node_ids * sizeof(struct kmem_cache_node *);
3664 3664
3665 /* Allocate two kmem_caches from the page allocator */ 3665 /* Allocate two kmem_caches from the page allocator */
3666 kmalloc_size = ALIGN(kmem_size, cache_line_size()); 3666 kmalloc_size = ALIGN(kmem_size, cache_line_size());
3667 order = get_order(2 * kmalloc_size); 3667 order = get_order(2 * kmalloc_size);
3668 kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order); 3668 kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order);
3669 3669
3670 /* 3670 /*
3671 * Must first have the slab cache available for the allocations of the 3671 * Must first have the slab cache available for the allocations of the
3672 * struct kmem_cache_node's. There is special bootstrap code in 3672 * struct kmem_cache_node's. There is special bootstrap code in
3673 * kmem_cache_open for slab_state == DOWN. 3673 * kmem_cache_open for slab_state == DOWN.
3674 */ 3674 */
3675 kmem_cache_node = (void *)kmem_cache + kmalloc_size; 3675 kmem_cache_node = (void *)kmem_cache + kmalloc_size;
3676 3676
3677 kmem_cache_open(kmem_cache_node, "kmem_cache_node", 3677 kmem_cache_open(kmem_cache_node, "kmem_cache_node",
3678 sizeof(struct kmem_cache_node), 3678 sizeof(struct kmem_cache_node),
3679 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); 3679 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
3680 3680
3681 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); 3681 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
3682 3682
3683 /* Able to allocate the per node structures */ 3683 /* Able to allocate the per node structures */
3684 slab_state = PARTIAL; 3684 slab_state = PARTIAL;
3685 3685
3686 temp_kmem_cache = kmem_cache; 3686 temp_kmem_cache = kmem_cache;
3687 kmem_cache_open(kmem_cache, "kmem_cache", kmem_size, 3687 kmem_cache_open(kmem_cache, "kmem_cache", kmem_size,
3688 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); 3688 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
3689 kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); 3689 kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3690 memcpy(kmem_cache, temp_kmem_cache, kmem_size); 3690 memcpy(kmem_cache, temp_kmem_cache, kmem_size);
3691 3691
3692 /* 3692 /*
3693 * Allocate kmem_cache_node properly from the kmem_cache slab. 3693 * Allocate kmem_cache_node properly from the kmem_cache slab.
3694 * kmem_cache_node is separately allocated so no need to 3694 * kmem_cache_node is separately allocated so no need to
3695 * update any list pointers. 3695 * update any list pointers.
3696 */ 3696 */
3697 temp_kmem_cache_node = kmem_cache_node; 3697 temp_kmem_cache_node = kmem_cache_node;
3698 3698
3699 kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); 3699 kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3700 memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size); 3700 memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size);
3701 3701
3702 kmem_cache_bootstrap_fixup(kmem_cache_node); 3702 kmem_cache_bootstrap_fixup(kmem_cache_node);
3703 3703
3704 caches++; 3704 caches++;
3705 kmem_cache_bootstrap_fixup(kmem_cache); 3705 kmem_cache_bootstrap_fixup(kmem_cache);
3706 caches++; 3706 caches++;
3707 /* Free temporary boot structure */ 3707 /* Free temporary boot structure */
3708 free_pages((unsigned long)temp_kmem_cache, order); 3708 free_pages((unsigned long)temp_kmem_cache, order);
3709 3709
3710 /* Now we can use the kmem_cache to allocate kmalloc slabs */ 3710 /* Now we can use the kmem_cache to allocate kmalloc slabs */
3711 3711
3712 /* 3712 /*
3713 * Patch up the size_index table if we have strange large alignment 3713 * Patch up the size_index table if we have strange large alignment
3714 * requirements for the kmalloc array. This is only the case for 3714 * requirements for the kmalloc array. This is only the case for
3715 * MIPS, it seems. The standard arches will not generate any code here. 3715 * MIPS, it seems. The standard arches will not generate any code here.
3716 * 3716 *
3717 * Largest permitted alignment is 256 bytes due to the way we 3717 * Largest permitted alignment is 256 bytes due to the way we
3718 * handle the index determination for the smaller caches. 3718 * handle the index determination for the smaller caches.
3719 * 3719 *
3720 * Make sure that nothing crazy happens if someone starts tinkering 3720 * Make sure that nothing crazy happens if someone starts tinkering
3721 * around with ARCH_KMALLOC_MINALIGN 3721 * around with ARCH_KMALLOC_MINALIGN
3722 */ 3722 */
3723 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || 3723 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
3724 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); 3724 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
3725 3725
3726 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { 3726 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
3727 int elem = size_index_elem(i); 3727 int elem = size_index_elem(i);
3728 if (elem >= ARRAY_SIZE(size_index)) 3728 if (elem >= ARRAY_SIZE(size_index))
3729 break; 3729 break;
3730 size_index[elem] = KMALLOC_SHIFT_LOW; 3730 size_index[elem] = KMALLOC_SHIFT_LOW;
3731 } 3731 }
3732 3732
3733 if (KMALLOC_MIN_SIZE == 64) { 3733 if (KMALLOC_MIN_SIZE == 64) {
3734 /* 3734 /*
3735 * The 96 byte sized cache is not used if the alignment 3735 * The 96 byte sized cache is not used if the alignment
3736 * is 64 bytes. 3736 * is 64 bytes.
3737 */ 3737 */
3738 for (i = 64 + 8; i <= 96; i += 8) 3738 for (i = 64 + 8; i <= 96; i += 8)
3739 size_index[size_index_elem(i)] = 7; 3739 size_index[size_index_elem(i)] = 7;
3740 } else if (KMALLOC_MIN_SIZE == 128) { 3740 } else if (KMALLOC_MIN_SIZE == 128) {
3741 /* 3741 /*
3742 * The 192 byte sized cache is not used if the alignment 3742 * The 192 byte sized cache is not used if the alignment
3743 * is 128 bytes. Redirect kmalloc to use the 256 byte cache 3743 * is 128 bytes. Redirect kmalloc to use the 256 byte cache
3744 * instead. 3744 * instead.
3745 */ 3745 */
3746 for (i = 128 + 8; i <= 192; i += 8) 3746 for (i = 128 + 8; i <= 192; i += 8)
3747 size_index[size_index_elem(i)] = 8; 3747 size_index[size_index_elem(i)] = 8;
3748 } 3748 }
3749 3749
3750 /* Caches that are not of the two-to-the-power-of size */ 3750 /* Caches that are not of the two-to-the-power-of size */
3751 if (KMALLOC_MIN_SIZE <= 32) { 3751 if (KMALLOC_MIN_SIZE <= 32) {
3752 kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0); 3752 kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0);
3753 caches++; 3753 caches++;
3754 } 3754 }
3755 3755
3756 if (KMALLOC_MIN_SIZE <= 64) { 3756 if (KMALLOC_MIN_SIZE <= 64) {
3757 kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0); 3757 kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0);
3758 caches++; 3758 caches++;
3759 } 3759 }
3760 3760
3761 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { 3761 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3762 kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0); 3762 kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0);
3763 caches++; 3763 caches++;
3764 } 3764 }
3765 3765
3766 slab_state = UP; 3766 slab_state = UP;
3767 3767
3768 /* Provide the correct kmalloc names now that the caches are up */ 3768 /* Provide the correct kmalloc names now that the caches are up */
3769 if (KMALLOC_MIN_SIZE <= 32) { 3769 if (KMALLOC_MIN_SIZE <= 32) {
3770 kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT); 3770 kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
3771 BUG_ON(!kmalloc_caches[1]->name); 3771 BUG_ON(!kmalloc_caches[1]->name);
3772 } 3772 }
3773 3773
3774 if (KMALLOC_MIN_SIZE <= 64) { 3774 if (KMALLOC_MIN_SIZE <= 64) {
3775 kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT); 3775 kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
3776 BUG_ON(!kmalloc_caches[2]->name); 3776 BUG_ON(!kmalloc_caches[2]->name);
3777 } 3777 }
3778 3778
3779 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { 3779 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3780 char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); 3780 char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
3781 3781
3782 BUG_ON(!s); 3782 BUG_ON(!s);
3783 kmalloc_caches[i]->name = s; 3783 kmalloc_caches[i]->name = s;
3784 } 3784 }
3785 3785
3786 #ifdef CONFIG_SMP 3786 #ifdef CONFIG_SMP
3787 register_cpu_notifier(&slab_notifier); 3787 register_cpu_notifier(&slab_notifier);
3788 #endif 3788 #endif
3789 3789
3790 #ifdef CONFIG_ZONE_DMA 3790 #ifdef CONFIG_ZONE_DMA
3791 for (i = 0; i < SLUB_PAGE_SHIFT; i++) { 3791 for (i = 0; i < SLUB_PAGE_SHIFT; i++) {
3792 struct kmem_cache *s = kmalloc_caches[i]; 3792 struct kmem_cache *s = kmalloc_caches[i];
3793 3793
3794 if (s && s->size) { 3794 if (s && s->size) {
3795 char *name = kasprintf(GFP_NOWAIT, 3795 char *name = kasprintf(GFP_NOWAIT,
3796 "dma-kmalloc-%d", s->objsize); 3796 "dma-kmalloc-%d", s->objsize);
3797 3797
3798 BUG_ON(!name); 3798 BUG_ON(!name);
3799 kmalloc_dma_caches[i] = create_kmalloc_cache(name, 3799 kmalloc_dma_caches[i] = create_kmalloc_cache(name,
3800 s->objsize, SLAB_CACHE_DMA); 3800 s->objsize, SLAB_CACHE_DMA);
3801 } 3801 }
3802 } 3802 }
3803 #endif 3803 #endif
3804 printk(KERN_INFO 3804 printk(KERN_INFO
3805 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," 3805 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
3806 " CPUs=%d, Nodes=%d\n", 3806 " CPUs=%d, Nodes=%d\n",
3807 caches, cache_line_size(), 3807 caches, cache_line_size(),
3808 slub_min_order, slub_max_order, slub_min_objects, 3808 slub_min_order, slub_max_order, slub_min_objects,
3809 nr_cpu_ids, nr_node_ids); 3809 nr_cpu_ids, nr_node_ids);
3810 } 3810 }
3811 3811
3812 void __init kmem_cache_init_late(void) 3812 void __init kmem_cache_init_late(void)
3813 { 3813 {
3814 } 3814 }
3815 3815
3816 /* 3816 /*
3817 * Find a mergeable slab cache 3817 * Find a mergeable slab cache
3818 */ 3818 */
3819 static int slab_unmergeable(struct kmem_cache *s) 3819 static int slab_unmergeable(struct kmem_cache *s)
3820 { 3820 {
3821 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE)) 3821 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
3822 return 1; 3822 return 1;
3823 3823
3824 if (s->ctor) 3824 if (s->ctor)
3825 return 1; 3825 return 1;
3826 3826
3827 /* 3827 /*
3828 * We may have set a slab to be unmergeable during bootstrap. 3828 * We may have set a slab to be unmergeable during bootstrap.
3829 */ 3829 */
3830 if (s->refcount < 0) 3830 if (s->refcount < 0)
3831 return 1; 3831 return 1;
3832 3832
3833 return 0; 3833 return 0;
3834 } 3834 }
3835 3835
3836 static struct kmem_cache *find_mergeable(size_t size, 3836 static struct kmem_cache *find_mergeable(size_t size,
3837 size_t align, unsigned long flags, const char *name, 3837 size_t align, unsigned long flags, const char *name,
3838 void (*ctor)(void *)) 3838 void (*ctor)(void *))
3839 { 3839 {
3840 struct kmem_cache *s; 3840 struct kmem_cache *s;
3841 3841
3842 if (slub_nomerge || (flags & SLUB_NEVER_MERGE)) 3842 if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
3843 return NULL; 3843 return NULL;
3844 3844
3845 if (ctor) 3845 if (ctor)
3846 return NULL; 3846 return NULL;
3847 3847
3848 size = ALIGN(size, sizeof(void *)); 3848 size = ALIGN(size, sizeof(void *));
3849 align = calculate_alignment(flags, align, size); 3849 align = calculate_alignment(flags, align, size);
3850 size = ALIGN(size, align); 3850 size = ALIGN(size, align);
3851 flags = kmem_cache_flags(size, flags, name, NULL); 3851 flags = kmem_cache_flags(size, flags, name, NULL);
3852 3852
3853 list_for_each_entry(s, &slab_caches, list) { 3853 list_for_each_entry(s, &slab_caches, list) {
3854 if (slab_unmergeable(s)) 3854 if (slab_unmergeable(s))
3855 continue; 3855 continue;
3856 3856
3857 if (size > s->size) 3857 if (size > s->size)
3858 continue; 3858 continue;
3859 3859
3860 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME)) 3860 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
3861 continue; 3861 continue;
3862 /* 3862 /*
3863 * Check if alignment is compatible. 3863 * Check if alignment is compatible.
3864 * Courtesy of Adrian Drzewiecki 3864 * Courtesy of Adrian Drzewiecki
3865 */ 3865 */
3866 if ((s->size & ~(align - 1)) != s->size) 3866 if ((s->size & ~(align - 1)) != s->size)
3867 continue; 3867 continue;
3868 3868
3869 if (s->size - size >= sizeof(void *)) 3869 if (s->size - size >= sizeof(void *))
3870 continue; 3870 continue;
3871 3871
3872 return s; 3872 return s;
3873 } 3873 }
3874 return NULL; 3874 return NULL;
3875 } 3875 }
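The checks in find_mergeable() above reduce to a handful of comparisons. Below is a hedged, simplified userspace sketch of just that decision; the struct, the flag mask and the helper name are stand-ins, and the preliminary size/alignment rounding done by the kernel is omitted.

/* Illustrative sketch of the merge test in find_mergeable() above. */
#include <stddef.h>
#include <stdio.h>

struct ex_cache {
	const char *name;
	size_t size;		/* object size after padding */
	unsigned long flags;
	int has_ctor;
};

#define EX_MERGE_SAME_MASK 0x1UL	/* hypothetical stand-in for SLUB_MERGE_SAME */

static int ex_can_merge(const struct ex_cache *s, size_t size,
			size_t align, unsigned long flags, int has_ctor)
{
	if (has_ctor || s->has_ctor)
		return 0;
	if (size > s->size)				/* must fit */
		return 0;
	if ((flags & EX_MERGE_SAME_MASK) != (s->flags & EX_MERGE_SAME_MASK))
		return 0;
	if ((s->size & ~(align - 1)) != s->size)	/* alignment compatible */
		return 0;
	if (s->size - size >= sizeof(void *))		/* don't waste a pointer */
		return 0;
	return 1;
}

int main(void)
{
	struct ex_cache existing = { "kmalloc-64", 64, 0, 0 };

	printf("60-byte request merges: %d\n", ex_can_merge(&existing, 60, 8, 0, 0));	/* 1 */
	printf("40-byte request merges: %d\n", ex_can_merge(&existing, 40, 8, 0, 0));	/* 0 */
	return 0;
}

The last check is why the 40-byte request above is not merged into a 64-byte cache: every object would waste at least a pointer's worth of space.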
3876 3876
3877 struct kmem_cache *kmem_cache_create(const char *name, size_t size, 3877 struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3878 size_t align, unsigned long flags, void (*ctor)(void *)) 3878 size_t align, unsigned long flags, void (*ctor)(void *))
3879 { 3879 {
3880 struct kmem_cache *s; 3880 struct kmem_cache *s;
3881 char *n; 3881 char *n;
3882 3882
3883 if (WARN_ON(!name)) 3883 if (WARN_ON(!name))
3884 return NULL; 3884 return NULL;
3885 3885
3886 down_write(&slub_lock); 3886 down_write(&slub_lock);
3887 s = find_mergeable(size, align, flags, name, ctor); 3887 s = find_mergeable(size, align, flags, name, ctor);
3888 if (s) { 3888 if (s) {
3889 s->refcount++; 3889 s->refcount++;
3890 /* 3890 /*
3891 * Adjust the object sizes so that we clear 3891 * Adjust the object sizes so that we clear
3892 * the complete object on kzalloc. 3892 * the complete object on kzalloc.
3893 */ 3893 */
3894 s->objsize = max(s->objsize, (int)size); 3894 s->objsize = max(s->objsize, (int)size);
3895 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); 3895 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3896 3896
3897 if (sysfs_slab_alias(s, name)) { 3897 if (sysfs_slab_alias(s, name)) {
3898 s->refcount--; 3898 s->refcount--;
3899 goto err; 3899 goto err;
3900 } 3900 }
3901 up_write(&slub_lock); 3901 up_write(&slub_lock);
3902 return s; 3902 return s;
3903 } 3903 }
3904 3904
3905 n = kstrdup(name, GFP_KERNEL); 3905 n = kstrdup(name, GFP_KERNEL);
3906 if (!n) 3906 if (!n)
3907 goto err; 3907 goto err;
3908 3908
3909 s = kmalloc(kmem_size, GFP_KERNEL); 3909 s = kmalloc(kmem_size, GFP_KERNEL);
3910 if (s) { 3910 if (s) {
3911 if (kmem_cache_open(s, n, 3911 if (kmem_cache_open(s, n,
3912 size, align, flags, ctor)) { 3912 size, align, flags, ctor)) {
3913 list_add(&s->list, &slab_caches); 3913 list_add(&s->list, &slab_caches);
3914 up_write(&slub_lock);
3914 if (sysfs_slab_add(s)) { 3915 if (sysfs_slab_add(s)) {
3916 down_write(&slub_lock);
3915 list_del(&s->list); 3917 list_del(&s->list);
3916 kfree(n); 3918 kfree(n);
3917 kfree(s); 3919 kfree(s);
3918 goto err; 3920 goto err;
3919 } 3921 }
3920 up_write(&slub_lock);
3921 return s; 3922 return s;
3922 } 3923 }
3923 kfree(n); 3924 kfree(n);
3924 kfree(s); 3925 kfree(s);
3925 } 3926 }
3926 err: 3927 err:
3927 up_write(&slub_lock); 3928 up_write(&slub_lock);
3928 3929
3929 if (flags & SLAB_PANIC) 3930 if (flags & SLAB_PANIC)
3930 panic("Cannot create slabcache %s\n", name); 3931 panic("Cannot create slabcache %s\n", name);
3931 else 3932 else
3932 s = NULL; 3933 s = NULL;
3933 return s; 3934 return s;
3934 } 3935 }
3935 EXPORT_SYMBOL(kmem_cache_create); 3936 EXPORT_SYMBOL(kmem_cache_create);
3936 3937
3937 #ifdef CONFIG_SMP 3938 #ifdef CONFIG_SMP
3938 /* 3939 /*
3939 * Use the cpu notifier to ensure that the cpu slabs are flushed when 3940 * Use the cpu notifier to ensure that the cpu slabs are flushed when
3940 * necessary. 3941 * necessary.
3941 */ 3942 */
3942 static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, 3943 static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
3943 unsigned long action, void *hcpu) 3944 unsigned long action, void *hcpu)
3944 { 3945 {
3945 long cpu = (long)hcpu; 3946 long cpu = (long)hcpu;
3946 struct kmem_cache *s; 3947 struct kmem_cache *s;
3947 unsigned long flags; 3948 unsigned long flags;
3948 3949
3949 switch (action) { 3950 switch (action) {
3950 case CPU_UP_CANCELED: 3951 case CPU_UP_CANCELED:
3951 case CPU_UP_CANCELED_FROZEN: 3952 case CPU_UP_CANCELED_FROZEN:
3952 case CPU_DEAD: 3953 case CPU_DEAD:
3953 case CPU_DEAD_FROZEN: 3954 case CPU_DEAD_FROZEN:
3954 down_read(&slub_lock); 3955 down_read(&slub_lock);
3955 list_for_each_entry(s, &slab_caches, list) { 3956 list_for_each_entry(s, &slab_caches, list) {
3956 local_irq_save(flags); 3957 local_irq_save(flags);
3957 __flush_cpu_slab(s, cpu); 3958 __flush_cpu_slab(s, cpu);
3958 local_irq_restore(flags); 3959 local_irq_restore(flags);
3959 } 3960 }
3960 up_read(&slub_lock); 3961 up_read(&slub_lock);
3961 break; 3962 break;
3962 default: 3963 default:
3963 break; 3964 break;
3964 } 3965 }
3965 return NOTIFY_OK; 3966 return NOTIFY_OK;
3966 } 3967 }
3967 3968
3968 static struct notifier_block __cpuinitdata slab_notifier = { 3969 static struct notifier_block __cpuinitdata slab_notifier = {
3969 .notifier_call = slab_cpuup_callback 3970 .notifier_call = slab_cpuup_callback
3970 }; 3971 };
3971 3972
3972 #endif 3973 #endif
3973 3974
3974 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) 3975 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3975 { 3976 {
3976 struct kmem_cache *s; 3977 struct kmem_cache *s;
3977 void *ret; 3978 void *ret;
3978 3979
3979 if (unlikely(size > SLUB_MAX_SIZE)) 3980 if (unlikely(size > SLUB_MAX_SIZE))
3980 return kmalloc_large(size, gfpflags); 3981 return kmalloc_large(size, gfpflags);
3981 3982
3982 s = get_slab(size, gfpflags); 3983 s = get_slab(size, gfpflags);
3983 3984
3984 if (unlikely(ZERO_OR_NULL_PTR(s))) 3985 if (unlikely(ZERO_OR_NULL_PTR(s)))
3985 return s; 3986 return s;
3986 3987
3987 ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller); 3988 ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller);
3988 3989
3989 /* Honor the call site pointer we received. */ 3990 /* Honor the call site pointer we received. */
3990 trace_kmalloc(caller, ret, size, s->size, gfpflags); 3991 trace_kmalloc(caller, ret, size, s->size, gfpflags);
3991 3992
3992 return ret; 3993 return ret;
3993 } 3994 }
3994 3995
3995 #ifdef CONFIG_NUMA 3996 #ifdef CONFIG_NUMA
3996 void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, 3997 void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3997 int node, unsigned long caller) 3998 int node, unsigned long caller)
3998 { 3999 {
3999 struct kmem_cache *s; 4000 struct kmem_cache *s;
4000 void *ret; 4001 void *ret;
4001 4002
4002 if (unlikely(size > SLUB_MAX_SIZE)) { 4003 if (unlikely(size > SLUB_MAX_SIZE)) {
4003 ret = kmalloc_large_node(size, gfpflags, node); 4004 ret = kmalloc_large_node(size, gfpflags, node);
4004 4005
4005 trace_kmalloc_node(caller, ret, 4006 trace_kmalloc_node(caller, ret,
4006 size, PAGE_SIZE << get_order(size), 4007 size, PAGE_SIZE << get_order(size),
4007 gfpflags, node); 4008 gfpflags, node);
4008 4009
4009 return ret; 4010 return ret;
4010 } 4011 }
4011 4012
4012 s = get_slab(size, gfpflags); 4013 s = get_slab(size, gfpflags);
4013 4014
4014 if (unlikely(ZERO_OR_NULL_PTR(s))) 4015 if (unlikely(ZERO_OR_NULL_PTR(s)))
4015 return s; 4016 return s;
4016 4017
4017 ret = slab_alloc(s, gfpflags, node, caller); 4018 ret = slab_alloc(s, gfpflags, node, caller);
4018 4019
4019 /* Honor the call site pointer we received. */ 4020 /* Honor the call site pointer we received. */
4020 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); 4021 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4021 4022
4022 return ret; 4023 return ret;
4023 } 4024 }
4024 #endif 4025 #endif
4025 4026
4026 #ifdef CONFIG_SYSFS 4027 #ifdef CONFIG_SYSFS
4027 static int count_inuse(struct page *page) 4028 static int count_inuse(struct page *page)
4028 { 4029 {
4029 return page->inuse; 4030 return page->inuse;
4030 } 4031 }
4031 4032
4032 static int count_total(struct page *page) 4033 static int count_total(struct page *page)
4033 { 4034 {
4034 return page->objects; 4035 return page->objects;
4035 } 4036 }
4036 #endif 4037 #endif
4037 4038
4038 #ifdef CONFIG_SLUB_DEBUG 4039 #ifdef CONFIG_SLUB_DEBUG
4039 static int validate_slab(struct kmem_cache *s, struct page *page, 4040 static int validate_slab(struct kmem_cache *s, struct page *page,
4040 unsigned long *map) 4041 unsigned long *map)
4041 { 4042 {
4042 void *p; 4043 void *p;
4043 void *addr = page_address(page); 4044 void *addr = page_address(page);
4044 4045
4045 if (!check_slab(s, page) || 4046 if (!check_slab(s, page) ||
4046 !on_freelist(s, page, NULL)) 4047 !on_freelist(s, page, NULL))
4047 return 0; 4048 return 0;
4048 4049
4049 /* Now we know that a valid freelist exists */ 4050 /* Now we know that a valid freelist exists */
4050 bitmap_zero(map, page->objects); 4051 bitmap_zero(map, page->objects);
4051 4052
4052 get_map(s, page, map); 4053 get_map(s, page, map);
4053 for_each_object(p, s, addr, page->objects) { 4054 for_each_object(p, s, addr, page->objects) {
4054 if (test_bit(slab_index(p, s, addr), map)) 4055 if (test_bit(slab_index(p, s, addr), map))
4055 if (!check_object(s, page, p, SLUB_RED_INACTIVE)) 4056 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
4056 return 0; 4057 return 0;
4057 } 4058 }
4058 4059
4059 for_each_object(p, s, addr, page->objects) 4060 for_each_object(p, s, addr, page->objects)
4060 if (!test_bit(slab_index(p, s, addr), map)) 4061 if (!test_bit(slab_index(p, s, addr), map))
4061 if (!check_object(s, page, p, SLUB_RED_ACTIVE)) 4062 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
4062 return 0; 4063 return 0;
4063 return 1; 4064 return 1;
4064 } 4065 }
4065 4066
4066 static void validate_slab_slab(struct kmem_cache *s, struct page *page, 4067 static void validate_slab_slab(struct kmem_cache *s, struct page *page,
4067 unsigned long *map) 4068 unsigned long *map)
4068 { 4069 {
4069 slab_lock(page); 4070 slab_lock(page);
4070 validate_slab(s, page, map); 4071 validate_slab(s, page, map);
4071 slab_unlock(page); 4072 slab_unlock(page);
4072 } 4073 }
4073 4074
4074 static int validate_slab_node(struct kmem_cache *s, 4075 static int validate_slab_node(struct kmem_cache *s,
4075 struct kmem_cache_node *n, unsigned long *map) 4076 struct kmem_cache_node *n, unsigned long *map)
4076 { 4077 {
4077 unsigned long count = 0; 4078 unsigned long count = 0;
4078 struct page *page; 4079 struct page *page;
4079 unsigned long flags; 4080 unsigned long flags;
4080 4081
4081 spin_lock_irqsave(&n->list_lock, flags); 4082 spin_lock_irqsave(&n->list_lock, flags);
4082 4083
4083 list_for_each_entry(page, &n->partial, lru) { 4084 list_for_each_entry(page, &n->partial, lru) {
4084 validate_slab_slab(s, page, map); 4085 validate_slab_slab(s, page, map);
4085 count++; 4086 count++;
4086 } 4087 }
4087 if (count != n->nr_partial) 4088 if (count != n->nr_partial)
4088 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but " 4089 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
4089 "counter=%ld\n", s->name, count, n->nr_partial); 4090 "counter=%ld\n", s->name, count, n->nr_partial);
4090 4091
4091 if (!(s->flags & SLAB_STORE_USER)) 4092 if (!(s->flags & SLAB_STORE_USER))
4092 goto out; 4093 goto out;
4093 4094
4094 list_for_each_entry(page, &n->full, lru) { 4095 list_for_each_entry(page, &n->full, lru) {
4095 validate_slab_slab(s, page, map); 4096 validate_slab_slab(s, page, map);
4096 count++; 4097 count++;
4097 } 4098 }
4098 if (count != atomic_long_read(&n->nr_slabs)) 4099 if (count != atomic_long_read(&n->nr_slabs))
4099 printk(KERN_ERR "SLUB: %s %ld slabs counted but " 4100 printk(KERN_ERR "SLUB: %s %ld slabs counted but "
4100 "counter=%ld\n", s->name, count, 4101 "counter=%ld\n", s->name, count,
4101 atomic_long_read(&n->nr_slabs)); 4102 atomic_long_read(&n->nr_slabs));
4102 4103
4103 out: 4104 out:
4104 spin_unlock_irqrestore(&n->list_lock, flags); 4105 spin_unlock_irqrestore(&n->list_lock, flags);
4105 return count; 4106 return count;
4106 } 4107 }
4107 4108
4108 static long validate_slab_cache(struct kmem_cache *s) 4109 static long validate_slab_cache(struct kmem_cache *s)
4109 { 4110 {
4110 int node; 4111 int node;
4111 unsigned long count = 0; 4112 unsigned long count = 0;
4112 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) * 4113 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4113 sizeof(unsigned long), GFP_KERNEL); 4114 sizeof(unsigned long), GFP_KERNEL);
4114 4115
4115 if (!map) 4116 if (!map)
4116 return -ENOMEM; 4117 return -ENOMEM;
4117 4118
4118 flush_all(s); 4119 flush_all(s);
4119 for_each_node_state(node, N_NORMAL_MEMORY) { 4120 for_each_node_state(node, N_NORMAL_MEMORY) {
4120 struct kmem_cache_node *n = get_node(s, node); 4121 struct kmem_cache_node *n = get_node(s, node);
4121 4122
4122 count += validate_slab_node(s, n, map); 4123 count += validate_slab_node(s, n, map);
4123 } 4124 }
4124 kfree(map); 4125 kfree(map);
4125 return count; 4126 return count;
4126 } 4127 }
4127 /* 4128 /*
4128 * Generate lists of code addresses where slabcache objects are allocated 4129 * Generate lists of code addresses where slabcache objects are allocated
4129 * and freed. 4130 * and freed.
4130 */ 4131 */
4131 4132
4132 struct location { 4133 struct location {
4133 unsigned long count; 4134 unsigned long count;
4134 unsigned long addr; 4135 unsigned long addr;
4135 long long sum_time; 4136 long long sum_time;
4136 long min_time; 4137 long min_time;
4137 long max_time; 4138 long max_time;
4138 long min_pid; 4139 long min_pid;
4139 long max_pid; 4140 long max_pid;
4140 DECLARE_BITMAP(cpus, NR_CPUS); 4141 DECLARE_BITMAP(cpus, NR_CPUS);
4141 nodemask_t nodes; 4142 nodemask_t nodes;
4142 }; 4143 };
4143 4144
4144 struct loc_track { 4145 struct loc_track {
4145 unsigned long max; 4146 unsigned long max;
4146 unsigned long count; 4147 unsigned long count;
4147 struct location *loc; 4148 struct location *loc;
4148 }; 4149 };
4149 4150
4150 static void free_loc_track(struct loc_track *t) 4151 static void free_loc_track(struct loc_track *t)
4151 { 4152 {
4152 if (t->max) 4153 if (t->max)
4153 free_pages((unsigned long)t->loc, 4154 free_pages((unsigned long)t->loc,
4154 get_order(sizeof(struct location) * t->max)); 4155 get_order(sizeof(struct location) * t->max));
4155 } 4156 }
4156 4157
4157 static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags) 4158 static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4158 { 4159 {
4159 struct location *l; 4160 struct location *l;
4160 int order; 4161 int order;
4161 4162
4162 order = get_order(sizeof(struct location) * max); 4163 order = get_order(sizeof(struct location) * max);
4163 4164
4164 l = (void *)__get_free_pages(flags, order); 4165 l = (void *)__get_free_pages(flags, order);
4165 if (!l) 4166 if (!l)
4166 return 0; 4167 return 0;
4167 4168
4168 if (t->count) { 4169 if (t->count) {
4169 memcpy(l, t->loc, sizeof(struct location) * t->count); 4170 memcpy(l, t->loc, sizeof(struct location) * t->count);
4170 free_loc_track(t); 4171 free_loc_track(t);
4171 } 4172 }
4172 t->max = max; 4173 t->max = max;
4173 t->loc = l; 4174 t->loc = l;
4174 return 1; 4175 return 1;
4175 } 4176 }
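alloc_loc_track() is also how the location table grows: when add_location() runs out of room it asks for a buffer twice the current size (with GFP_ATOMIC, since it runs under the node's list_lock taken in list_locations()), copies the existing entries across and frees the old pages. A minimal userspace sketch of that grow-by-doubling step is below; malloc()/free() stand in for __get_free_pages(), and the initial capacity of 16 is an arbitrary stand-in for the PAGE_SIZE-sized first allocation.

/* Hedged sketch of the loc_track grow-by-doubling done by
 * alloc_loc_track(); heap allocations stand in for page allocations. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct entry { unsigned long addr; };

struct track_table {
        unsigned long max;      /* capacity */
        unsigned long count;    /* entries in use */
        struct entry *loc;
};

static int grow(struct track_table *t, unsigned long max)
{
        struct entry *l = malloc(max * sizeof(*l));

        if (!l)
                return 0;
        if (t->count) {
                memcpy(l, t->loc, t->count * sizeof(*l));
                free(t->loc);
        }
        t->max = max;
        t->loc = l;
        return 1;
}

int main(void)
{
        struct track_table t = { 0, 0, NULL };
        unsigned long i;

        for (i = 0; i < 100; i++) {
                if (t.count >= t.max && !grow(&t, t.max ? 2 * t.max : 16))
                        return 1;       /* give up, as add_location() does */
                t.loc[t.count++].addr = i;
        }
        printf("count=%lu max=%lu\n", t.count, t.max);  /* 100 entries, capacity 128 */
        free(t.loc);
        return 0;
}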
4176 4177
4177 static int add_location(struct loc_track *t, struct kmem_cache *s, 4178 static int add_location(struct loc_track *t, struct kmem_cache *s,
4178 const struct track *track) 4179 const struct track *track)
4179 { 4180 {
4180 long start, end, pos; 4181 long start, end, pos;
4181 struct location *l; 4182 struct location *l;
4182 unsigned long caddr; 4183 unsigned long caddr;
4183 unsigned long age = jiffies - track->when; 4184 unsigned long age = jiffies - track->when;
4184 4185
4185 start = -1; 4186 start = -1;
4186 end = t->count; 4187 end = t->count;
4187 4188
4188 for ( ; ; ) { 4189 for ( ; ; ) {
4189 pos = start + (end - start + 1) / 2; 4190 pos = start + (end - start + 1) / 2;
4190 4191
4191 /* 4192 /*
4192 * There is nothing at "end". If we end up there 4193 * There is nothing at "end". If we end up there
4193 * we need to add something to before end. 4194 * we need to add something to before end.
4194 */ 4195 */
4195 if (pos == end) 4196 if (pos == end)
4196 break; 4197 break;
4197 4198
4198 caddr = t->loc[pos].addr; 4199 caddr = t->loc[pos].addr;
4199 if (track->addr == caddr) { 4200 if (track->addr == caddr) {
4200 4201
4201 l = &t->loc[pos]; 4202 l = &t->loc[pos];
4202 l->count++; 4203 l->count++;
4203 if (track->when) { 4204 if (track->when) {
4204 l->sum_time += age; 4205 l->sum_time += age;
4205 if (age < l->min_time) 4206 if (age < l->min_time)
4206 l->min_time = age; 4207 l->min_time = age;
4207 if (age > l->max_time) 4208 if (age > l->max_time)
4208 l->max_time = age; 4209 l->max_time = age;
4209 4210
4210 if (track->pid < l->min_pid) 4211 if (track->pid < l->min_pid)
4211 l->min_pid = track->pid; 4212 l->min_pid = track->pid;
4212 if (track->pid > l->max_pid) 4213 if (track->pid > l->max_pid)
4213 l->max_pid = track->pid; 4214 l->max_pid = track->pid;
4214 4215
4215 cpumask_set_cpu(track->cpu, 4216 cpumask_set_cpu(track->cpu,
4216 to_cpumask(l->cpus)); 4217 to_cpumask(l->cpus));
4217 } 4218 }
4218 node_set(page_to_nid(virt_to_page(track)), l->nodes); 4219 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4219 return 1; 4220 return 1;
4220 } 4221 }
4221 4222
4222 if (track->addr < caddr) 4223 if (track->addr < caddr)
4223 end = pos; 4224 end = pos;
4224 else 4225 else
4225 start = pos; 4226 start = pos;
4226 } 4227 }
4227 4228
4228 /* 4229 /*
4229 * Not found. Insert new tracking element. 4230 * Not found. Insert new tracking element.
4230 */ 4231 */
4231 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC)) 4232 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4232 return 0; 4233 return 0;
4233 4234
4234 l = t->loc + pos; 4235 l = t->loc + pos;
4235 if (pos < t->count) 4236 if (pos < t->count)
4236 memmove(l + 1, l, 4237 memmove(l + 1, l,
4237 (t->count - pos) * sizeof(struct location)); 4238 (t->count - pos) * sizeof(struct location));
4238 t->count++; 4239 t->count++;
4239 l->count = 1; 4240 l->count = 1;
4240 l->addr = track->addr; 4241 l->addr = track->addr;
4241 l->sum_time = age; 4242 l->sum_time = age;
4242 l->min_time = age; 4243 l->min_time = age;
4243 l->max_time = age; 4244 l->max_time = age;
4244 l->min_pid = track->pid; 4245 l->min_pid = track->pid;
4245 l->max_pid = track->pid; 4246 l->max_pid = track->pid;
4246 cpumask_clear(to_cpumask(l->cpus)); 4247 cpumask_clear(to_cpumask(l->cpus));
4247 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus)); 4248 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4248 nodes_clear(l->nodes); 4249 nodes_clear(l->nodes);
4249 node_set(page_to_nid(virt_to_page(track)), l->nodes); 4250 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4250 return 1; 4251 return 1;
4251 } 4252 }
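add_location() keeps t->loc sorted by call-site address so that repeated allocations from the same place collapse into a single struct location whose counters and cpu/node masks are updated. New addresses are placed with the insertion-point binary search above (start = -1, end = t->count, probe the midpoint, stop once the probe hits end). The standalone sketch below isolates just that search; the address array and values are made up for illustration. Returning t->count (the "append" case) is what makes the memmove() in the insert path a no-op when the new address is already the largest.

/* Hedged sketch of add_location()'s insertion-point binary search.
 * Returns the index at which 'addr' sits or should be inserted so the
 * array stays sorted; the data is illustrative only. */
#include <stdio.h>

static long find_pos(const unsigned long *loc, long count, unsigned long addr)
{
        long start = -1;
        long end = count;
        long pos;

        for ( ; ; ) {
                pos = start + (end - start + 1) / 2;

                /* Nothing exists at "end"; landing there means insert before it. */
                if (pos == end)
                        return pos;

                if (addr == loc[pos])
                        return pos;             /* merge with existing entry */

                if (addr < loc[pos])
                        end = pos;
                else
                        start = pos;
        }
}

int main(void)
{
        unsigned long loc[] = { 0x100, 0x200, 0x300, 0x400 };

        printf("0x250 -> slot %ld\n", find_pos(loc, 4, 0x250)); /* 2: insert */
        printf("0x300 -> slot %ld\n", find_pos(loc, 4, 0x300)); /* 2: existing */
        printf("0x500 -> slot %ld\n", find_pos(loc, 4, 0x500)); /* 4: append */
        return 0;
}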
4252 4253
4253 static void process_slab(struct loc_track *t, struct kmem_cache *s, 4254 static void process_slab(struct loc_track *t, struct kmem_cache *s,
4254 struct page *page, enum track_item alloc, 4255 struct page *page, enum track_item alloc,
4255 unsigned long *map) 4256 unsigned long *map)
4256 { 4257 {
4257 void *addr = page_address(page); 4258 void *addr = page_address(page);
4258 void *p; 4259 void *p;
4259 4260
4260 bitmap_zero(map, page->objects); 4261 bitmap_zero(map, page->objects);
4261 get_map(s, page, map); 4262 get_map(s, page, map);
4262 4263
4263 for_each_object(p, s, addr, page->objects) 4264 for_each_object(p, s, addr, page->objects)
4264 if (!test_bit(slab_index(p, s, addr), map)) 4265 if (!test_bit(slab_index(p, s, addr), map))
4265 add_location(t, s, get_track(s, p, alloc)); 4266 add_location(t, s, get_track(s, p, alloc));
4266 } 4267 }
4267 4268
4268 static int list_locations(struct kmem_cache *s, char *buf, 4269 static int list_locations(struct kmem_cache *s, char *buf,
4269 enum track_item alloc) 4270 enum track_item alloc)
4270 { 4271 {
4271 int len = 0; 4272 int len = 0;
4272 unsigned long i; 4273 unsigned long i;
4273 struct loc_track t = { 0, 0, NULL }; 4274 struct loc_track t = { 0, 0, NULL };
4274 int node; 4275 int node;
4275 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) * 4276 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4276 sizeof(unsigned long), GFP_KERNEL); 4277 sizeof(unsigned long), GFP_KERNEL);
4277 4278
4278 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), 4279 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4279 GFP_TEMPORARY)) { 4280 GFP_TEMPORARY)) {
4280 kfree(map); 4281 kfree(map);
4281 return sprintf(buf, "Out of memory\n"); 4282 return sprintf(buf, "Out of memory\n");
4282 } 4283 }
4283 /* Push back cpu slabs */ 4284 /* Push back cpu slabs */
4284 flush_all(s); 4285 flush_all(s);
4285 4286
4286 for_each_node_state(node, N_NORMAL_MEMORY) { 4287 for_each_node_state(node, N_NORMAL_MEMORY) {
4287 struct kmem_cache_node *n = get_node(s, node); 4288 struct kmem_cache_node *n = get_node(s, node);
4288 unsigned long flags; 4289 unsigned long flags;
4289 struct page *page; 4290 struct page *page;
4290 4291
4291 if (!atomic_long_read(&n->nr_slabs)) 4292 if (!atomic_long_read(&n->nr_slabs))
4292 continue; 4293 continue;
4293 4294
4294 spin_lock_irqsave(&n->list_lock, flags); 4295 spin_lock_irqsave(&n->list_lock, flags);
4295 list_for_each_entry(page, &n->partial, lru) 4296 list_for_each_entry(page, &n->partial, lru)
4296 process_slab(&t, s, page, alloc, map); 4297 process_slab(&t, s, page, alloc, map);
4297 list_for_each_entry(page, &n->full, lru) 4298 list_for_each_entry(page, &n->full, lru)
4298 process_slab(&t, s, page, alloc, map); 4299 process_slab(&t, s, page, alloc, map);
4299 spin_unlock_irqrestore(&n->list_lock, flags); 4300 spin_unlock_irqrestore(&n->list_lock, flags);
4300 } 4301 }
4301 4302
4302 for (i = 0; i < t.count; i++) { 4303 for (i = 0; i < t.count; i++) {
4303 struct location *l = &t.loc[i]; 4304 struct location *l = &t.loc[i];
4304 4305
4305 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100) 4306 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4306 break; 4307 break;
4307 len += sprintf(buf + len, "%7ld ", l->count); 4308 len += sprintf(buf + len, "%7ld ", l->count);
4308 4309
4309 if (l->addr) 4310 if (l->addr)
4310 len += sprintf(buf + len, "%pS", (void *)l->addr); 4311 len += sprintf(buf + len, "%pS", (void *)l->addr);
4311 else 4312 else
4312 len += sprintf(buf + len, "<not-available>"); 4313 len += sprintf(buf + len, "<not-available>");
4313 4314
4314 if (l->sum_time != l->min_time) { 4315 if (l->sum_time != l->min_time) {
4315 len += sprintf(buf + len, " age=%ld/%ld/%ld", 4316 len += sprintf(buf + len, " age=%ld/%ld/%ld",
4316 l->min_time, 4317 l->min_time,
4317 (long)div_u64(l->sum_time, l->count), 4318 (long)div_u64(l->sum_time, l->count),
4318 l->max_time); 4319 l->max_time);
4319 } else 4320 } else
4320 len += sprintf(buf + len, " age=%ld", 4321 len += sprintf(buf + len, " age=%ld",
4321 l->min_time); 4322 l->min_time);
4322 4323
4323 if (l->min_pid != l->max_pid) 4324 if (l->min_pid != l->max_pid)
4324 len += sprintf(buf + len, " pid=%ld-%ld", 4325 len += sprintf(buf + len, " pid=%ld-%ld",
4325 l->min_pid, l->max_pid); 4326 l->min_pid, l->max_pid);
4326 else 4327 else
4327 len += sprintf(buf + len, " pid=%ld", 4328 len += sprintf(buf + len, " pid=%ld",
4328 l->min_pid); 4329 l->min_pid);
4329 4330
4330 if (num_online_cpus() > 1 && 4331 if (num_online_cpus() > 1 &&
4331 !cpumask_empty(to_cpumask(l->cpus)) && 4332 !cpumask_empty(to_cpumask(l->cpus)) &&
4332 len < PAGE_SIZE - 60) { 4333 len < PAGE_SIZE - 60) {
4333 len += sprintf(buf + len, " cpus="); 4334 len += sprintf(buf + len, " cpus=");
4334 len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50, 4335 len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
4335 to_cpumask(l->cpus)); 4336 to_cpumask(l->cpus));
4336 } 4337 }
4337 4338
4338 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) && 4339 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4339 len < PAGE_SIZE - 60) { 4340 len < PAGE_SIZE - 60) {
4340 len += sprintf(buf + len, " nodes="); 4341 len += sprintf(buf + len, " nodes=");
4341 len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50, 4342 len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
4342 l->nodes); 4343 l->nodes);
4343 } 4344 }
4344 4345
4345 len += sprintf(buf + len, "\n"); 4346 len += sprintf(buf + len, "\n");
4346 } 4347 }
4347 4348
4348 free_loc_track(&t); 4349 free_loc_track(&t);
4349 kfree(map); 4350 kfree(map);
4350 if (!t.count) 4351 if (!t.count)
4351 len += sprintf(buf, "No data\n"); 4352 len += sprintf(buf, "No data\n");
4352 return len; 4353 return len;
4353 } 4354 }
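Each struct location comes out as a single line: the hit count, the call-site symbol, then optional age=min/avg/max, a pid or pid range, and cpus=/nodes= lists, built by the sprintf() calls above and read back through alloc_calls/free_calls (their _show handlers further down forward here when SLAB_STORE_USER is set). The snippet below replays those format strings on invented values just to show the shape of one such line; %pS has no userspace equivalent, so a plain string stands in for the resolved symbol.

/* Hedged sketch: reproduce the per-location line format used by
 * list_locations() with invented values. */
#include <stdio.h>

int main(void)
{
        char buf[256];
        int len = 0;

        len += sprintf(buf + len, "%7ld ", 1234L);              /* l->count */
        len += sprintf(buf + len, "%s", "kmem_cache_alloc+0x42/0x90"); /* %pS stand-in */
        len += sprintf(buf + len, " age=%ld/%ld/%ld", 3L, 57L, 912L);
        len += sprintf(buf + len, " pid=%ld-%ld", 1L, 4711L);
        len += sprintf(buf + len, " cpus=%s", "0-3");
        len += sprintf(buf + len, " nodes=%s", "0");
        len += sprintf(buf + len, "\n");

        fputs(buf, stdout);
        return 0;
}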
4354 #endif 4355 #endif
4355 4356
4356 #ifdef SLUB_RESILIENCY_TEST 4357 #ifdef SLUB_RESILIENCY_TEST
4357 static void resiliency_test(void) 4358 static void resiliency_test(void)
4358 { 4359 {
4359 u8 *p; 4360 u8 *p;
4360 4361
4361 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10); 4362 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10);
4362 4363
4363 printk(KERN_ERR "SLUB resiliency testing\n"); 4364 printk(KERN_ERR "SLUB resiliency testing\n");
4364 printk(KERN_ERR "-----------------------\n"); 4365 printk(KERN_ERR "-----------------------\n");
4365 printk(KERN_ERR "A. Corruption after allocation\n"); 4366 printk(KERN_ERR "A. Corruption after allocation\n");
4366 4367
4367 p = kzalloc(16, GFP_KERNEL); 4368 p = kzalloc(16, GFP_KERNEL);
4368 p[16] = 0x12; 4369 p[16] = 0x12;
4369 printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer" 4370 printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
4370 " 0x12->0x%p\n\n", p + 16); 4371 " 0x12->0x%p\n\n", p + 16);
4371 4372
4372 validate_slab_cache(kmalloc_caches[4]); 4373 validate_slab_cache(kmalloc_caches[4]);
4373 4374
4374 /* Hmmm... The next two are dangerous */ 4375 /* Hmmm... The next two are dangerous */
4375 p = kzalloc(32, GFP_KERNEL); 4376 p = kzalloc(32, GFP_KERNEL);
4376 p[32 + sizeof(void *)] = 0x34; 4377 p[32 + sizeof(void *)] = 0x34;
4377 printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab" 4378 printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
4378 " 0x34 -> -0x%p\n", p); 4379 " 0x34 -> -0x%p\n", p);
4379 printk(KERN_ERR 4380 printk(KERN_ERR
4380 "If allocated object is overwritten then not detectable\n\n"); 4381 "If allocated object is overwritten then not detectable\n\n");
4381 4382
4382 validate_slab_cache(kmalloc_caches[5]); 4383 validate_slab_cache(kmalloc_caches[5]);
4383 p = kzalloc(64, GFP_KERNEL); 4384 p = kzalloc(64, GFP_KERNEL);
4384 p += 64 + (get_cycles() & 0xff) * sizeof(void *); 4385 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4385 *p = 0x56; 4386 *p = 0x56;
4386 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", 4387 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4387 p); 4388 p);
4388 printk(KERN_ERR 4389 printk(KERN_ERR
4389 "If allocated object is overwritten then not detectable\n\n"); 4390 "If allocated object is overwritten then not detectable\n\n");
4390 validate_slab_cache(kmalloc_caches[6]); 4391 validate_slab_cache(kmalloc_caches[6]);
4391 4392
4392 printk(KERN_ERR "\nB. Corruption after free\n"); 4393 printk(KERN_ERR "\nB. Corruption after free\n");
4393 p = kzalloc(128, GFP_KERNEL); 4394 p = kzalloc(128, GFP_KERNEL);
4394 kfree(p); 4395 kfree(p);
4395 *p = 0x78; 4396 *p = 0x78;
4396 printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p); 4397 printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4397 validate_slab_cache(kmalloc_caches[7]); 4398 validate_slab_cache(kmalloc_caches[7]);
4398 4399
4399 p = kzalloc(256, GFP_KERNEL); 4400 p = kzalloc(256, GFP_KERNEL);
4400 kfree(p); 4401 kfree(p);
4401 p[50] = 0x9a; 4402 p[50] = 0x9a;
4402 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", 4403 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
4403 p); 4404 p);
4404 validate_slab_cache(kmalloc_caches[8]); 4405 validate_slab_cache(kmalloc_caches[8]);
4405 4406
4406 p = kzalloc(512, GFP_KERNEL); 4407 p = kzalloc(512, GFP_KERNEL);
4407 kfree(p); 4408 kfree(p);
4408 p[512] = 0xab; 4409 p[512] = 0xab;
4409 printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p); 4410 printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4410 validate_slab_cache(kmalloc_caches[9]); 4411 validate_slab_cache(kmalloc_caches[9]);
4411 } 4412 }
4412 #else 4413 #else
4413 #ifdef CONFIG_SYSFS 4414 #ifdef CONFIG_SYSFS
4414 static void resiliency_test(void) {}; 4415 static void resiliency_test(void) {};
4415 #endif 4416 #endif
4416 #endif 4417 #endif
4417 4418
4418 #ifdef CONFIG_SYSFS 4419 #ifdef CONFIG_SYSFS
4419 enum slab_stat_type { 4420 enum slab_stat_type {
4420 SL_ALL, /* All slabs */ 4421 SL_ALL, /* All slabs */
4421 SL_PARTIAL, /* Only partially allocated slabs */ 4422 SL_PARTIAL, /* Only partially allocated slabs */
4422 SL_CPU, /* Only slabs used for cpu caches */ 4423 SL_CPU, /* Only slabs used for cpu caches */
4423 SL_OBJECTS, /* Determine allocated objects not slabs */ 4424 SL_OBJECTS, /* Determine allocated objects not slabs */
4424 SL_TOTAL /* Determine object capacity not slabs */ 4425 SL_TOTAL /* Determine object capacity not slabs */
4425 }; 4426 };
4426 4427
4427 #define SO_ALL (1 << SL_ALL) 4428 #define SO_ALL (1 << SL_ALL)
4428 #define SO_PARTIAL (1 << SL_PARTIAL) 4429 #define SO_PARTIAL (1 << SL_PARTIAL)
4429 #define SO_CPU (1 << SL_CPU) 4430 #define SO_CPU (1 << SL_CPU)
4430 #define SO_OBJECTS (1 << SL_OBJECTS) 4431 #define SO_OBJECTS (1 << SL_OBJECTS)
4431 #define SO_TOTAL (1 << SL_TOTAL) 4432 #define SO_TOTAL (1 << SL_TOTAL)
4432 4433
4433 static ssize_t show_slab_objects(struct kmem_cache *s, 4434 static ssize_t show_slab_objects(struct kmem_cache *s,
4434 char *buf, unsigned long flags) 4435 char *buf, unsigned long flags)
4435 { 4436 {
4436 unsigned long total = 0; 4437 unsigned long total = 0;
4437 int node; 4438 int node;
4438 int x; 4439 int x;
4439 unsigned long *nodes; 4440 unsigned long *nodes;
4440 unsigned long *per_cpu; 4441 unsigned long *per_cpu;
4441 4442
4442 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL); 4443 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
4443 if (!nodes) 4444 if (!nodes)
4444 return -ENOMEM; 4445 return -ENOMEM;
4445 per_cpu = nodes + nr_node_ids; 4446 per_cpu = nodes + nr_node_ids;
4446 4447
4447 if (flags & SO_CPU) { 4448 if (flags & SO_CPU) {
4448 int cpu; 4449 int cpu;
4449 4450
4450 for_each_possible_cpu(cpu) { 4451 for_each_possible_cpu(cpu) {
4451 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); 4452 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
4452 int node = ACCESS_ONCE(c->node); 4453 int node = ACCESS_ONCE(c->node);
4453 struct page *page; 4454 struct page *page;
4454 4455
4455 if (node < 0) 4456 if (node < 0)
4456 continue; 4457 continue;
4457 page = ACCESS_ONCE(c->page); 4458 page = ACCESS_ONCE(c->page);
4458 if (page) { 4459 if (page) {
4459 if (flags & SO_TOTAL) 4460 if (flags & SO_TOTAL)
4460 x = page->objects; 4461 x = page->objects;
4461 else if (flags & SO_OBJECTS) 4462 else if (flags & SO_OBJECTS)
4462 x = page->inuse; 4463 x = page->inuse;
4463 else 4464 else
4464 x = 1; 4465 x = 1;
4465 4466
4466 total += x; 4467 total += x;
4467 nodes[node] += x; 4468 nodes[node] += x;
4468 } 4469 }
4469 page = c->partial; 4470 page = c->partial;
4470 4471
4471 if (page) { 4472 if (page) {
4472 x = page->pobjects; 4473 x = page->pobjects;
4473 total += x; 4474 total += x;
4474 nodes[node] += x; 4475 nodes[node] += x;
4475 } 4476 }
4476 per_cpu[node]++; 4477 per_cpu[node]++;
4477 } 4478 }
4478 } 4479 }
4479 4480
4480 lock_memory_hotplug(); 4481 lock_memory_hotplug();
4481 #ifdef CONFIG_SLUB_DEBUG 4482 #ifdef CONFIG_SLUB_DEBUG
4482 if (flags & SO_ALL) { 4483 if (flags & SO_ALL) {
4483 for_each_node_state(node, N_NORMAL_MEMORY) { 4484 for_each_node_state(node, N_NORMAL_MEMORY) {
4484 struct kmem_cache_node *n = get_node(s, node); 4485 struct kmem_cache_node *n = get_node(s, node);
4485 4486
4486 if (flags & SO_TOTAL) 4487 if (flags & SO_TOTAL)
4487 x = atomic_long_read(&n->total_objects); 4488 x = atomic_long_read(&n->total_objects);
4488 else if (flags & SO_OBJECTS) 4489 else if (flags & SO_OBJECTS)
4489 x = atomic_long_read(&n->total_objects) - 4490 x = atomic_long_read(&n->total_objects) -
4490 count_partial(n, count_free); 4491 count_partial(n, count_free);
4491 4492
4492 else 4493 else
4493 x = atomic_long_read(&n->nr_slabs); 4494 x = atomic_long_read(&n->nr_slabs);
4494 total += x; 4495 total += x;
4495 nodes[node] += x; 4496 nodes[node] += x;
4496 } 4497 }
4497 4498
4498 } else 4499 } else
4499 #endif 4500 #endif
4500 if (flags & SO_PARTIAL) { 4501 if (flags & SO_PARTIAL) {
4501 for_each_node_state(node, N_NORMAL_MEMORY) { 4502 for_each_node_state(node, N_NORMAL_MEMORY) {
4502 struct kmem_cache_node *n = get_node(s, node); 4503 struct kmem_cache_node *n = get_node(s, node);
4503 4504
4504 if (flags & SO_TOTAL) 4505 if (flags & SO_TOTAL)
4505 x = count_partial(n, count_total); 4506 x = count_partial(n, count_total);
4506 else if (flags & SO_OBJECTS) 4507 else if (flags & SO_OBJECTS)
4507 x = count_partial(n, count_inuse); 4508 x = count_partial(n, count_inuse);
4508 else 4509 else
4509 x = n->nr_partial; 4510 x = n->nr_partial;
4510 total += x; 4511 total += x;
4511 nodes[node] += x; 4512 nodes[node] += x;
4512 } 4513 }
4513 } 4514 }
4514 x = sprintf(buf, "%lu", total); 4515 x = sprintf(buf, "%lu", total);
4515 #ifdef CONFIG_NUMA 4516 #ifdef CONFIG_NUMA
4516 for_each_node_state(node, N_NORMAL_MEMORY) 4517 for_each_node_state(node, N_NORMAL_MEMORY)
4517 if (nodes[node]) 4518 if (nodes[node])
4518 x += sprintf(buf + x, " N%d=%lu", 4519 x += sprintf(buf + x, " N%d=%lu",
4519 node, nodes[node]); 4520 node, nodes[node]);
4520 #endif 4521 #endif
4521 unlock_memory_hotplug(); 4522 unlock_memory_hotplug();
4522 kfree(nodes); 4523 kfree(nodes);
4523 return x + sprintf(buf + x, "\n"); 4524 return x + sprintf(buf + x, "\n");
4524 } 4525 }
4525 4526
4526 #ifdef CONFIG_SLUB_DEBUG 4527 #ifdef CONFIG_SLUB_DEBUG
4527 static int any_slab_objects(struct kmem_cache *s) 4528 static int any_slab_objects(struct kmem_cache *s)
4528 { 4529 {
4529 int node; 4530 int node;
4530 4531
4531 for_each_online_node(node) { 4532 for_each_online_node(node) {
4532 struct kmem_cache_node *n = get_node(s, node); 4533 struct kmem_cache_node *n = get_node(s, node);
4533 4534
4534 if (!n) 4535 if (!n)
4535 continue; 4536 continue;
4536 4537
4537 if (atomic_long_read(&n->total_objects)) 4538 if (atomic_long_read(&n->total_objects))
4538 return 1; 4539 return 1;
4539 } 4540 }
4540 return 0; 4541 return 0;
4541 } 4542 }
4542 #endif 4543 #endif
4543 4544
4544 #define to_slab_attr(n) container_of(n, struct slab_attribute, attr) 4545 #define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
4545 #define to_slab(n) container_of(n, struct kmem_cache, kobj) 4546 #define to_slab(n) container_of(n, struct kmem_cache, kobj)
4546 4547
4547 struct slab_attribute { 4548 struct slab_attribute {
4548 struct attribute attr; 4549 struct attribute attr;
4549 ssize_t (*show)(struct kmem_cache *s, char *buf); 4550 ssize_t (*show)(struct kmem_cache *s, char *buf);
4550 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count); 4551 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
4551 }; 4552 };
4552 4553
4553 #define SLAB_ATTR_RO(_name) \ 4554 #define SLAB_ATTR_RO(_name) \
4554 static struct slab_attribute _name##_attr = \ 4555 static struct slab_attribute _name##_attr = \
4555 __ATTR(_name, 0400, _name##_show, NULL) 4556 __ATTR(_name, 0400, _name##_show, NULL)
4556 4557
4557 #define SLAB_ATTR(_name) \ 4558 #define SLAB_ATTR(_name) \
4558 static struct slab_attribute _name##_attr = \ 4559 static struct slab_attribute _name##_attr = \
4559 __ATTR(_name, 0600, _name##_show, _name##_store) 4560 __ATTR(_name, 0600, _name##_show, _name##_store)
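SLAB_ATTR_RO() and SLAB_ATTR() above stamp out one struct slab_attribute per sysfs file, pasting _name into both the variable name (_name##_attr) and the handler names, with mode 0400 for read-only attributes and 0600 for writable ones. The userspace mock below reproduces the same token-pasting pattern; struct mock_slab_attribute with plain name/mode fields is a simplified stand-in for the kernel's struct attribute and __ATTR(), not the real definition.

/* Hedged userspace mock of the SLAB_ATTR_RO() pattern: a static table
 * entry bundling a name, a mode and a show callback. */
#include <stdio.h>
#include <stddef.h>

struct mock_cache { int size; };

struct mock_slab_attribute {
        const char *name;
        unsigned int mode;
        int (*show)(struct mock_cache *s, char *buf);
        int (*store)(struct mock_cache *s, const char *buf, size_t len);
};

#define MOCK_SLAB_ATTR_RO(_name) \
        static struct mock_slab_attribute _name##_attr = \
                { #_name, 0400, _name##_show, NULL }

static int slab_size_show(struct mock_cache *s, char *buf)
{
        return sprintf(buf, "%d\n", s->size);
}
MOCK_SLAB_ATTR_RO(slab_size);   /* defines slab_size_attr */

int main(void)
{
        struct mock_cache s = { .size = 192 };
        char buf[32];

        slab_size_attr.show(&s, buf);
        printf("%s (mode %o): %s", slab_size_attr.name, slab_size_attr.mode, buf);
        return 0;
}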
4560 4561
4561 static ssize_t slab_size_show(struct kmem_cache *s, char *buf) 4562 static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4562 { 4563 {
4563 return sprintf(buf, "%d\n", s->size); 4564 return sprintf(buf, "%d\n", s->size);
4564 } 4565 }
4565 SLAB_ATTR_RO(slab_size); 4566 SLAB_ATTR_RO(slab_size);
4566 4567
4567 static ssize_t align_show(struct kmem_cache *s, char *buf) 4568 static ssize_t align_show(struct kmem_cache *s, char *buf)
4568 { 4569 {
4569 return sprintf(buf, "%d\n", s->align); 4570 return sprintf(buf, "%d\n", s->align);
4570 } 4571 }
4571 SLAB_ATTR_RO(align); 4572 SLAB_ATTR_RO(align);
4572 4573
4573 static ssize_t object_size_show(struct kmem_cache *s, char *buf) 4574 static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4574 { 4575 {
4575 return sprintf(buf, "%d\n", s->objsize); 4576 return sprintf(buf, "%d\n", s->objsize);
4576 } 4577 }
4577 SLAB_ATTR_RO(object_size); 4578 SLAB_ATTR_RO(object_size);
4578 4579
4579 static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf) 4580 static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4580 { 4581 {
4581 return sprintf(buf, "%d\n", oo_objects(s->oo)); 4582 return sprintf(buf, "%d\n", oo_objects(s->oo));
4582 } 4583 }
4583 SLAB_ATTR_RO(objs_per_slab); 4584 SLAB_ATTR_RO(objs_per_slab);
4584 4585
4585 static ssize_t order_store(struct kmem_cache *s, 4586 static ssize_t order_store(struct kmem_cache *s,
4586 const char *buf, size_t length) 4587 const char *buf, size_t length)
4587 { 4588 {
4588 unsigned long order; 4589 unsigned long order;
4589 int err; 4590 int err;
4590 4591
4591 err = strict_strtoul(buf, 10, &order); 4592 err = strict_strtoul(buf, 10, &order);
4592 if (err) 4593 if (err)
4593 return err; 4594 return err;
4594 4595
4595 if (order > slub_max_order || order < slub_min_order) 4596 if (order > slub_max_order || order < slub_min_order)
4596 return -EINVAL; 4597 return -EINVAL;
4597 4598
4598 calculate_sizes(s, order); 4599 calculate_sizes(s, order);
4599 return length; 4600 return length;
4600 } 4601 }
4601 4602
4602 static ssize_t order_show(struct kmem_cache *s, char *buf) 4603 static ssize_t order_show(struct kmem_cache *s, char *buf)
4603 { 4604 {
4604 return sprintf(buf, "%d\n", oo_order(s->oo)); 4605 return sprintf(buf, "%d\n", oo_order(s->oo));
4605 } 4606 }
4606 SLAB_ATTR(order); 4607 SLAB_ATTR(order);
4607 4608
4608 static ssize_t min_partial_show(struct kmem_cache *s, char *buf) 4609 static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4609 { 4610 {
4610 return sprintf(buf, "%lu\n", s->min_partial); 4611 return sprintf(buf, "%lu\n", s->min_partial);
4611 } 4612 }
4612 4613
4613 static ssize_t min_partial_store(struct kmem_cache *s, const char *buf, 4614 static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4614 size_t length) 4615 size_t length)
4615 { 4616 {
4616 unsigned long min; 4617 unsigned long min;
4617 int err; 4618 int err;
4618 4619
4619 err = strict_strtoul(buf, 10, &min); 4620 err = strict_strtoul(buf, 10, &min);
4620 if (err) 4621 if (err)
4621 return err; 4622 return err;
4622 4623
4623 set_min_partial(s, min); 4624 set_min_partial(s, min);
4624 return length; 4625 return length;
4625 } 4626 }
4626 SLAB_ATTR(min_partial); 4627 SLAB_ATTR(min_partial);
4627 4628
4628 static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf) 4629 static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
4629 { 4630 {
4630 return sprintf(buf, "%u\n", s->cpu_partial); 4631 return sprintf(buf, "%u\n", s->cpu_partial);
4631 } 4632 }
4632 4633
4633 static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf, 4634 static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4634 size_t length) 4635 size_t length)
4635 { 4636 {
4636 unsigned long objects; 4637 unsigned long objects;
4637 int err; 4638 int err;
4638 4639
4639 err = strict_strtoul(buf, 10, &objects); 4640 err = strict_strtoul(buf, 10, &objects);
4640 if (err) 4641 if (err)
4641 return err; 4642 return err;
4642 4643
4643 s->cpu_partial = objects; 4644 s->cpu_partial = objects;
4644 flush_all(s); 4645 flush_all(s);
4645 return length; 4646 return length;
4646 } 4647 }
4647 SLAB_ATTR(cpu_partial); 4648 SLAB_ATTR(cpu_partial);
4648 4649
4649 static ssize_t ctor_show(struct kmem_cache *s, char *buf) 4650 static ssize_t ctor_show(struct kmem_cache *s, char *buf)
4650 { 4651 {
4651 if (!s->ctor) 4652 if (!s->ctor)
4652 return 0; 4653 return 0;
4653 return sprintf(buf, "%pS\n", s->ctor); 4654 return sprintf(buf, "%pS\n", s->ctor);
4654 } 4655 }
4655 SLAB_ATTR_RO(ctor); 4656 SLAB_ATTR_RO(ctor);
4656 4657
4657 static ssize_t aliases_show(struct kmem_cache *s, char *buf) 4658 static ssize_t aliases_show(struct kmem_cache *s, char *buf)
4658 { 4659 {
4659 return sprintf(buf, "%d\n", s->refcount - 1); 4660 return sprintf(buf, "%d\n", s->refcount - 1);
4660 } 4661 }
4661 SLAB_ATTR_RO(aliases); 4662 SLAB_ATTR_RO(aliases);
4662 4663
4663 static ssize_t partial_show(struct kmem_cache *s, char *buf) 4664 static ssize_t partial_show(struct kmem_cache *s, char *buf)
4664 { 4665 {
4665 return show_slab_objects(s, buf, SO_PARTIAL); 4666 return show_slab_objects(s, buf, SO_PARTIAL);
4666 } 4667 }
4667 SLAB_ATTR_RO(partial); 4668 SLAB_ATTR_RO(partial);
4668 4669
4669 static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf) 4670 static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
4670 { 4671 {
4671 return show_slab_objects(s, buf, SO_CPU); 4672 return show_slab_objects(s, buf, SO_CPU);
4672 } 4673 }
4673 SLAB_ATTR_RO(cpu_slabs); 4674 SLAB_ATTR_RO(cpu_slabs);
4674 4675
4675 static ssize_t objects_show(struct kmem_cache *s, char *buf) 4676 static ssize_t objects_show(struct kmem_cache *s, char *buf)
4676 { 4677 {
4677 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS); 4678 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
4678 } 4679 }
4679 SLAB_ATTR_RO(objects); 4680 SLAB_ATTR_RO(objects);
4680 4681
4681 static ssize_t objects_partial_show(struct kmem_cache *s, char *buf) 4682 static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
4682 { 4683 {
4683 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS); 4684 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
4684 } 4685 }
4685 SLAB_ATTR_RO(objects_partial); 4686 SLAB_ATTR_RO(objects_partial);
4686 4687
4687 static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf) 4688 static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
4688 { 4689 {
4689 int objects = 0; 4690 int objects = 0;
4690 int pages = 0; 4691 int pages = 0;
4691 int cpu; 4692 int cpu;
4692 int len; 4693 int len;
4693 4694
4694 for_each_online_cpu(cpu) { 4695 for_each_online_cpu(cpu) {
4695 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial; 4696 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
4696 4697
4697 if (page) { 4698 if (page) {
4698 pages += page->pages; 4699 pages += page->pages;
4699 objects += page->pobjects; 4700 objects += page->pobjects;
4700 } 4701 }
4701 } 4702 }
4702 4703
4703 len = sprintf(buf, "%d(%d)", objects, pages); 4704 len = sprintf(buf, "%d(%d)", objects, pages);
4704 4705
4705 #ifdef CONFIG_SMP 4706 #ifdef CONFIG_SMP
4706 for_each_online_cpu(cpu) { 4707 for_each_online_cpu(cpu) {
4707 struct page *page = per_cpu_ptr(s->cpu_slab, cpu) ->partial; 4708 struct page *page = per_cpu_ptr(s->cpu_slab, cpu) ->partial;
4708 4709
4709 if (page && len < PAGE_SIZE - 20) 4710 if (page && len < PAGE_SIZE - 20)
4710 len += sprintf(buf + len, " C%d=%d(%d)", cpu, 4711 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
4711 page->pobjects, page->pages); 4712 page->pobjects, page->pages);
4712 } 4713 }
4713 #endif 4714 #endif
4714 return len + sprintf(buf + len, "\n"); 4715 return len + sprintf(buf + len, "\n");
4715 } 4716 }
4716 SLAB_ATTR_RO(slabs_cpu_partial); 4717 SLAB_ATTR_RO(slabs_cpu_partial);
4717 4718
4718 static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) 4719 static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
4719 { 4720 {
4720 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); 4721 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
4721 } 4722 }
4722 4723
4723 static ssize_t reclaim_account_store(struct kmem_cache *s, 4724 static ssize_t reclaim_account_store(struct kmem_cache *s,
4724 const char *buf, size_t length) 4725 const char *buf, size_t length)
4725 { 4726 {
4726 s->flags &= ~SLAB_RECLAIM_ACCOUNT; 4727 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
4727 if (buf[0] == '1') 4728 if (buf[0] == '1')
4728 s->flags |= SLAB_RECLAIM_ACCOUNT; 4729 s->flags |= SLAB_RECLAIM_ACCOUNT;
4729 return length; 4730 return length;
4730 } 4731 }
4731 SLAB_ATTR(reclaim_account); 4732 SLAB_ATTR(reclaim_account);
4732 4733
4733 static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf) 4734 static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
4734 { 4735 {
4735 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN)); 4736 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
4736 } 4737 }
4737 SLAB_ATTR_RO(hwcache_align); 4738 SLAB_ATTR_RO(hwcache_align);
4738 4739
4739 #ifdef CONFIG_ZONE_DMA 4740 #ifdef CONFIG_ZONE_DMA
4740 static ssize_t cache_dma_show(struct kmem_cache *s, char *buf) 4741 static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
4741 { 4742 {
4742 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA)); 4743 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
4743 } 4744 }
4744 SLAB_ATTR_RO(cache_dma); 4745 SLAB_ATTR_RO(cache_dma);
4745 #endif 4746 #endif
4746 4747
4747 static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) 4748 static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
4748 { 4749 {
4749 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU)); 4750 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
4750 } 4751 }
4751 SLAB_ATTR_RO(destroy_by_rcu); 4752 SLAB_ATTR_RO(destroy_by_rcu);
4752 4753
4753 static ssize_t reserved_show(struct kmem_cache *s, char *buf) 4754 static ssize_t reserved_show(struct kmem_cache *s, char *buf)
4754 { 4755 {
4755 return sprintf(buf, "%d\n", s->reserved); 4756 return sprintf(buf, "%d\n", s->reserved);
4756 } 4757 }
4757 SLAB_ATTR_RO(reserved); 4758 SLAB_ATTR_RO(reserved);
4758 4759
4759 #ifdef CONFIG_SLUB_DEBUG 4760 #ifdef CONFIG_SLUB_DEBUG
4760 static ssize_t slabs_show(struct kmem_cache *s, char *buf) 4761 static ssize_t slabs_show(struct kmem_cache *s, char *buf)
4761 { 4762 {
4762 return show_slab_objects(s, buf, SO_ALL); 4763 return show_slab_objects(s, buf, SO_ALL);
4763 } 4764 }
4764 SLAB_ATTR_RO(slabs); 4765 SLAB_ATTR_RO(slabs);
4765 4766
4766 static ssize_t total_objects_show(struct kmem_cache *s, char *buf) 4767 static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
4767 { 4768 {
4768 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL); 4769 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
4769 } 4770 }
4770 SLAB_ATTR_RO(total_objects); 4771 SLAB_ATTR_RO(total_objects);
4771 4772
4772 static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf) 4773 static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
4773 { 4774 {
4774 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE)); 4775 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
4775 } 4776 }
4776 4777
4777 static ssize_t sanity_checks_store(struct kmem_cache *s, 4778 static ssize_t sanity_checks_store(struct kmem_cache *s,
4778 const char *buf, size_t length) 4779 const char *buf, size_t length)
4779 { 4780 {
4780 s->flags &= ~SLAB_DEBUG_FREE; 4781 s->flags &= ~SLAB_DEBUG_FREE;
4781 if (buf[0] == '1') { 4782 if (buf[0] == '1') {
4782 s->flags &= ~__CMPXCHG_DOUBLE; 4783 s->flags &= ~__CMPXCHG_DOUBLE;
4783 s->flags |= SLAB_DEBUG_FREE; 4784 s->flags |= SLAB_DEBUG_FREE;
4784 } 4785 }
4785 return length; 4786 return length;
4786 } 4787 }
4787 SLAB_ATTR(sanity_checks); 4788 SLAB_ATTR(sanity_checks);
4788 4789
4789 static ssize_t trace_show(struct kmem_cache *s, char *buf) 4790 static ssize_t trace_show(struct kmem_cache *s, char *buf)
4790 { 4791 {
4791 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE)); 4792 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
4792 } 4793 }
4793 4794
4794 static ssize_t trace_store(struct kmem_cache *s, const char *buf, 4795 static ssize_t trace_store(struct kmem_cache *s, const char *buf,
4795 size_t length) 4796 size_t length)
4796 { 4797 {
4797 s->flags &= ~SLAB_TRACE; 4798 s->flags &= ~SLAB_TRACE;
4798 if (buf[0] == '1') { 4799 if (buf[0] == '1') {
4799 s->flags &= ~__CMPXCHG_DOUBLE; 4800 s->flags &= ~__CMPXCHG_DOUBLE;
4800 s->flags |= SLAB_TRACE; 4801 s->flags |= SLAB_TRACE;
4801 } 4802 }
4802 return length; 4803 return length;
4803 } 4804 }
4804 SLAB_ATTR(trace); 4805 SLAB_ATTR(trace);
4805 4806
4806 static ssize_t red_zone_show(struct kmem_cache *s, char *buf) 4807 static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
4807 { 4808 {
4808 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE)); 4809 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
4809 } 4810 }
4810 4811
4811 static ssize_t red_zone_store(struct kmem_cache *s, 4812 static ssize_t red_zone_store(struct kmem_cache *s,
4812 const char *buf, size_t length) 4813 const char *buf, size_t length)
4813 { 4814 {
4814 if (any_slab_objects(s)) 4815 if (any_slab_objects(s))
4815 return -EBUSY; 4816 return -EBUSY;
4816 4817
4817 s->flags &= ~SLAB_RED_ZONE; 4818 s->flags &= ~SLAB_RED_ZONE;
4818 if (buf[0] == '1') { 4819 if (buf[0] == '1') {
4819 s->flags &= ~__CMPXCHG_DOUBLE; 4820 s->flags &= ~__CMPXCHG_DOUBLE;
4820 s->flags |= SLAB_RED_ZONE; 4821 s->flags |= SLAB_RED_ZONE;
4821 } 4822 }
4822 calculate_sizes(s, -1); 4823 calculate_sizes(s, -1);
4823 return length; 4824 return length;
4824 } 4825 }
4825 SLAB_ATTR(red_zone); 4826 SLAB_ATTR(red_zone);
4826 4827
4827 static ssize_t poison_show(struct kmem_cache *s, char *buf) 4828 static ssize_t poison_show(struct kmem_cache *s, char *buf)
4828 { 4829 {
4829 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON)); 4830 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
4830 } 4831 }
4831 4832
4832 static ssize_t poison_store(struct kmem_cache *s, 4833 static ssize_t poison_store(struct kmem_cache *s,
4833 const char *buf, size_t length) 4834 const char *buf, size_t length)
4834 { 4835 {
4835 if (any_slab_objects(s)) 4836 if (any_slab_objects(s))
4836 return -EBUSY; 4837 return -EBUSY;
4837 4838
4838 s->flags &= ~SLAB_POISON; 4839 s->flags &= ~SLAB_POISON;
4839 if (buf[0] == '1') { 4840 if (buf[0] == '1') {
4840 s->flags &= ~__CMPXCHG_DOUBLE; 4841 s->flags &= ~__CMPXCHG_DOUBLE;
4841 s->flags |= SLAB_POISON; 4842 s->flags |= SLAB_POISON;
4842 } 4843 }
4843 calculate_sizes(s, -1); 4844 calculate_sizes(s, -1);
4844 return length; 4845 return length;
4845 } 4846 }
4846 SLAB_ATTR(poison); 4847 SLAB_ATTR(poison);
4847 4848
4848 static ssize_t store_user_show(struct kmem_cache *s, char *buf) 4849 static ssize_t store_user_show(struct kmem_cache *s, char *buf)
4849 { 4850 {
4850 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER)); 4851 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
4851 } 4852 }
4852 4853
4853 static ssize_t store_user_store(struct kmem_cache *s, 4854 static ssize_t store_user_store(struct kmem_cache *s,
4854 const char *buf, size_t length) 4855 const char *buf, size_t length)
4855 { 4856 {
4856 if (any_slab_objects(s)) 4857 if (any_slab_objects(s))
4857 return -EBUSY; 4858 return -EBUSY;
4858 4859
4859 s->flags &= ~SLAB_STORE_USER; 4860 s->flags &= ~SLAB_STORE_USER;
4860 if (buf[0] == '1') { 4861 if (buf[0] == '1') {
4861 s->flags &= ~__CMPXCHG_DOUBLE; 4862 s->flags &= ~__CMPXCHG_DOUBLE;
4862 s->flags |= SLAB_STORE_USER; 4863 s->flags |= SLAB_STORE_USER;
4863 } 4864 }
4864 calculate_sizes(s, -1); 4865 calculate_sizes(s, -1);
4865 return length; 4866 return length;
4866 } 4867 }
4867 SLAB_ATTR(store_user); 4868 SLAB_ATTR(store_user);
4868 4869
4869 static ssize_t validate_show(struct kmem_cache *s, char *buf) 4870 static ssize_t validate_show(struct kmem_cache *s, char *buf)
4870 { 4871 {
4871 return 0; 4872 return 0;
4872 } 4873 }
4873 4874
4874 static ssize_t validate_store(struct kmem_cache *s, 4875 static ssize_t validate_store(struct kmem_cache *s,
4875 const char *buf, size_t length) 4876 const char *buf, size_t length)
4876 { 4877 {
4877 int ret = -EINVAL; 4878 int ret = -EINVAL;
4878 4879
4879 if (buf[0] == '1') { 4880 if (buf[0] == '1') {
4880 ret = validate_slab_cache(s); 4881 ret = validate_slab_cache(s);
4881 if (ret >= 0) 4882 if (ret >= 0)
4882 ret = length; 4883 ret = length;
4883 } 4884 }
4884 return ret; 4885 return ret;
4885 } 4886 }
4886 SLAB_ATTR(validate); 4887 SLAB_ATTR(validate);
4887 4888
4888 static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf) 4889 static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
4889 { 4890 {
4890 if (!(s->flags & SLAB_STORE_USER)) 4891 if (!(s->flags & SLAB_STORE_USER))
4891 return -ENOSYS; 4892 return -ENOSYS;
4892 return list_locations(s, buf, TRACK_ALLOC); 4893 return list_locations(s, buf, TRACK_ALLOC);
4893 } 4894 }
4894 SLAB_ATTR_RO(alloc_calls); 4895 SLAB_ATTR_RO(alloc_calls);
4895 4896
4896 static ssize_t free_calls_show(struct kmem_cache *s, char *buf) 4897 static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
4897 { 4898 {
4898 if (!(s->flags & SLAB_STORE_USER)) 4899 if (!(s->flags & SLAB_STORE_USER))
4899 return -ENOSYS; 4900 return -ENOSYS;
4900 return list_locations(s, buf, TRACK_FREE); 4901 return list_locations(s, buf, TRACK_FREE);
4901 } 4902 }
4902 SLAB_ATTR_RO(free_calls); 4903 SLAB_ATTR_RO(free_calls);
4903 #endif /* CONFIG_SLUB_DEBUG */ 4904 #endif /* CONFIG_SLUB_DEBUG */
4904 4905
4905 #ifdef CONFIG_FAILSLAB 4906 #ifdef CONFIG_FAILSLAB
4906 static ssize_t failslab_show(struct kmem_cache *s, char *buf) 4907 static ssize_t failslab_show(struct kmem_cache *s, char *buf)
4907 { 4908 {
4908 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); 4909 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
4909 } 4910 }
4910 4911
4911 static ssize_t failslab_store(struct kmem_cache *s, const char *buf, 4912 static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
4912 size_t length) 4913 size_t length)
4913 { 4914 {
4914 s->flags &= ~SLAB_FAILSLAB; 4915 s->flags &= ~SLAB_FAILSLAB;
4915 if (buf[0] == '1') 4916 if (buf[0] == '1')
4916 s->flags |= SLAB_FAILSLAB; 4917 s->flags |= SLAB_FAILSLAB;
4917 return length; 4918 return length;
4918 } 4919 }
4919 SLAB_ATTR(failslab); 4920 SLAB_ATTR(failslab);
4920 #endif 4921 #endif
4921 4922
4922 static ssize_t shrink_show(struct kmem_cache *s, char *buf) 4923 static ssize_t shrink_show(struct kmem_cache *s, char *buf)
4923 { 4924 {
4924 return 0; 4925 return 0;
4925 } 4926 }
4926 4927
4927 static ssize_t shrink_store(struct kmem_cache *s, 4928 static ssize_t shrink_store(struct kmem_cache *s,
4928 const char *buf, size_t length) 4929 const char *buf, size_t length)
4929 { 4930 {
4930 if (buf[0] == '1') { 4931 if (buf[0] == '1') {
4931 int rc = kmem_cache_shrink(s); 4932 int rc = kmem_cache_shrink(s);
4932 4933
4933 if (rc) 4934 if (rc)
4934 return rc; 4935 return rc;
4935 } else 4936 } else
4936 return -EINVAL; 4937 return -EINVAL;
4937 return length; 4938 return length;
4938 } 4939 }
4939 SLAB_ATTR(shrink); 4940 SLAB_ATTR(shrink);
4940 4941
4941 #ifdef CONFIG_NUMA 4942 #ifdef CONFIG_NUMA
4942 static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf) 4943 static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
4943 { 4944 {
4944 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10); 4945 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
4945 } 4946 }
4946 4947
4947 static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s, 4948 static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
4948 const char *buf, size_t length) 4949 const char *buf, size_t length)
4949 { 4950 {
4950 unsigned long ratio; 4951 unsigned long ratio;
4951 int err; 4952 int err;
4952 4953
4953 err = strict_strtoul(buf, 10, &ratio); 4954 err = strict_strtoul(buf, 10, &ratio);
4954 if (err) 4955 if (err)
4955 return err; 4956 return err;
4956 4957
4957 if (ratio <= 100) 4958 if (ratio <= 100)
4958 s->remote_node_defrag_ratio = ratio * 10; 4959 s->remote_node_defrag_ratio = ratio * 10;
4959 4960
4960 return length; 4961 return length;
4961 } 4962 }
4962 SLAB_ATTR(remote_node_defrag_ratio); 4963 SLAB_ATTR(remote_node_defrag_ratio);
4963 #endif 4964 #endif
4964 4965
4965 #ifdef CONFIG_SLUB_STATS 4966 #ifdef CONFIG_SLUB_STATS
4966 static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) 4967 static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
4967 { 4968 {
4968 unsigned long sum = 0; 4969 unsigned long sum = 0;
4969 int cpu; 4970 int cpu;
4970 int len; 4971 int len;
4971 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); 4972 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
4972 4973
4973 if (!data) 4974 if (!data)
4974 return -ENOMEM; 4975 return -ENOMEM;
4975 4976
4976 for_each_online_cpu(cpu) { 4977 for_each_online_cpu(cpu) {
4977 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si]; 4978 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
4978 4979
4979 data[cpu] = x; 4980 data[cpu] = x;
4980 sum += x; 4981 sum += x;
4981 } 4982 }
4982 4983
4983 len = sprintf(buf, "%lu", sum); 4984 len = sprintf(buf, "%lu", sum);
4984 4985
4985 #ifdef CONFIG_SMP 4986 #ifdef CONFIG_SMP
4986 for_each_online_cpu(cpu) { 4987 for_each_online_cpu(cpu) {
4987 if (data[cpu] && len < PAGE_SIZE - 20) 4988 if (data[cpu] && len < PAGE_SIZE - 20)
4988 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]); 4989 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
4989 } 4990 }
4990 #endif 4991 #endif
4991 kfree(data); 4992 kfree(data);
4992 return len + sprintf(buf + len, "\n"); 4993 return len + sprintf(buf + len, "\n");
4993 } 4994 }
4994 4995
4995 static void clear_stat(struct kmem_cache *s, enum stat_item si) 4996 static void clear_stat(struct kmem_cache *s, enum stat_item si)
4996 { 4997 {
4997 int cpu; 4998 int cpu;
4998 4999
4999 for_each_online_cpu(cpu) 5000 for_each_online_cpu(cpu)
5000 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0; 5001 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
5001 } 5002 }
5002 5003
5003 #define STAT_ATTR(si, text) \ 5004 #define STAT_ATTR(si, text) \
5004 static ssize_t text##_show(struct kmem_cache *s, char *buf) \ 5005 static ssize_t text##_show(struct kmem_cache *s, char *buf) \
5005 { \ 5006 { \
5006 return show_stat(s, buf, si); \ 5007 return show_stat(s, buf, si); \
5007 } \ 5008 } \
5008 static ssize_t text##_store(struct kmem_cache *s, \ 5009 static ssize_t text##_store(struct kmem_cache *s, \
5009 const char *buf, size_t length) \ 5010 const char *buf, size_t length) \
5010 { \ 5011 { \
5011 if (buf[0] != '0') \ 5012 if (buf[0] != '0') \
5012 return -EINVAL; \ 5013 return -EINVAL; \
5013 clear_stat(s, si); \ 5014 clear_stat(s, si); \
5014 return length; \ 5015 return length; \
5015 } \ 5016 } \
5016 SLAB_ATTR(text); \ 5017 SLAB_ATTR(text); \
5017 5018
5018 STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath); 5019 STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
5019 STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath); 5020 STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
5020 STAT_ATTR(FREE_FASTPATH, free_fastpath); 5021 STAT_ATTR(FREE_FASTPATH, free_fastpath);
5021 STAT_ATTR(FREE_SLOWPATH, free_slowpath); 5022 STAT_ATTR(FREE_SLOWPATH, free_slowpath);
5022 STAT_ATTR(FREE_FROZEN, free_frozen); 5023 STAT_ATTR(FREE_FROZEN, free_frozen);
5023 STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial); 5024 STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
5024 STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial); 5025 STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
5025 STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial); 5026 STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
5026 STAT_ATTR(ALLOC_SLAB, alloc_slab); 5027 STAT_ATTR(ALLOC_SLAB, alloc_slab);
5027 STAT_ATTR(ALLOC_REFILL, alloc_refill); 5028 STAT_ATTR(ALLOC_REFILL, alloc_refill);
5028 STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch); 5029 STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
5029 STAT_ATTR(FREE_SLAB, free_slab); 5030 STAT_ATTR(FREE_SLAB, free_slab);
5030 STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush); 5031 STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
5031 STAT_ATTR(DEACTIVATE_FULL, deactivate_full); 5032 STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
5032 STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty); 5033 STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
5033 STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head); 5034 STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
5034 STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail); 5035 STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
5035 STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees); 5036 STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
5036 STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass); 5037 STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
5037 STAT_ATTR(ORDER_FALLBACK, order_fallback); 5038 STAT_ATTR(ORDER_FALLBACK, order_fallback);
5038 STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail); 5039 STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
5039 STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail); 5040 STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
5040 STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc); 5041 STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
5041 STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free); 5042 STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
5042 #endif 5043 #endif
5043 5044
5044 static struct attribute *slab_attrs[] = { 5045 static struct attribute *slab_attrs[] = {
5045 &slab_size_attr.attr, 5046 &slab_size_attr.attr,
5046 &object_size_attr.attr, 5047 &object_size_attr.attr,
5047 &objs_per_slab_attr.attr, 5048 &objs_per_slab_attr.attr,
5048 &order_attr.attr, 5049 &order_attr.attr,
5049 &min_partial_attr.attr, 5050 &min_partial_attr.attr,
5050 &cpu_partial_attr.attr, 5051 &cpu_partial_attr.attr,
5051 &objects_attr.attr, 5052 &objects_attr.attr,
5052 &objects_partial_attr.attr, 5053 &objects_partial_attr.attr,
5053 &partial_attr.attr, 5054 &partial_attr.attr,
5054 &cpu_slabs_attr.attr, 5055 &cpu_slabs_attr.attr,
5055 &ctor_attr.attr, 5056 &ctor_attr.attr,
5056 &aliases_attr.attr, 5057 &aliases_attr.attr,
5057 &align_attr.attr, 5058 &align_attr.attr,
5058 &hwcache_align_attr.attr, 5059 &hwcache_align_attr.attr,
5059 &reclaim_account_attr.attr, 5060 &reclaim_account_attr.attr,
5060 &destroy_by_rcu_attr.attr, 5061 &destroy_by_rcu_attr.attr,
5061 &shrink_attr.attr, 5062 &shrink_attr.attr,
5062 &reserved_attr.attr, 5063 &reserved_attr.attr,
5063 &slabs_cpu_partial_attr.attr, 5064 &slabs_cpu_partial_attr.attr,
5064 #ifdef CONFIG_SLUB_DEBUG 5065 #ifdef CONFIG_SLUB_DEBUG
5065 &total_objects_attr.attr, 5066 &total_objects_attr.attr,
5066 &slabs_attr.attr, 5067 &slabs_attr.attr,
5067 &sanity_checks_attr.attr, 5068 &sanity_checks_attr.attr,
5068 &trace_attr.attr, 5069 &trace_attr.attr,
5069 &red_zone_attr.attr, 5070 &red_zone_attr.attr,
5070 &poison_attr.attr, 5071 &poison_attr.attr,
5071 &store_user_attr.attr, 5072 &store_user_attr.attr,
5072 &validate_attr.attr, 5073 &validate_attr.attr,
5073 &alloc_calls_attr.attr, 5074 &alloc_calls_attr.attr,
5074 &free_calls_attr.attr, 5075 &free_calls_attr.attr,
5075 #endif 5076 #endif
5076 #ifdef CONFIG_ZONE_DMA 5077 #ifdef CONFIG_ZONE_DMA
5077 &cache_dma_attr.attr, 5078 &cache_dma_attr.attr,
5078 #endif 5079 #endif
5079 #ifdef CONFIG_NUMA 5080 #ifdef CONFIG_NUMA
5080 &remote_node_defrag_ratio_attr.attr, 5081 &remote_node_defrag_ratio_attr.attr,
5081 #endif 5082 #endif
5082 #ifdef CONFIG_SLUB_STATS 5083 #ifdef CONFIG_SLUB_STATS
5083 &alloc_fastpath_attr.attr, 5084 &alloc_fastpath_attr.attr,
5084 &alloc_slowpath_attr.attr, 5085 &alloc_slowpath_attr.attr,
5085 &free_fastpath_attr.attr, 5086 &free_fastpath_attr.attr,
5086 &free_slowpath_attr.attr, 5087 &free_slowpath_attr.attr,
5087 &free_frozen_attr.attr, 5088 &free_frozen_attr.attr,
5088 &free_add_partial_attr.attr, 5089 &free_add_partial_attr.attr,
5089 &free_remove_partial_attr.attr, 5090 &free_remove_partial_attr.attr,
5090 &alloc_from_partial_attr.attr, 5091 &alloc_from_partial_attr.attr,
5091 &alloc_slab_attr.attr, 5092 &alloc_slab_attr.attr,
5092 &alloc_refill_attr.attr, 5093 &alloc_refill_attr.attr,
5093 &alloc_node_mismatch_attr.attr, 5094 &alloc_node_mismatch_attr.attr,
5094 &free_slab_attr.attr, 5095 &free_slab_attr.attr,
5095 &cpuslab_flush_attr.attr, 5096 &cpuslab_flush_attr.attr,
5096 &deactivate_full_attr.attr, 5097 &deactivate_full_attr.attr,
5097 &deactivate_empty_attr.attr, 5098 &deactivate_empty_attr.attr,
5098 &deactivate_to_head_attr.attr, 5099 &deactivate_to_head_attr.attr,
5099 &deactivate_to_tail_attr.attr, 5100 &deactivate_to_tail_attr.attr,
5100 &deactivate_remote_frees_attr.attr, 5101 &deactivate_remote_frees_attr.attr,
5101 &deactivate_bypass_attr.attr, 5102 &deactivate_bypass_attr.attr,
5102 &order_fallback_attr.attr, 5103 &order_fallback_attr.attr,
5103 &cmpxchg_double_fail_attr.attr, 5104 &cmpxchg_double_fail_attr.attr,
5104 &cmpxchg_double_cpu_fail_attr.attr, 5105 &cmpxchg_double_cpu_fail_attr.attr,
5105 &cpu_partial_alloc_attr.attr, 5106 &cpu_partial_alloc_attr.attr,
5106 &cpu_partial_free_attr.attr, 5107 &cpu_partial_free_attr.attr,
5107 #endif 5108 #endif
5108 #ifdef CONFIG_FAILSLAB 5109 #ifdef CONFIG_FAILSLAB
5109 &failslab_attr.attr, 5110 &failslab_attr.attr,
5110 #endif 5111 #endif
5111 5112
5112 NULL 5113 NULL
5113 }; 5114 };
5114 5115
5115 static struct attribute_group slab_attr_group = { 5116 static struct attribute_group slab_attr_group = {
5116 .attrs = slab_attrs, 5117 .attrs = slab_attrs,
5117 }; 5118 };
5118 5119
5119 static ssize_t slab_attr_show(struct kobject *kobj, 5120 static ssize_t slab_attr_show(struct kobject *kobj,
5120 struct attribute *attr, 5121 struct attribute *attr,
5121 char *buf) 5122 char *buf)
5122 { 5123 {
5123 struct slab_attribute *attribute; 5124 struct slab_attribute *attribute;
5124 struct kmem_cache *s; 5125 struct kmem_cache *s;
5125 int err; 5126 int err;
5126 5127
5127 attribute = to_slab_attr(attr); 5128 attribute = to_slab_attr(attr);
5128 s = to_slab(kobj); 5129 s = to_slab(kobj);
5129 5130
5130 if (!attribute->show) 5131 if (!attribute->show)
5131 return -EIO; 5132 return -EIO;
5132 5133
5133 err = attribute->show(s, buf); 5134 err = attribute->show(s, buf);
5134 5135
5135 return err; 5136 return err;
5136 } 5137 }
5137 5138
static ssize_t slab_attr_store(struct kobject *kobj,
                                struct attribute *attr,
                                const char *buf, size_t len)
{
        struct slab_attribute *attribute;
        struct kmem_cache *s;
        int err;

        attribute = to_slab_attr(attr);
        s = to_slab(kobj);

        if (!attribute->store)
                return -EIO;

        err = attribute->store(s, buf, len);

        return err;
}

static void kmem_cache_release(struct kobject *kobj)
{
        struct kmem_cache *s = to_slab(kobj);

        kfree(s->name);
        kfree(s);
}

static const struct sysfs_ops slab_sysfs_ops = {
        .show = slab_attr_show,
        .store = slab_attr_store,
};

static struct kobj_type slab_ktype = {
        .sysfs_ops = &slab_sysfs_ops,
        .release = kmem_cache_release
};

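Every entry in slab_attrs surfaces as a plain file under /sys/kernel/slab/<cache>/ (the "slab" kset is added under kernel_kobj in slab_sysfs_init() below), and a read of such a file is dispatched through slab_sysfs_ops to slab_attr_show(). A minimal userspace sketch of that round trip; the cache directory "kmalloc-64" and the attribute "order" are illustrative examples and depend on the running kernel's configuration:

#include <stdio.h>

/* Hedged sketch: read one SLUB sysfs attribute from userspace.
 * The path below is an example; actual cache directories depend on
 * the running kernel's configuration and on cache merging. */
int main(void)
{
        const char *path = "/sys/kernel/slab/kmalloc-64/order";
        char buf[64];
        FILE *f = fopen(path, "r");

        if (!f) {
                perror(path);
                return 1;
        }
        if (fgets(buf, sizeof(buf), f))   /* value formatted by the attribute's show() */
                printf("%s: %s", path, buf);
        fclose(f);
        return 0;
}

Writes to writable attributes take the symmetric path through slab_attr_store().
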
static int uevent_filter(struct kset *kset, struct kobject *kobj)
{
        struct kobj_type *ktype = get_ktype(kobj);

        if (ktype == &slab_ktype)
                return 1;
        return 0;
}

static const struct kset_uevent_ops slab_uevent_ops = {
        .filter = uevent_filter,
};

static struct kset *slab_kset;

#define ID_STR_LENGTH 64

/* Create a unique string id for a slab cache:
 *
 * Format       :[flags-]size
 */
static char *create_unique_id(struct kmem_cache *s)
{
        char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
        char *p = name;

        BUG_ON(!name);

        *p++ = ':';
        /*
         * First flags affecting slabcache operations. We will only
         * get here for aliasable slabs so we do not need to support
         * too many flags. The flags here must cover all flags that
         * are matched during merging to guarantee that the id is
         * unique.
         */
        if (s->flags & SLAB_CACHE_DMA)
                *p++ = 'd';
        if (s->flags & SLAB_RECLAIM_ACCOUNT)
                *p++ = 'a';
        if (s->flags & SLAB_DEBUG_FREE)
                *p++ = 'F';
        if (!(s->flags & SLAB_NOTRACK))
                *p++ = 't';
        if (p != name + 1)
                *p++ = '-';
        p += sprintf(p, "%07d", s->size);
        BUG_ON(p > name + ID_STR_LENGTH - 1);
        return name;
}

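So the id always starts with ':', followed by one character per matched flag in the fixed order d/a/F/t, a '-' whenever at least one flag character was emitted, and the object size zero-padded to seven digits. A small userspace mock-up of the same formatting, using placeholder flag bits rather than the kernel's SLAB_* values:

#include <stdio.h>

/* Hedged sketch mimicking create_unique_id()'s ":[flags-]size" format.
 * The flag bits are illustrative placeholders, not the kernel's values. */
#define EX_CACHE_DMA        0x1
#define EX_RECLAIM_ACCOUNT  0x2
#define EX_DEBUG_FREE       0x4
#define EX_NOTRACK          0x8

static void format_id(char *buf, unsigned long flags, int size)
{
        char *p = buf;

        *p++ = ':';
        if (flags & EX_CACHE_DMA)
                *p++ = 'd';
        if (flags & EX_RECLAIM_ACCOUNT)
                *p++ = 'a';
        if (flags & EX_DEBUG_FREE)
                *p++ = 'F';
        if (!(flags & EX_NOTRACK))
                *p++ = 't';
        if (p != buf + 1)
                *p++ = '-';
        sprintf(p, "%07d", size);
}

int main(void)
{
        char id[64];

        /* A DMA, reclaim-accounted, tracked cache of size 192 formats as ":dat-0000192". */
        format_id(id, EX_CACHE_DMA | EX_RECLAIM_ACCOUNT, 192);
        printf("%s\n", id);
        return 0;
}
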
static int sysfs_slab_add(struct kmem_cache *s)
{
        int err;
        const char *name;
        int unmergeable;

        if (slab_state < SYSFS)
                /* Defer until later */
                return 0;

        unmergeable = slab_unmergeable(s);
        if (unmergeable) {
                /*
                 * Slabcache can never be merged so we can use the name proper.
                 * This is typically the case for debug situations. In that
                 * case we can catch duplicate names easily.
                 */
                sysfs_remove_link(&slab_kset->kobj, s->name);
                name = s->name;
        } else {
                /*
                 * Create a unique name for the slab as a target
                 * for the symlinks.
                 */
                name = create_unique_id(s);
        }

        s->kobj.kset = slab_kset;
        err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name);
        if (err) {
                kobject_put(&s->kobj);
                return err;
        }

        err = sysfs_create_group(&s->kobj, &slab_attr_group);
        if (err) {
                kobject_del(&s->kobj);
                kobject_put(&s->kobj);
                return err;
        }
        kobject_uevent(&s->kobj, KOBJ_ADD);
        if (!unmergeable) {
                /* Setup first alias */
                sysfs_slab_alias(s, s->name);
                kfree(name);
        }
        return 0;
}

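For mergeable caches the sysfs directory created above is named after the unique id, and the human-readable cache name only exists as a symlink installed via sysfs_slab_alias(). A hedged userspace check of that layout; "kmalloc-96" is just an example name and may be a plain directory on kernels that disable merging:

#include <stdio.h>
#include <unistd.h>

/* Hedged sketch: show where a (possibly merged) cache name points.
 * "kmalloc-96" is an illustrative name; merged caches typically resolve
 * to a ":<flags>-<size>" directory named by create_unique_id(). */
int main(void)
{
        char target[256];
        ssize_t n = readlink("/sys/kernel/slab/kmalloc-96", target, sizeof(target) - 1);

        if (n < 0) {
                /* Not a symlink (unmergeable cache) or the cache does not exist. */
                perror("readlink");
                return 1;
        }
        target[n] = '\0';
        printf("kmalloc-96 -> %s\n", target);
        return 0;
}
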
static void sysfs_slab_remove(struct kmem_cache *s)
{
        if (slab_state < SYSFS)
                /*
                 * Sysfs has not been setup yet so no need to remove the
                 * cache from sysfs.
                 */
                return;

        kobject_uevent(&s->kobj, KOBJ_REMOVE);
        kobject_del(&s->kobj);
        kobject_put(&s->kobj);
}

/*
 * Need to buffer aliases during bootup until sysfs becomes
 * available lest we lose that information.
 */
struct saved_alias {
        struct kmem_cache *s;
        const char *name;
        struct saved_alias *next;
};

static struct saved_alias *alias_list;

static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
{
        struct saved_alias *al;

        if (slab_state == SYSFS) {
                /*
                 * If we have a leftover link then remove it.
                 */
                sysfs_remove_link(&slab_kset->kobj, name);
                return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
        }

        al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
        if (!al)
                return -ENOMEM;

        al->s = s;
        al->name = name;
        al->next = alias_list;
        alias_list = al;
        return 0;
}

static int __init slab_sysfs_init(void)
{
        struct kmem_cache *s;
        int err;

        down_write(&slub_lock);

        slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
        if (!slab_kset) {
                up_write(&slub_lock);
                printk(KERN_ERR "Cannot register slab subsystem.\n");
                return -ENOSYS;
        }

        slab_state = SYSFS;

        list_for_each_entry(s, &slab_caches, list) {
                err = sysfs_slab_add(s);
                if (err)
                        printk(KERN_ERR "SLUB: Unable to add boot slab %s"
                                " to sysfs\n", s->name);
        }

        while (alias_list) {
                struct saved_alias *al = alias_list;

                alias_list = alias_list->next;
                err = sysfs_slab_alias(al->s, al->name);
                if (err)
                        printk(KERN_ERR "SLUB: Unable to add boot slab alias"
                                " %s to sysfs\n", al->name);
                kfree(al);
        }

        up_write(&slub_lock);
        resiliency_test();
        return 0;
}

__initcall(slab_sysfs_init);
#endif /* CONFIG_SYSFS */

/*
 * The /proc/slabinfo ABI
 */
#ifdef CONFIG_SLABINFO
static void print_slabinfo_header(struct seq_file *m)
{
        seq_puts(m, "slabinfo - version: 2.1\n");
        seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
                 "<objperslab> <pagesperslab>");
        seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
        seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
        seq_putc(m, '\n');
}

static void *s_start(struct seq_file *m, loff_t *pos)
{
        loff_t n = *pos;

        down_read(&slub_lock);
        if (!n)
                print_slabinfo_header(m);

        return seq_list_start(&slab_caches, *pos);
}

static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
        return seq_list_next(p, &slab_caches, pos);
}

static void s_stop(struct seq_file *m, void *p)
{
        up_read(&slub_lock);
}

static int s_show(struct seq_file *m, void *p)
{
        unsigned long nr_partials = 0;
        unsigned long nr_slabs = 0;
        unsigned long nr_inuse = 0;
        unsigned long nr_objs = 0;
        unsigned long nr_free = 0;
        struct kmem_cache *s;
        int node;

        s = list_entry(p, struct kmem_cache, list);

        for_each_online_node(node) {
                struct kmem_cache_node *n = get_node(s, node);

                if (!n)
                        continue;

                nr_partials += n->nr_partial;
                nr_slabs += atomic_long_read(&n->nr_slabs);
                nr_objs += atomic_long_read(&n->total_objects);
                nr_free += count_partial(n, count_free);
        }

        nr_inuse = nr_objs - nr_free;

        seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
                   nr_objs, s->size, oo_objects(s->oo),
                   (1 << oo_order(s->oo)));
        seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
        seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
                   0UL);
        seq_putc(m, '\n');
        return 0;
}

static const struct seq_operations slabinfo_op = {
        .start = s_start,
        .next = s_next,
        .stop = s_stop,
        .show = s_show,
};

static int slabinfo_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &slabinfo_op);
}

static const struct file_operations proc_slabinfo_operations = {
        .open = slabinfo_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = seq_release,
};

static int __init slab_proc_init(void)
{
        proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
        return 0;
}
module_init(slab_proc_init);
#endif /* CONFIG_SLABINFO */
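
Once slab_proc_init() has registered the file, every line userspace reads from /proc/slabinfo is produced by s_show() in the version 2.1 format printed by print_slabinfo_header(). A minimal reader sketch, assuming CONFIG_SLABINFO and sufficient privileges (the file is created with mode S_IRUSR):

#include <stdio.h>

/* Hedged sketch: dump the first few lines of /proc/slabinfo as emitted
 * by print_slabinfo_header() and s_show(). */
int main(void)
{
        char line[512];
        int count = 0;
        FILE *f = fopen("/proc/slabinfo", "r");

        if (!f) {
                perror("/proc/slabinfo");   /* requires CONFIG_SLABINFO and, typically, root */
                return 1;
        }
        while (fgets(line, sizeof(line), f) && count++ < 5)
                fputs(line, stdout);
        fclose(f);
        return 0;
}

The tunables and sharedavail columns are hardwired to zero in s_show() above, which keeps the /proc/slabinfo layout intact even though SLUB does not implement those SLAB-era per-cache tunables.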