Doug / smarc-fsl-linux-kernel

1

/*

1

/*

2

* linux/mm/slab.c

2

* linux/mm/slab.c

3

* Written by Mark Hemment, 1996/97.

3

* Written by Mark Hemment, 1996/97.

4

* (markhe@nextd.demon.co.uk)

4

* (markhe@nextd.demon.co.uk)

5

*

5

*

6

* kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli

6

* kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli

7

*

7

*

8

* Major cleanup, different bufctl logic, per-cpu arrays

8

* Major cleanup, different bufctl logic, per-cpu arrays

9

10

*

10

*

11

* Cleanup, make the head arrays unconditional, preparation for NUMA

11

* Cleanup, make the head arrays unconditional, preparation for NUMA

12

13

*

13

*

14

* An implementation of the Slab Allocator as described in outline in;

14

* An implementation of the Slab Allocator as described in outline in;

15

* UNIX Internals: The New Frontiers by Uresh Vahalia

15

* UNIX Internals: The New Frontiers by Uresh Vahalia

16

* Pub: Prentice Hall ISBN 0-13-101908-2

16

* Pub: Prentice Hall ISBN 0-13-101908-2

17

* or with a little more detail in;

17

* or with a little more detail in;

18

* The Slab Allocator: An Object-Caching Kernel Memory Allocator

18

* The Slab Allocator: An Object-Caching Kernel Memory Allocator

19

* Jeff Bonwick (Sun Microsystems).

19

* Jeff Bonwick (Sun Microsystems).

20

* Presented at: USENIX Summer 1994 Technical Conference

20

* Presented at: USENIX Summer 1994 Technical Conference

21

*

21

*

22

* The memory is organized in caches, one cache for each object type.

22

* The memory is organized in caches, one cache for each object type.

23

* (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)

23

* (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)

24

* Each cache consists out of many slabs (they are small (usually one

24

* Each cache consists out of many slabs (they are small (usually one

25

* page long) and always contiguous), and each slab contains multiple

25

* page long) and always contiguous), and each slab contains multiple

26

* initialized objects.

26

* initialized objects.

27

*

27

*

28

* This means, that your constructor is used only for newly allocated

28

* This means, that your constructor is used only for newly allocated

29

* slabs and you must pass objects with the same initializations to

29

* slabs and you must pass objects with the same initializations to

30

* kmem_cache_free.

30

* kmem_cache_free.

31

*

31

*

32

* Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,

32

* Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,

33

* normal). If you need a special memory type, then must create a new

33

* normal). If you need a special memory type, then must create a new

34

* cache for that memory type.

34

* cache for that memory type.

35

*

35

*

36

* In order to reduce fragmentation, the slabs are sorted in 3 groups:

36

* In order to reduce fragmentation, the slabs are sorted in 3 groups:

37

* full slabs with 0 free objects

37

* full slabs with 0 free objects

38

* partial slabs

38

* partial slabs

39

* empty slabs with no allocated objects

39

* empty slabs with no allocated objects

40

*

40

*

41

* If partial slabs exist, then new allocations come from these slabs,

41

* If partial slabs exist, then new allocations come from these slabs,

42

* otherwise from empty slabs or new slabs are allocated.

42

* otherwise from empty slabs or new slabs are allocated.

43

*

43

*

44

* kmem_cache_destroy() CAN CRASH if you try to allocate from the cache

44

* kmem_cache_destroy() CAN CRASH if you try to allocate from the cache

45

* during kmem_cache_destroy(). The caller must prevent concurrent allocs.

45

* during kmem_cache_destroy(). The caller must prevent concurrent allocs.

46

*

46

*

47

* Each cache has a short per-cpu head array, most allocs

47

* Each cache has a short per-cpu head array, most allocs

48

* and frees go into that array, and if that array overflows, then 1/2

48

* and frees go into that array, and if that array overflows, then 1/2

49

* of the entries in the array are given back into the global cache.

49

* of the entries in the array are given back into the global cache.

50

* The head array is strictly LIFO and should improve the cache hit rates.

50

* The head array is strictly LIFO and should improve the cache hit rates.

51

* On SMP, it additionally reduces the spinlock operations.

51

* On SMP, it additionally reduces the spinlock operations.

52

*

52

*

53

* The c_cpuarray may not be read with enabled local interrupts -

53

* The c_cpuarray may not be read with enabled local interrupts -

54

* it's changed with a smp_call_function().

54

* it's changed with a smp_call_function().

55

*

55

*

56

* SMP synchronization:

56

* SMP synchronization:

57

* constructors and destructors are called without any locking.

57

* constructors and destructors are called without any locking.

58

* Several members in struct kmem_cache and struct slab never change, they

58

* Several members in struct kmem_cache and struct slab never change, they

59

* are accessed without any locking.

59

* are accessed without any locking.

60

* The per-cpu arrays are never accessed from the wrong cpu, no locking,

60

* The per-cpu arrays are never accessed from the wrong cpu, no locking,

61

* and local interrupts are disabled so slab code is preempt-safe.

61

* and local interrupts are disabled so slab code is preempt-safe.

62

* The non-constant members are protected with a per-cache irq spinlock.

62

* The non-constant members are protected with a per-cache irq spinlock.

63

*

63

*

64

* Many thanks to Mark Hemment, who wrote another per-cpu slab patch

64

* Many thanks to Mark Hemment, who wrote another per-cpu slab patch

65

* in 2000 - many ideas in the current implementation are derived from

65

* in 2000 - many ideas in the current implementation are derived from

66

* his patch.

66

* his patch.

67

*

67

*

68

* Further notes from the original documentation:

68

* Further notes from the original documentation:

69

*

69

*

70

* 11 April '97. Started multi-threading - markhe

70

* 11 April '97. Started multi-threading - markhe

71

* The global cache-chain is protected by the mutex 'cache_chain_mutex'.

71

* The global cache-chain is protected by the mutex 'cache_chain_mutex'.

72

* The sem is only needed when accessing/extending the cache-chain, which

72

* The sem is only needed when accessing/extending the cache-chain, which

73

* can never happen inside an interrupt (kmem_cache_create(),

73

* can never happen inside an interrupt (kmem_cache_create(),

74

* kmem_cache_shrink() and kmem_cache_reap()).

74

* kmem_cache_shrink() and kmem_cache_reap()).

75

*

75

*

76

* At present, each engine can be growing a cache. This should be blocked.

76

* At present, each engine can be growing a cache. This should be blocked.

77

*

77

*

78

* 15 March 2005. NUMA slab allocator.

78

* 15 March 2005. NUMA slab allocator.

79

* Shai Fultheim <shai@scalex86.org>.

79

* Shai Fultheim <shai@scalex86.org>.

80

* Shobhit Dayal <shobhit@calsoftinc.com>

80

* Shobhit Dayal <shobhit@calsoftinc.com>

81

* Alok N Kataria <alokk@calsoftinc.com>

81

* Alok N Kataria <alokk@calsoftinc.com>

82

* Christoph Lameter <christoph@lameter.com>

82

* Christoph Lameter <christoph@lameter.com>

83

*

83

*

84

* Modified the slab allocator to be node aware on NUMA systems.

84

* Modified the slab allocator to be node aware on NUMA systems.

85

* Each node has its own list of partial, free and full slabs.

85

* Each node has its own list of partial, free and full slabs.

86

* All object allocations for a node occur from node specific slab lists.

86

* All object allocations for a node occur from node specific slab lists.

87

*/

87

*/

88

89

#include <linux/slab.h>

89

#include <linux/slab.h>

90

#include <linux/mm.h>

90

#include <linux/mm.h>

91

#include <linux/poison.h>

91

#include <linux/poison.h>

92

#include <linux/swap.h>

92

#include <linux/swap.h>

93

#include <linux/cache.h>

93

#include <linux/cache.h>

94

#include <linux/interrupt.h>

94

#include <linux/interrupt.h>

95

#include <linux/init.h>

95

#include <linux/init.h>

96

#include <linux/compiler.h>

96

#include <linux/compiler.h>

97

#include <linux/cpuset.h>

97

#include <linux/cpuset.h>

98

#include <linux/proc_fs.h>

98

#include <linux/proc_fs.h>

99

#include <linux/seq_file.h>

99

#include <linux/seq_file.h>

100

#include <linux/notifier.h>

100

#include <linux/notifier.h>

101

#include <linux/kallsyms.h>

101

#include <linux/kallsyms.h>

102

#include <linux/cpu.h>

102

#include <linux/cpu.h>

103

#include <linux/sysctl.h>

103

#include <linux/sysctl.h>

104

#include <linux/module.h>

104

#include <linux/module.h>

105

#include <linux/kmemtrace.h>

105

#include <linux/kmemtrace.h>

106

#include <linux/rcupdate.h>

106

#include <linux/rcupdate.h>

107

#include <linux/string.h>

107

#include <linux/string.h>

108

#include <linux/uaccess.h>

108

#include <linux/uaccess.h>

109

#include <linux/nodemask.h>

109

#include <linux/nodemask.h>

110

#include <linux/kmemleak.h>

110

#include <linux/kmemleak.h>

111

#include <linux/mempolicy.h>

111

#include <linux/mempolicy.h>

112

#include <linux/mutex.h>

112

#include <linux/mutex.h>

113

#include <linux/fault-inject.h>

113

#include <linux/fault-inject.h>

114

#include <linux/rtmutex.h>

114

#include <linux/rtmutex.h>

115

#include <linux/reciprocal_div.h>

115

#include <linux/reciprocal_div.h>

116

#include <linux/debugobjects.h>

116

#include <linux/debugobjects.h>

117

#include <linux/kmemcheck.h>

117

#include <linux/kmemcheck.h>

118

119

#include <asm/cacheflush.h>

119

#include <asm/cacheflush.h>

120

#include <asm/tlbflush.h>

120

#include <asm/tlbflush.h>

121

#include <asm/page.h>

121

#include <asm/page.h>

122

123

/*
 * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
 *		  0 for faster, smaller code (especially in the critical paths).
 *
 * STATS	- 1 to collect stats for /proc/slabinfo.
 *		  0 for faster, smaller code (especially in the critical paths).
 *
 * FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
 *
 * All three switches follow CONFIG_DEBUG_SLAB: they are 1 when it is
 * defined and 0 otherwise.
 */

#ifdef CONFIG_DEBUG_SLAB
#define	DEBUG		1
#define	STATS		1
#define	FORCED_DEBUG	1
#else
#define	DEBUG		0
#define	STATS		0
#define	FORCED_DEBUG	0
#endif

142

143

/* Shouldn't this be in a header file somewhere? */

/* Size of one machine word in bytes, taken as the size of a pointer. */
#define	BYTES_PER_WORD		sizeof(void *)

/*
 * The larger of BYTES_PER_WORD and the alignment of unsigned long long.
 * Relies on a max() macro being in scope at the point of use.
 */
#define	REDZONE_ALIGN		max(BYTES_PER_WORD, __alignof__(unsigned long long))

146

147

#ifndef ARCH_KMALLOC_MINALIGN
/*
 * Enforce a minimum alignment for the kmalloc caches.
 * Usually, the kmalloc caches are cache_line_size() aligned, except when
 * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
 * Some archs want to perform DMA into kmalloc caches and need a guaranteed
 * alignment larger than the alignment of a 64-bit integer.
 * ARCH_KMALLOC_MINALIGN allows that.
 * Note that increasing this value may disable some debug features.
 *
 * This fallback is used only when the architecture has not already
 * defined ARCH_KMALLOC_MINALIGN.
 */
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif

159

160

#ifndef ARCH_SLAB_MINALIGN
/*
 * Enforce a minimum alignment for all caches.
 * Intended for archs that get misalignment faults even for BYTES_PER_WORD
 * aligned buffers. Includes ARCH_KMALLOC_MINALIGN.
 * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables
 * some debug features.
 *
 * The fallback of 0 is used only when the architecture has not already
 * defined ARCH_SLAB_MINALIGN.
 */
#define ARCH_SLAB_MINALIGN 0
#endif

170

171

/*
 * Fallback for ARCH_KMALLOC_FLAGS when the architecture does not define it:
 * SLAB_HWCACHE_ALIGN.
 */
#ifndef ARCH_KMALLOC_FLAGS
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif

174

175

/*
 * Legal flag mask for kmem_cache_create().
 *
 * The DEBUG variant additionally accepts SLAB_RED_ZONE, SLAB_POISON and
 * SLAB_STORE_USER; every other flag is common to both variants.
 */
#if DEBUG
# define CREATE_MASK	(SLAB_RED_ZONE | \
			 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
			 SLAB_CACHE_DMA | \
			 SLAB_STORE_USER | \
			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
#else
# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
			 SLAB_CACHE_DMA | \
			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
#endif

191

192

/*
 * kmem_bufctl_t:
 *
 * Bufctl's are used for linking objs within a slab
 * linked offsets.
 *
 * This implementation relies on "struct page" for locating the cache &
 * slab an object belongs to.
 * This allows the bufctl structure to be small (one int), but limits
 * the number of objects a slab (not a cache) can contain when off-slab
 * bufctls are used. The limit is the size of the largest general cache
 * that does not use off-slab slabs.
 * For 32bit archs with 4 kB pages, this is 56.
 * This is not serious, as it is only for large objects, when it is unwise
 * to have too many per slab.
 * Note: This limit can be raised by introducing a general cache whose size
 * is less than 512 (PAGE_SIZE>>3), but greater than 256.
 */

typedef unsigned int kmem_bufctl_t;

/*
 * The three highest kmem_bufctl_t values are reserved as markers;
 * SLAB_LIMIT is the value immediately below them.
 */
#define BUFCTL_END	(((kmem_bufctl_t)(~0U))-0)
#define BUFCTL_FREE	(((kmem_bufctl_t)(~0U))-1)
#define	BUFCTL_ACTIVE	(((kmem_bufctl_t)(~0U))-2)
#define	SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-3)

216

217

/*

217

/*

218

* struct slab

218

* struct slab

219

*

219

*

220

* Manages the objs in a slab. Placed either at the beginning of mem allocated

220

* Manages the objs in a slab. Placed either at the beginning of mem allocated

221

* for a slab, or allocated from an general cache.

221

* for a slab, or allocated from an general cache.

222

* Slabs are chained into three list: fully used, partial, fully free slabs.

222

* Slabs are chained into three list: fully used, partial, fully free slabs.

223

*/

223

*/

224

struct slab {

224

struct slab {

225

struct list_head list;

225

struct list_head list;

226

unsigned long colouroff;

226

unsigned long colouroff;

227

void *s_mem; /* including colour offset */

227

void *s_mem; /* including colour offset */

228

unsigned int inuse; /* num of objs active in slab */

228

unsigned int inuse; /* num of objs active in slab */

229

kmem_bufctl_t free;

229

kmem_bufctl_t free;

230

unsigned short nodeid;

230

unsigned short nodeid;

231

};

231

};

232

233

/*

233

/*

234

* struct slab_rcu

234

* struct slab_rcu

235

*

235

*

236

* slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to

236

* slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to

237

* arrange for kmem_freepages to be called via RCU. This is useful if

237

* arrange for kmem_freepages to be called via RCU. This is useful if

238

* we need to approach a kernel structure obliquely, from its address

238

* we need to approach a kernel structure obliquely, from its address

239

* obtained without the usual locking. We can lock the structure to

239

* obtained without the usual locking. We can lock the structure to

240

* stabilize it and check it's still at the given address, only if we

240

* stabilize it and check it's still at the given address, only if we

241

* can be sure that the memory has not been meanwhile reused for some

241

* can be sure that the memory has not been meanwhile reused for some

242

* other kind of object (which our subsystem's lock might corrupt).

242

* other kind of object (which our subsystem's lock might corrupt).

243

*

243

*

244

* rcu_read_lock before reading the address, then rcu_read_unlock after

244

* rcu_read_lock before reading the address, then rcu_read_unlock after

245

* taking the spinlock within the structure expected at that address.

245

* taking the spinlock within the structure expected at that address.

246

*

246

*

247

* We assume struct slab_rcu can overlay struct slab when destroying.

247

* We assume struct slab_rcu can overlay struct slab when destroying.

248

*/

248

*/

249

struct slab_rcu {

249

struct slab_rcu {

250

struct rcu_head head;

250

struct rcu_head head;

251

struct kmem_cache *cachep;

251

struct kmem_cache *cachep;

252

void *addr;

252

void *addr;

253

};

253

};

254

255

/*

255

/*

256

* struct array_cache

256

* struct array_cache

257

*

257

*

258

* Purpose:

258

* Purpose:

259

* - LIFO ordering, to hand out cache-warm objects from _alloc

259

* - LIFO ordering, to hand out cache-warm objects from _alloc

260

* - reduce the number of linked list operations

260

* - reduce the number of linked list operations

261

* - reduce spinlock operations

261

* - reduce spinlock operations

262

*

262

*

263

* The limit is stored in the per-cpu structure to reduce the data cache

263

* The limit is stored in the per-cpu structure to reduce the data cache

264

* footprint.

264

* footprint.

265

*

265

*

266

*/

266

*/

267

struct array_cache {

267

struct array_cache {

268

unsigned int avail;

268

unsigned int avail;

269

unsigned int limit;

269

unsigned int limit;

270

unsigned int batchcount;

270

unsigned int batchcount;

271

unsigned int touched;

271

unsigned int touched;

272

spinlock_t lock;

272

spinlock_t lock;

273

void *entry[]; /*

273

void *entry[]; /*

274

* Must have this definition in here for the proper

274

* Must have this definition in here for the proper

275

* alignment of array_cache. Also simplifies accessing

275

* alignment of array_cache. Also simplifies accessing

276

* the entries.

276

* the entries.

277

*/

277

*/

278

};

278

};

279

280

/*

280

/*

281

* bootstrap: The caches do not work without cpuarrays anymore, but the

281

* bootstrap: The caches do not work without cpuarrays anymore, but the

282

* cpuarrays are allocated from the generic caches...

282

* cpuarrays are allocated from the generic caches...

283

*/

283

*/

284

#define BOOT_CPUCACHE_ENTRIES 1

284

#define BOOT_CPUCACHE_ENTRIES 1

285

struct arraycache_init {

285

struct arraycache_init {

286

struct array_cache cache;

286

struct array_cache cache;

287

void *entries[BOOT_CPUCACHE_ENTRIES];

287

void *entries[BOOT_CPUCACHE_ENTRIES];

288

};

288

};

289

290

/*

290

/*

291

* The slab lists for all objects.

291

* The slab lists for all objects.

292

*/

292

*/

293

struct kmem_list3 {

293

struct kmem_list3 {

294

struct list_head slabs_partial; /* partial list first, better asm code */

294

struct list_head slabs_partial; /* partial list first, better asm code */

295

struct list_head slabs_full;

295

struct list_head slabs_full;

296

struct list_head slabs_free;

296

struct list_head slabs_free;

297

unsigned long free_objects;

297

unsigned long free_objects;

298

unsigned int free_limit;

298

unsigned int free_limit;

299

unsigned int colour_next; /* Per-node cache coloring */

299

unsigned int colour_next; /* Per-node cache coloring */

300

spinlock_t list_lock;

300

spinlock_t list_lock;

301

struct array_cache *shared; /* shared per node */

301

struct array_cache *shared; /* shared per node */

302

struct array_cache **alien; /* on other nodes */

302

struct array_cache **alien; /* on other nodes */

303

unsigned long next_reap; /* updated without locking */

303

unsigned long next_reap; /* updated without locking */

304

int free_touched; /* updated without locking */

304

int free_touched; /* updated without locking */

305

};

305

};

306

307

/*

307

/*

308

* Need this for bootstrapping a per node allocator.

308

* Need this for bootstrapping a per node allocator.

309

*/

309

*/

310

#define NUM_INIT_LISTS (3 * MAX_NUMNODES)

310

#define NUM_INIT_LISTS (3 * MAX_NUMNODES)

311

struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];

311

struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];

312

#define CACHE_CACHE 0

312

#define CACHE_CACHE 0

313

#define SIZE_AC MAX_NUMNODES

313

#define SIZE_AC MAX_NUMNODES

314

#define SIZE_L3 (2 * MAX_NUMNODES)

314

#define SIZE_L3 (2 * MAX_NUMNODES)

315

316

static int drain_freelist(struct kmem_cache *cache,

316

static int drain_freelist(struct kmem_cache *cache,

317

struct kmem_list3 *l3, int tofree);

317

struct kmem_list3 *l3, int tofree);

318

static void free_block(struct kmem_cache *cachep, void **objpp, int len,

318

static void free_block(struct kmem_cache *cachep, void **objpp, int len,

319

int node);

319

int node);

320

static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);

320

static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);

321

static void cache_reap(struct work_struct *unused);

321

static void cache_reap(struct work_struct *unused);

322

323

/*

323

/*

324

* This function must be completely optimized away if a constant is passed to

324

* This function must be completely optimized away if a constant is passed to

325

* it. Mostly the same as what is in linux/slab.h except it returns an index.

325

* it. Mostly the same as what is in linux/slab.h except it returns an index.

326

*/

326

*/

327

static __always_inline int index_of(const size_t size)

327

static __always_inline int index_of(const size_t size)

328

{

328

{

329

extern void __bad_size(void);

329

extern void __bad_size(void);

330

331

if (__builtin_constant_p(size)) {

331

if (__builtin_constant_p(size)) {

332

int i = 0;

332

int i = 0;

333

334

#define CACHE(x) \

334

#define CACHE(x) \

335

if (size <=x) \

335

if (size <=x) \

336

return i; \

336

return i; \

337

else \

337

else \

338

i++;

338

i++;

339

#include <linux/kmalloc_sizes.h>

339

#include <linux/kmalloc_sizes.h>

340

#undef CACHE

340

#undef CACHE

341

__bad_size();

341

__bad_size();

342

} else

342

} else

343

__bad_size();

343

__bad_size();

344

return 0;

344

return 0;

345

}

345

}

346

347

/* Starts out as 1. NOTE(review): presumably cleared once early boot setup is done - confirm. */
static int slab_early_init = 1;

/* Size-table indices (see index_of()) for the two bootstrap structures. */
#define INDEX_AC index_of(sizeof(struct arraycache_init))
#define INDEX_L3 index_of(sizeof(struct kmem_list3))

351

352

static void kmem_list3_init(struct kmem_list3 *parent)

352

static void kmem_list3_init(struct kmem_list3 *parent)

353

{

353

{

354

INIT_LIST_HEAD(&parent->slabs_full);

354

INIT_LIST_HEAD(&parent->slabs_full);

355

INIT_LIST_HEAD(&parent->slabs_partial);

355

INIT_LIST_HEAD(&parent->slabs_partial);

356

INIT_LIST_HEAD(&parent->slabs_free);

356

INIT_LIST_HEAD(&parent->slabs_free);

357

parent->shared = NULL;

357

parent->shared = NULL;

358

parent->alien = NULL;

358

parent->alien = NULL;

359

parent->colour_next = 0;

359

parent->colour_next = 0;

360

spin_lock_init(&parent->list_lock);

360

spin_lock_init(&parent->list_lock);

361

parent->free_objects = 0;

361

parent->free_objects = 0;

362

parent->free_touched = 0;

362

parent->free_touched = 0;

363

}

363

}

364

365

/*
 * MAKE_LIST: initialise the list head @listp and splice onto it the list
 * named @slab (a struct kmem_list3 member name) from
 * @cachep->nodelists[@nodeid].
 */
#define MAKE_LIST(cachep, listp, slab, nodeid)				\
	do {								\
		INIT_LIST_HEAD(listp);					\
		list_splice(&((cachep)->nodelists[nodeid]->slab), (listp)); \
	} while (0)

/*
 * MAKE_ALL_LISTS: apply MAKE_LIST to the full, partial and free lists,
 * with the matching members of @ptr as destinations.
 */
#define	MAKE_ALL_LISTS(cachep, ptr, nodeid)				\
	do {								\
	MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid);	\
	MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
	MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);	\
	} while (0)

377

378

/* Flag bit tested by OFF_SLAB() in the ->flags word of its argument. */
#define CFLGS_OFF_SLAB		(0x80000000UL)
/* Non-zero when CFLGS_OFF_SLAB is set in (x)->flags. */
#define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)

380

381

#define BATCHREFILL_LIMIT	16
/*
 * Optimization question: fewer reaps means less probability for unnecessary
 * cpucache drain/refill cycles.
 *
 * OTOH the cpuarrays can contain lots of objects,
 * which could lock up otherwise freeable slabs.
 *
 * Both timeouts are expressed in multiples of HZ.
 */
#define REAPTIMEOUT_CPUC	(2*HZ)
#define REAPTIMEOUT_LIST3	(4*HZ)

391

392

/*
 * Statistics helpers.  With STATS enabled each macro updates the named
 * counter of its argument; otherwise every macro expands to an empty
 * statement so call sites need no #ifdef.
 */
#if STATS
#define	STATS_INC_ACTIVE(x)	((x)->num_active++)
#define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
#define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
#define	STATS_INC_GROWN(x)	((x)->grown++)
#define	STATS_ADD_REAPED(x,y)	((x)->reaped += (y))
/* Raise high_mark to num_active when num_active has passed it. */
#define	STATS_SET_HIGH(x)						\
	do {								\
		if ((x)->num_active > (x)->high_mark)			\
			(x)->high_mark = (x)->num_active;		\
	} while (0)
#define	STATS_INC_ERR(x)	((x)->errors++)
#define	STATS_INC_NODEALLOCS(x)	((x)->node_allocs++)
#define	STATS_INC_NODEFREES(x)	((x)->node_frees++)
#define STATS_INC_ACOVERFLOW(x)   ((x)->node_overflow++)
/* Raise max_freeable to i when i is larger; i is evaluated twice. */
#define	STATS_SET_FREEABLE(x, i)					\
	do {								\
		if ((x)->max_freeable < (i))				\
			(x)->max_freeable = (i);			\
	} while (0)
#define STATS_INC_ALLOCHIT(x)	atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x)	atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x)	atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x)	atomic_inc(&(x)->freemiss)
#else
#define	STATS_INC_ACTIVE(x)	do { } while (0)
#define	STATS_DEC_ACTIVE(x)	do { } while (0)
#define	STATS_INC_ALLOCED(x)	do { } while (0)
#define	STATS_INC_GROWN(x)	do { } while (0)
#define	STATS_ADD_REAPED(x,y)	do { } while (0)
#define	STATS_SET_HIGH(x)	do { } while (0)
#define	STATS_INC_ERR(x)	do { } while (0)
#define	STATS_INC_NODEALLOCS(x)	do { } while (0)
#define	STATS_INC_NODEFREES(x)	do { } while (0)
#define STATS_INC_ACOVERFLOW(x)   do { } while (0)
#define	STATS_SET_FREEABLE(x, i) do { } while (0)
#define STATS_INC_ALLOCHIT(x)	do { } while (0)
#define STATS_INC_ALLOCMISS(x)	do { } while (0)
#define STATS_INC_FREEHIT(x)	do { } while (0)
#define STATS_INC_FREEMISS(x)	do { } while (0)
#endif

433

434

#if DEBUG

434

#if DEBUG

435

436

/*

436

/*

437

* memory layout of objects:

437

* memory layout of objects:

438

* 0 : objp

438

* 0 : objp

439

* 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that

439

* 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that

440

* the end of an object is aligned with the end of the real

440

* the end of an object is aligned with the end of the real

441

* allocation. Catches writes behind the end of the allocation.

441

* allocation. Catches writes behind the end of the allocation.

442

* cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:

442

* cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:

443

* redzone word.

443

* redzone word.

444

* cachep->obj_offset: The real object.

444

* cachep->obj_offset: The real object.

445

* cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]

445

* cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]

446

* cachep->buffer_size - 1* BYTES_PER_WORD: last caller address

446

* cachep->buffer_size - 1* BYTES_PER_WORD: last caller address

447

* [BYTES_PER_WORD long]

447

* [BYTES_PER_WORD long]

448

*/

448

*/

449

static int obj_offset(struct kmem_cache *cachep)

449

static int obj_offset(struct kmem_cache *cachep)

450

{

450

{

451

return cachep->obj_offset;

451

return cachep->obj_offset;

452

}

452

}

453

454

static int obj_size(struct kmem_cache *cachep)

454

static int obj_size(struct kmem_cache *cachep)

455

{

455

{

456

return cachep->obj_size;

456

return cachep->obj_size;

457

}

457

}

458

459

static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)

459

static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)

460

{

460

{

461

BUG_ON(!(cachep->flags & SLAB_RED_ZONE));

461

BUG_ON(!(cachep->flags & SLAB_RED_ZONE));

462

return (unsigned long long*) (objp + obj_offset(cachep) -

462

return (unsigned long long*) (objp + obj_offset(cachep) -

463

sizeof(unsigned long long));

463

sizeof(unsigned long long));

464

}

464

}

465

466

static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)

466

static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)

467

{

467

{

468

BUG_ON(!(cachep->flags & SLAB_RED_ZONE));

468

BUG_ON(!(cachep->flags & SLAB_RED_ZONE));

469

if (cachep->flags & SLAB_STORE_USER)

469

if (cachep->flags & SLAB_STORE_USER)

470

return (unsigned long long *)(objp + cachep->buffer_size -

470

return (unsigned long long *)(objp + cachep->buffer_size -

471

sizeof(unsigned long long) -

471

sizeof(unsigned long long) -

472

REDZONE_ALIGN);

472

REDZONE_ALIGN);

473

return (unsigned long long *) (objp + cachep->buffer_size -

473

return (unsigned long long *) (objp + cachep->buffer_size -

474

sizeof(unsigned long long));

474

sizeof(unsigned long long));

475

}

475

}

476

477

static void **dbg_userword(struct kmem_cache *cachep, void *objp)

477

static void **dbg_userword(struct kmem_cache *cachep, void *objp)

478

{

478

{

479

BUG_ON(!(cachep->flags & SLAB_STORE_USER));

479

BUG_ON(!(cachep->flags & SLAB_STORE_USER));

480

return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD);

480

return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD);

481

}

481

}

482

483

#else

483

#else

484

485

/*
 * !DEBUG variants of the object accessors.
 *
 * obj_offset() is the constant 0 and obj_size() is the cache's buffer_size.
 * The macro argument is parenthesized so that any pointer expression
 * (e.g. "caches + 1") can be passed.
 *
 * The dbg_* accessors expand to BUG() followed by a NULL of the matching
 * pointer type.
 */
#define obj_offset(x)			0
#define obj_size(cachep)		((cachep)->buffer_size)
#define dbg_redzone1(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_redzone2(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_userword(cachep, objp)	({BUG(); (void **)NULL;})

490

491

#endif

491

#endif

492

493

#ifdef CONFIG_TRACING

493

#ifdef CONFIG_TRACING

494

size_t slab_buffer_size(struct kmem_cache *cachep)

494

size_t slab_buffer_size(struct kmem_cache *cachep)

495

{

495

{

496

return cachep->buffer_size;

496

return cachep->buffer_size;

497

}

497

}

498

EXPORT_SYMBOL(slab_buffer_size);

498

EXPORT_SYMBOL(slab_buffer_size);

499

#endif

499

#endif

500

501

/*

501

/*

502

* Do not go above this order unless 0 objects fit into the slab.

502

* Do not go above this order unless 0 objects fit into the slab.

503

*/

503

*/

504

#define BREAK_GFP_ORDER_HI 1

504

#define BREAK_GFP_ORDER_HI 1

505

#define BREAK_GFP_ORDER_LO 0

505

#define BREAK_GFP_ORDER_LO 0

506

static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;

506

static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;

507

508

/*

508

/*

509

* Functions for storing/retrieving the cachep and or slab from the page

509

* Functions for storing/retrieving the cachep and or slab from the page

510

* allocator. These are used to find the slab an obj belongs to. With kfree(),

510

* allocator. These are used to find the slab an obj belongs to. With kfree(),

511

* these are used to find the cache which an obj belongs to.

511

* these are used to find the cache which an obj belongs to.

512

*/

512

*/

513

static inline void page_set_cache(struct page *page, struct kmem_cache *cache)

513

static inline void page_set_cache(struct page *page, struct kmem_cache *cache)

514

{

514

{

515

page->lru.next = (struct list_head *)cache;

515

page->lru.next = (struct list_head *)cache;

516

}

516

}

517

518

static inline struct kmem_cache *page_get_cache(struct page *page)

518

static inline struct kmem_cache *page_get_cache(struct page *page)

519

{

519

{

520

page = compound_head(page);

520

page = compound_head(page);

521

BUG_ON(!PageSlab(page));

521

BUG_ON(!PageSlab(page));

522

return (struct kmem_cache *)page->lru.next;

522

return (struct kmem_cache *)page->lru.next;

523

}

523

}

524

525

static inline void page_set_slab(struct page *page, struct slab *slab)

525

static inline void page_set_slab(struct page *page, struct slab *slab)

526

{

526

{

527

page->lru.prev = (struct list_head *)slab;

527

page->lru.prev = (struct list_head *)slab;

528

}

528

}

529

530

static inline struct slab *page_get_slab(struct page *page)

530

static inline struct slab *page_get_slab(struct page *page)

531

{

531

{

532

BUG_ON(!PageSlab(page));

532

BUG_ON(!PageSlab(page));

533

return (struct slab *)page->lru.prev;

533

return (struct slab *)page->lru.prev;

534

}

534

}

535

536

/* Map an object address to its cache via the head page of the object. */
static inline struct kmem_cache *virt_to_cache(const void *obj)
{
	return page_get_cache(virt_to_head_page(obj));
}

541

542

/* Map an object address to its slab descriptor via the object's head page. */
static inline struct slab *virt_to_slab(const void *obj)
{
	return page_get_slab(virt_to_head_page(obj));
}

547

548

static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,

548

static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,

549

unsigned int idx)

549

unsigned int idx)

550

{

550

{

551

return slab->s_mem + cache->buffer_size * idx;

551

return slab->s_mem + cache->buffer_size * idx;

552

}

552

}

553

554

/*

554

/*

555

* We want to avoid an expensive divide : (offset / cache->buffer_size)

555

* We want to avoid an expensive divide : (offset / cache->buffer_size)

556

* Using the fact that buffer_size is a constant for a particular cache,

556

* Using the fact that buffer_size is a constant for a particular cache,

557

* we can replace (offset / cache->buffer_size) by

557

* we can replace (offset / cache->buffer_size) by

558

* reciprocal_divide(offset, cache->reciprocal_buffer_size)

558

* reciprocal_divide(offset, cache->reciprocal_buffer_size)

559

*/

559

*/

560

static inline unsigned int obj_to_index(const struct kmem_cache *cache,

560

static inline unsigned int obj_to_index(const struct kmem_cache *cache,

561

const struct slab *slab, void *obj)

561

const struct slab *slab, void *obj)

562

{

562

{

563

u32 offset = (obj - slab->s_mem);

563

u32 offset = (obj - slab->s_mem);

564

return reciprocal_divide(offset, cache->reciprocal_buffer_size);

564

return reciprocal_divide(offset, cache->reciprocal_buffer_size);

565

}

565

}

566

567

/*

567

/*

568

* These are the default caches for kmalloc. Custom caches can have other sizes.

568

* These are the default caches for kmalloc. Custom caches can have other sizes.

569

*/

569

*/

570

struct cache_sizes malloc_sizes[] = {

570

struct cache_sizes malloc_sizes[] = {

571

#define CACHE(x) { .cs_size = (x) },

571

#define CACHE(x) { .cs_size = (x) },

572

#include <linux/kmalloc_sizes.h>

572

#include <linux/kmalloc_sizes.h>

573

CACHE(ULONG_MAX)

573

CACHE(ULONG_MAX)

574

#undef CACHE

574

#undef CACHE

575

};

575

};

576

EXPORT_SYMBOL(malloc_sizes);

576

EXPORT_SYMBOL(malloc_sizes);

577

578

/* Must match cache_sizes above. Out of line to keep cache footprint low. */

578

/* Must match cache_sizes above. Out of line to keep cache footprint low. */

579

struct cache_names {

579

struct cache_names {

580

char *name;

580

char *name;

581

char *name_dma;

581

char *name_dma;

582

};

582

};

583

584

static struct cache_names __initdata cache_names[] = {

584

static struct cache_names __initdata cache_names[] = {

585

#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },

585

#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },

586

#include <linux/kmalloc_sizes.h>

586

#include <linux/kmalloc_sizes.h>

587

{NULL,}

587

{NULL,}

588

#undef CACHE

588

#undef CACHE

589

};

589

};

590

591

static struct arraycache_init initarray_cache __initdata =

591

static struct arraycache_init initarray_cache __initdata =

592

{ {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };

592

{ {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };

593

static struct arraycache_init initarray_generic =

593

static struct arraycache_init initarray_generic =

594

{ {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };

594

{ {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };

595

596

/* internal cache of cache description objs */

596

/* internal cache of cache description objs */

597

static struct kmem_cache cache_cache = {

597

static struct kmem_cache cache_cache = {

598

.batchcount = 1,

598

.batchcount = 1,

599

.limit = BOOT_CPUCACHE_ENTRIES,

599

.limit = BOOT_CPUCACHE_ENTRIES,

600

.shared = 1,

600

.shared = 1,

601

.buffer_size = sizeof(struct kmem_cache),

601

.buffer_size = sizeof(struct kmem_cache),

602

.name = "kmem_cache",

602

.name = "kmem_cache",

603

};

603

};

604

605

#define BAD_ALIEN_MAGIC 0x01020304ul

605

#define BAD_ALIEN_MAGIC 0x01020304ul

606

607

/*

607

/*

608

* chicken and egg problem: delay the per-cpu array allocation

608

* chicken and egg problem: delay the per-cpu array allocation

609

* until the general caches are up.

609

* until the general caches are up.

610

*/

610

*/

611

/*
 * Bring-up state of the allocator.  The enumerators are ordered, and code
 * in this file compares against them (slab_is_available() tests >= EARLY,
 * init_node_lock_keys() tests for FULL), so the order must be preserved.
 */
static enum {
	NONE,		/* initial (zero) value */
	PARTIAL_AC,
	PARTIAL_L3,
	EARLY,		/* slab_is_available() reports true from here on */
	FULL
} g_cpucache_up;

618

619

/*

619

/*

620

* used by boot code to determine if it can use slab based allocator

620

* used by boot code to determine if it can use slab based allocator

621

*/

621

*/

622

int slab_is_available(void)

622

int slab_is_available(void)

623

{

623

{

624

return g_cpucache_up >= EARLY;

624

return g_cpucache_up >= EARLY;

625

}

625

}

626

627

#ifdef CONFIG_LOCKDEP

627

#ifdef CONFIG_LOCKDEP

628

629

/*

629

/*

630

* Slab sometimes uses the kmalloc slabs to store the slab headers

630

* Slab sometimes uses the kmalloc slabs to store the slab headers

631

* for other slabs "off slab".

631

* for other slabs "off slab".

632

* The locking for this is tricky in that it nests within the locks

632

* The locking for this is tricky in that it nests within the locks

633

* of all other slabs in a few places; to deal with this special

633

* of all other slabs in a few places; to deal with this special

634

* locking we put on-slab caches into a separate lock-class.

634

* locking we put on-slab caches into a separate lock-class.

635

*

635

*

636

* We set lock class for alien array caches which are up during init.

636

* We set lock class for alien array caches which are up during init.

637

* The lock annotation will be lost if all cpus of a node goes down and

637

* The lock annotation will be lost if all cpus of a node goes down and

638

* then comes back up during hotplug

638

* then comes back up during hotplug

639

*/

639

*/

640

static struct lock_class_key on_slab_l3_key;

640

static struct lock_class_key on_slab_l3_key;

641

static struct lock_class_key on_slab_alc_key;

641

static struct lock_class_key on_slab_alc_key;

642

643

static void init_node_lock_keys(int q)

643

static void init_node_lock_keys(int q)

644

{

644

{

645

struct cache_sizes *s = malloc_sizes;

645

struct cache_sizes *s = malloc_sizes;

646

647

if (g_cpucache_up != FULL)

647

if (g_cpucache_up != FULL)

648

return;

648

return;

649

650

for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {

650

for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {

651

struct array_cache **alc;

651

struct array_cache **alc;

652

struct kmem_list3 *l3;

652

struct kmem_list3 *l3;

653

int r;

653

int r;

654

655

l3 = s->cs_cachep->nodelists[q];

655

l3 = s->cs_cachep->nodelists[q];

656

if (!l3 || OFF_SLAB(s->cs_cachep))

656

if (!l3 || OFF_SLAB(s->cs_cachep))

657

return;

657

continue;

658

lockdep_set_class(&l3->list_lock, &on_slab_l3_key);

658

lockdep_set_class(&l3->list_lock, &on_slab_l3_key);

659

alc = l3->alien;

659

alc = l3->alien;

660

/*

660

/*

661

* FIXME: This check for BAD_ALIEN_MAGIC

661

* FIXME: This check for BAD_ALIEN_MAGIC

662

* should go away when common slab code is taught to

662

* should go away when common slab code is taught to

663

* work even without alien caches.

663

* work even without alien caches.

664

* Currently, non NUMA code returns BAD_ALIEN_MAGIC

664

* Currently, non NUMA code returns BAD_ALIEN_MAGIC

665

* for alloc_alien_cache,

665

* for alloc_alien_cache,

666

*/

666

*/

667

if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)

667

if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)

668

return;

668

continue;

669

for_each_node(r) {

669

for_each_node(r) {

670

if (alc[r])

670

if (alc[r])

671

lockdep_set_class(&alc[r]->lock,

671

lockdep_set_class(&alc[r]->lock,

672

&on_slab_alc_key);

672

&on_slab_alc_key);

673

}

673

}

674

}

674

}

675

}

675

}

676

677

static inline void init_lock_keys(void)

677

static inline void init_lock_keys(void)

678

{

678

{

679

int node;

679

int node;

680

681

for_each_node(node)

681

for_each_node(node)

682

init_node_lock_keys(node);

682

init_node_lock_keys(node);

683

}

683

}

684

#else

684

#else

685

static void init_node_lock_keys(int q)

685

static void init_node_lock_keys(int q)

686

{

686

{

687

}

687

}

688

689

static inline void init_lock_keys(void)

689

static inline void init_lock_keys(void)

690

{

690

{

691

}

691

}

692

#endif

692

#endif

693

694

/*

694

/*

695

* Guard access to the cache-chain.

695

* Guard access to the cache-chain.

696

*/

696

*/

697

static DEFINE_MUTEX(cache_chain_mutex);

697

static DEFINE_MUTEX(cache_chain_mutex);

698

static struct list_head cache_chain;

698

static struct list_head cache_chain;

699

700

static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);

700

static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);

701

702

static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)

702

static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)

703

{

703

{

704

return cachep->array[smp_processor_id()];

704

return cachep->array[smp_processor_id()];

705

}

705

}

706

707

static inline struct kmem_cache *__find_general_cachep(size_t size,

707

static inline struct kmem_cache *__find_general_cachep(size_t size,

708

gfp_t gfpflags)

708

gfp_t gfpflags)

709

{

709

{

710

struct cache_sizes *csizep = malloc_sizes;

710

struct cache_sizes *csizep = malloc_sizes;

711

712

#if DEBUG

712

#if DEBUG

713

/* This happens if someone tries to call

713

/* This happens if someone tries to call

714

* kmem_cache_create(), or __kmalloc(), before

714

* kmem_cache_create(), or __kmalloc(), before

715

* the generic caches are initialized.

715

* the generic caches are initialized.

716

*/

716

*/

717

BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);

717

BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);

718

#endif

718

#endif

719

if (!size)

719

if (!size)

720

return ZERO_SIZE_PTR;

720

return ZERO_SIZE_PTR;

721

722

while (size > csizep->cs_size)

722

while (size > csizep->cs_size)

723

csizep++;

723

csizep++;

724

725

/*

725

/*

726

* Really subtle: The last entry with cs->cs_size==ULONG_MAX

726

* Really subtle: The last entry with cs->cs_size==ULONG_MAX

727

* has cs_{dma,}cachep==NULL. Thus no special case

727

* has cs_{dma,}cachep==NULL. Thus no special case

728

* for large kmalloc calls required.

728

* for large kmalloc calls required.

729

*/

729

*/

730

#ifdef CONFIG_ZONE_DMA

730

#ifdef CONFIG_ZONE_DMA

731

if (unlikely(gfpflags & GFP_DMA))

731

if (unlikely(gfpflags & GFP_DMA))

732

return csizep->cs_dmacachep;

732

return csizep->cs_dmacachep;

733

#endif

733

#endif

734

return csizep->cs_cachep;

734

return csizep->cs_cachep;

735

}

735

}

736

737

/* Out-of-line wrapper around the inline __find_general_cachep() lookup. */
static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
{
	struct kmem_cache *found = __find_general_cachep(size, gfpflags);

	return found;
}

741

742

static size_t slab_mgmt_size(size_t nr_objs, size_t align)

742

static size_t slab_mgmt_size(size_t nr_objs, size_t align)

743

{

743

{

744

return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);

744

return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);

745

}

745

}

746

747

/*

747

/*

748

* Calculate the number of objects and left-over bytes for a given buffer size.

748

* Calculate the number of objects and left-over bytes for a given buffer size.

749

*/

749

*/

750

static void cache_estimate(unsigned long gfporder, size_t buffer_size,

750

static void cache_estimate(unsigned long gfporder, size_t buffer_size,

751

size_t align, int flags, size_t *left_over,

751

size_t align, int flags, size_t *left_over,

752

unsigned int *num)

752

unsigned int *num)

753

{

753

{

754

int nr_objs;

754

int nr_objs;

755

size_t mgmt_size;

755

size_t mgmt_size;

756

size_t slab_size = PAGE_SIZE << gfporder;

756

size_t slab_size = PAGE_SIZE << gfporder;

757

758

/*

758

/*

759

* The slab management structure can be either off the slab or

759

* The slab management structure can be either off the slab or

760

* on it. For the latter case, the memory allocated for a

760

* on it. For the latter case, the memory allocated for a

761

* slab is used for:

761

* slab is used for:

762

*

762

*

763

* - The struct slab

763

* - The struct slab

764

* - One kmem_bufctl_t for each object

764

* - One kmem_bufctl_t for each object

765

* - Padding to respect alignment of @align

765

* - Padding to respect alignment of @align

766

* - @buffer_size bytes for each object

766

* - @buffer_size bytes for each object

767

*

767

*

768

* If the slab management structure is off the slab, then the

768

* If the slab management structure is off the slab, then the

769

* alignment will already be calculated into the size. Because

769

* alignment will already be calculated into the size. Because

770

* the slabs are all pages aligned, the objects will be at the

770

* the slabs are all pages aligned, the objects will be at the

771

* correct alignment when allocated.

771

* correct alignment when allocated.

772

*/

772

*/

773

if (flags & CFLGS_OFF_SLAB) {

773

if (flags & CFLGS_OFF_SLAB) {

774

mgmt_size = 0;

774

mgmt_size = 0;

775

nr_objs = slab_size / buffer_size;

775

nr_objs = slab_size / buffer_size;

776

777

if (nr_objs > SLAB_LIMIT)

777

if (nr_objs > SLAB_LIMIT)

778

nr_objs = SLAB_LIMIT;

778

nr_objs = SLAB_LIMIT;

779

} else {

779

} else {

780

/*

780

/*

781

* Ignore padding for the initial guess. The padding

781

* Ignore padding for the initial guess. The padding

782

* is at most @align-1 bytes, and @buffer_size is at

782

* is at most @align-1 bytes, and @buffer_size is at

783

* least @align. In the worst case, this result will

783

* least @align. In the worst case, this result will

784

* be one greater than the number of objects that fit

784

* be one greater than the number of objects that fit

785

* into the memory allocation when taking the padding

785

* into the memory allocation when taking the padding

786

* into account.

786

* into account.

787

*/

787

*/

788

nr_objs = (slab_size - sizeof(struct slab)) /

788

nr_objs = (slab_size - sizeof(struct slab)) /

789

(buffer_size + sizeof(kmem_bufctl_t));

789

(buffer_size + sizeof(kmem_bufctl_t));

790

791

/*

791

/*

792

* This calculated number will be either the right

792

* This calculated number will be either the right

793

* amount, or one greater than what we want.

793

* amount, or one greater than what we want.

794

*/

794

*/

795

if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size

795

if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size

796

> slab_size)

796

> slab_size)

797

nr_objs--;

797

nr_objs--;

798

799

if (nr_objs > SLAB_LIMIT)

799

if (nr_objs > SLAB_LIMIT)

800

nr_objs = SLAB_LIMIT;

800

nr_objs = SLAB_LIMIT;

801

802

mgmt_size = slab_mgmt_size(nr_objs, align);

802

mgmt_size = slab_mgmt_size(nr_objs, align);

803

}

803

}

804

*num = nr_objs;

804

*num = nr_objs;

805

*left_over = slab_size - nr_objs*buffer_size - mgmt_size;

805

*left_over = slab_size - nr_objs*buffer_size - mgmt_size;

806

}

806

}

807

808

#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)

808

#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)

809

810

static void __slab_error(const char *function, struct kmem_cache *cachep,

810

static void __slab_error(const char *function, struct kmem_cache *cachep,

811

char *msg)

811

char *msg)

812

{

812

{

813

printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",

813

printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",

814

function, cachep->name, msg);

814

function, cachep->name, msg);

815

dump_stack();

815

dump_stack();

816

}

816

}

817

818

/*

818

/*

819

* By default on NUMA we use alien caches to stage the freeing of

819

* By default on NUMA we use alien caches to stage the freeing of

820

* objects allocated from other nodes. This causes massive memory

820

* objects allocated from other nodes. This causes massive memory

821

* inefficiencies when using fake NUMA setup to split memory into a

821

* inefficiencies when using fake NUMA setup to split memory into a

822

* large number of small nodes, so it can be disabled on the command

822

* large number of small nodes, so it can be disabled on the command

823

* line

823

* line

824

*/

824

*/

825

826

static int use_alien_caches __read_mostly = 1;

826

static int use_alien_caches __read_mostly = 1;

827

static int __init noaliencache_setup(char *s)

827

static int __init noaliencache_setup(char *s)

828

{

828

{

829

use_alien_caches = 0;

829

use_alien_caches = 0;

830

return 1;

830

return 1;

831

}

831

}

832

__setup("noaliencache", noaliencache_setup);

832

__setup("noaliencache", noaliencache_setup);

833

834

#ifdef CONFIG_NUMA

834

#ifdef CONFIG_NUMA

835

/*

835

/*

836

* Special reaping functions for NUMA systems called from cache_reap().

836

* Special reaping functions for NUMA systems called from cache_reap().

837

* These take care of doing round robin flushing of alien caches (containing

837

* These take care of doing round robin flushing of alien caches (containing

838

* objects freed on different nodes from which they were allocated) and the

838

* objects freed on different nodes from which they were allocated) and the

839

* flushing of remote pcps by calling drain_node_pages.

839

* flushing of remote pcps by calling drain_node_pages.

840

*/

840

*/

841

static DEFINE_PER_CPU(unsigned long, slab_reap_node);

841

static DEFINE_PER_CPU(unsigned long, slab_reap_node);

842

843

static void init_reap_node(int cpu)

843

static void init_reap_node(int cpu)

844

{

844

{

845

int node;

845

int node;

846

847

node = next_node(cpu_to_node(cpu), node_online_map);

847

node = next_node(cpu_to_node(cpu), node_online_map);

848

if (node == MAX_NUMNODES)

848

if (node == MAX_NUMNODES)

849

node = first_node(node_online_map);

849

node = first_node(node_online_map);

850

851

per_cpu(slab_reap_node, cpu) = node;

851

per_cpu(slab_reap_node, cpu) = node;

852

}

852

}

853

854

static void next_reap_node(void)

854

static void next_reap_node(void)

855

{

855

{

856

int node = __get_cpu_var(slab_reap_node);

856

int node = __get_cpu_var(slab_reap_node);

857

858

node = next_node(node, node_online_map);

858

node = next_node(node, node_online_map);

859

if (unlikely(node >= MAX_NUMNODES))

859

if (unlikely(node >= MAX_NUMNODES))

860

node = first_node(node_online_map);

860

node = first_node(node_online_map);

861

__get_cpu_var(slab_reap_node) = node;

861

__get_cpu_var(slab_reap_node) = node;

862

}

862

}

863

864

#else

864

#else

865

#define init_reap_node(cpu) do { } while (0)

865

#define init_reap_node(cpu) do { } while (0)

866

#define next_reap_node(void) do { } while (0)

866

#define next_reap_node(void) do { } while (0)

867

#endif

867

#endif

868

869

/*

869

/*

870

* Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz

870

* Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz

871

* via the workqueue/eventd.

871

* via the workqueue/eventd.

872

* Add the CPU number into the expiration time to minimize the possibility of

872

* Add the CPU number into the expiration time to minimize the possibility of

873

* the CPUs getting into lockstep and contending for the global cache chain

873

* the CPUs getting into lockstep and contending for the global cache chain

874

* lock.

874

* lock.

875

*/

875

*/

876

/*
 * Arm the periodic cache_reap() work item for @cpu.
 *
 * Nothing is done unless keventd is up and the CPU's work item has no
 * handler installed yet.
 */
static void __cpuinit start_cpu_timer(int cpu)
{
	struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);

	/*
	 * When this gets called from do_initcalls via cpucache_init(),
	 * init_workqueues() has already run, so keventd will be set up
	 * at that time.
	 */
	if (!keventd_up() || reap_work->work.func != NULL)
		return;

	init_reap_node(cpu);
	INIT_DELAYED_WORK(reap_work, cache_reap);
	schedule_delayed_work_on(cpu, reap_work,
				 __round_jiffies_relative(HZ, cpu));
}

892

893

/*
 * Allocate an array cache with room for @entries object pointers on @node
 * and initialise it as empty.
 *
 * Returns the new array cache, or NULL if kmalloc_node() failed.
 */
static struct array_cache *alloc_arraycache(int node, int entries,
					    int batchcount, gfp_t gfp)
{
	int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
	struct array_cache *nc = kmalloc_node(memsize, gfp, node);

	/*
	 * An array_cache holds pointers to free objects, and those pointers
	 * are not cleared when the objects are allocated or transferred to
	 * another cache.  A kmemleak scan could therefore count them as
	 * valid references, so kmemleak must not scan this object.
	 */
	kmemleak_no_scan(nc);

	if (!nc)
		return NULL;

	nc->avail = 0;
	nc->limit = entries;
	nc->batchcount = batchcount;
	nc->touched = 0;
	spin_lock_init(&nc->lock);

	return nc;
}

917

918

/*

918

/*

919

* Transfer objects in one arraycache to another.

919

* Transfer objects in one arraycache to another.

920

* Locking must be handled by the caller.

920

* Locking must be handled by the caller.

921

*

921

*

922

* Return the number of entries transferred.

922

* Return the number of entries transferred.

923

*/

923

*/

924

static int transfer_objects(struct array_cache *to,

924

static int transfer_objects(struct array_cache *to,

925

struct array_cache *from, unsigned int max)

925

struct array_cache *from, unsigned int max)

926

{

926

{

927

/* Figure out how many entries to transfer */

927

/* Figure out how many entries to transfer */

928

int nr = min(min(from->avail, max), to->limit - to->avail);

928

int nr = min(min(from->avail, max), to->limit - to->avail);

929

930

if (!nr)

930

if (!nr)

931

return 0;

931

return 0;

932

933

memcpy(to->entry + to->avail, from->entry + from->avail -nr,

933

memcpy(to->entry + to->avail, from->entry + from->avail -nr,

934

sizeof(void *) *nr);

934

sizeof(void *) *nr);

935

936

from->avail -= nr;

936

from->avail -= nr;

937

to->avail += nr;

937

to->avail += nr;

938

to->touched = 1;

938

to->touched = 1;

939

return nr;

939

return nr;

940

}

940

}

941

942

#ifndef CONFIG_NUMA

942

#ifndef CONFIG_NUMA

943

944

/* Without CONFIG_NUMA, draining and reaping alien caches are no-ops. */
#define drain_alien_cache(cachep, alien) do { } while (0)
#define reap_alien(cachep, l3) do { } while (0)

946

947

/*
 * !CONFIG_NUMA stub: nothing is allocated.  The BAD_ALIEN_MAGIC value is
 * handed back cast to the pointer type, so the result is non-NULL but
 * must never be dereferenced.
 */
static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
{
	struct array_cache **poison = (struct array_cache **)BAD_ALIEN_MAGIC;

	return poison;
}

951

952

/* !CONFIG_NUMA stub: there is no alien cache to release. */
static inline void free_alien_cache(struct array_cache **ac_ptr)
{
	/* intentionally empty */
}

955

956

/*
 * !CONFIG_NUMA stub: always returns 0, i.e. the object was not handled
 * here.
 */
static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
	const int handled = 0;

	return handled;
}

960

961

static inline void *alternate_node_alloc(struct kmem_cache *cachep,

961

static inline void *alternate_node_alloc(struct kmem_cache *cachep,

962

gfp_t flags)

962

gfp_t flags)

963

{

963

{

964

return NULL;

964

return NULL;

965

}

965

}

966

967

static inline void *____cache_alloc_node(struct kmem_cache *cachep,

967

static inline void *____cache_alloc_node(struct kmem_cache *cachep,

968

gfp_t flags, int nodeid)

968

gfp_t flags, int nodeid)

969

{

969

{

970

return NULL;

970

return NULL;

971

}

971

}

972

973

#else /* CONFIG_NUMA */

973

#else /* CONFIG_NUMA */

974

975

/* Forward declarations for the CONFIG_NUMA allocation helpers. */
static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
				  int nodeid);
static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags);

977

978

/*
 * Allocate the table of alien array caches for @node: one slot per node
 * id (nr_node_ids slots), each holding an array cache for a remote node.
 *
 * @node:  node the table and its array caches are allocated on
 * @limit: requested per-cache limit; any value above 1 is replaced by 12
 * @gfp:   allocation flags used for every allocation made here
 *
 * The slot for @node itself and the slots of offline nodes stay NULL.
 *
 * Returns the table, or NULL if any allocation failed (everything that
 * had been allocated up to that point is released again).
 */
static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
{
	struct array_cache **ac_ptr;
	int memsize = sizeof(void *) * nr_node_ids;
	int i;

	if (limit > 1)
		limit = 12;

	/*
	 * The table must start out zeroed: for_each_node() below only
	 * visits possible nodes, while the error unwind walks every index
	 * below the failing one.  With a non-zeroed table the unwind could
	 * kfree() an uninitialised slot.
	 */
	ac_ptr = kzalloc_node(memsize, gfp, node);
	if (!ac_ptr)
		return NULL;

	for_each_node(i) {
		if (i == node || !node_online(i))
			continue;	/* slot stays NULL */

		ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
		if (!ac_ptr[i]) {
			/* Unwind: kfree(NULL) is a no-op for empty slots. */
			for (i--; i >= 0; i--)
				kfree(ac_ptr[i]);
			kfree(ac_ptr);
			return NULL;
		}
	}
	return ac_ptr;
}

1004

1005

static void free_alien_cache(struct array_cache **ac_ptr)

1005

static void free_alien_cache(struct array_cache **ac_ptr)

1006

{

1006

{

1007

int i;

1007

int i;

1008

1009

if (!ac_ptr)

1009

if (!ac_ptr)

1010

return;

1010

return;

1011

for_each_node(i)

1011

for_each_node(i)

1012

kfree(ac_ptr[i]);

1012

kfree(ac_ptr[i]);

1013

kfree(ac_ptr);

1013

kfree(ac_ptr);

1014

}

1014

}

1015

1016

static void __drain_alien_cache(struct kmem_cache *cachep,

1016

static void __drain_alien_cache(struct kmem_cache *cachep,

1017

struct array_cache *ac, int node)

1017

struct array_cache *ac, int node)

1018

{

1018

{

1019

struct kmem_list3 *rl3 = cachep->nodelists[node];

1019

struct kmem_list3 *rl3 = cachep->nodelists[node];

1020

1021

if (ac->avail) {

1021

if (ac->avail) {

1022

spin_lock(&rl3->list_lock);

1022

spin_lock(&rl3->list_lock);

1023

/*

1023

/*

1024

* Stuff objects into the remote nodes shared array first.

1024

* Stuff objects into the remote nodes shared array first.

1025

* That way we could avoid the overhead of putting the objects

1025

* That way we could avoid the overhead of putting the objects

1026

* into the free lists and getting them back later.

1026

* into the free lists and getting them back later.

1027

*/

1027

*/

1028

if (rl3->shared)

1028

if (rl3->shared)

1029

transfer_objects(rl3->shared, ac, ac->limit);

1029

transfer_objects(rl3->shared, ac, ac->limit);

1030

1031

free_block(cachep, ac->entry, ac->avail, node);

1031

free_block(cachep, ac->entry, ac->avail, node);

1032

ac->avail = 0;

1032

ac->avail = 0;

1033

spin_unlock(&rl3->list_lock);

1033

spin_unlock(&rl3->list_lock);

1034

}

1034

}

1035

}

1035

}

1036

1037

/*

1037

/*

1038

* Called from cache_reap() to regularly drain alien caches round robin.

1038

* Called from cache_reap() to regularly drain alien caches round robin.

1039

*/

1039

*/

1040

static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)

1040

static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)

1041

{

1041

{

1042

int node = __get_cpu_var(slab_reap_node);

1042

int node = __get_cpu_var(slab_reap_node);

1043

1044

if (l3->alien) {

1044

if (l3->alien) {

1045

struct array_cache *ac = l3->alien[node];

1045

struct array_cache *ac = l3->alien[node];

1046

1047

if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {

1047

if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {

1048

__drain_alien_cache(cachep, ac, node);

1048

__drain_alien_cache(cachep, ac, node);

1049

spin_unlock_irq(&ac->lock);

1049

spin_unlock_irq(&ac->lock);

1050

}

1050

}

1051

}

1051

}

1052

}

1052

}

1053

1054

static void drain_alien_cache(struct kmem_cache *cachep,

1054

static void drain_alien_cache(struct kmem_cache *cachep,

1055

struct array_cache **alien)

1055

struct array_cache **alien)

1056

{

1056

{

1057

int i = 0;

1057

int i = 0;

1058

struct array_cache *ac;

1058

struct array_cache *ac;

1059

unsigned long flags;

1059

unsigned long flags;

1060

1061

for_each_online_node(i) {

1061

for_each_online_node(i) {

1062

ac = alien[i];

1062

ac = alien[i];

1063

if (ac) {

1063

if (ac) {

1064

spin_lock_irqsave(&ac->lock, flags);

1064

spin_lock_irqsave(&ac->lock, flags);

1065

__drain_alien_cache(cachep, ac, i);

1065

__drain_alien_cache(cachep, ac, i);

1066

spin_unlock_irqrestore(&ac->lock, flags);

1066

spin_unlock_irqrestore(&ac->lock, flags);

1067

}

1067

}

1068

}

1068

}

1069

}

1069

}

1070

1071

static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)

1071

static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)

1072

{

1072

{

1073

struct slab *slabp = virt_to_slab(objp);

1073

struct slab *slabp = virt_to_slab(objp);

1074

int nodeid = slabp->nodeid;

1074

int nodeid = slabp->nodeid;

1075

struct kmem_list3 *l3;

1075

struct kmem_list3 *l3;

1076

struct array_cache *alien = NULL;

1076

struct array_cache *alien = NULL;

1077

int node;

1077

int node;

1078

1079

node = numa_node_id();

1079

node = numa_node_id();

1080

1081

/*

1081

/*

1082

* Make sure we are not freeing a object from another node to the array

1082

* Make sure we are not freeing a object from another node to the array

1083

* cache on this cpu.

1083

* cache on this cpu.

1084

*/

1084

*/

1085

if (likely(slabp->nodeid == node))

1085

if (likely(slabp->nodeid == node))

1086

return 0;

1086

return 0;

1087

1088

l3 = cachep->nodelists[node];

1088

l3 = cachep->nodelists[node];

1089

STATS_INC_NODEFREES(cachep);

1089

STATS_INC_NODEFREES(cachep);

1090

if (l3->alien && l3->alien[nodeid]) {

1090

if (l3->alien && l3->alien[nodeid]) {

1091

alien = l3->alien[nodeid];

1091

alien = l3->alien[nodeid];

1092

spin_lock(&alien->lock);

1092

spin_lock(&alien->lock);

1093

if (unlikely(alien->avail == alien->limit)) {

1093

if (unlikely(alien->avail == alien->limit)) {

1094

STATS_INC_ACOVERFLOW(cachep);

1094

STATS_INC_ACOVERFLOW(cachep);

1095

__drain_alien_cache(cachep, alien, nodeid);

1095

__drain_alien_cache(cachep, alien, nodeid);

1096

}

1096

}

1097

alien->entry[alien->avail++] = objp;

1097

alien->entry[alien->avail++] = objp;

1098

spin_unlock(&alien->lock);

1098

spin_unlock(&alien->lock);

1099

} else {

1099

} else {

1100

spin_lock(&(cachep->nodelists[nodeid])->list_lock);

1100

spin_lock(&(cachep->nodelists[nodeid])->list_lock);

1101

free_block(cachep, &objp, 1, nodeid);

1101

free_block(cachep, &objp, 1, nodeid);

1102

spin_unlock(&(cachep->nodelists[nodeid])->list_lock);

1102

spin_unlock(&(cachep->nodelists[nodeid])->list_lock);

1103

}

1103

}

1104

return 1;

1104

return 1;

1105

}

1105

}

1106

#endif

1106

#endif

1107

1108

/*
 * Tear down the per-cpu slab state of @cpu for every cache on
 * cache_chain.
 *
 * For each cache the cpu's array cache is detached, its objects are
 * given back and it is freed.  If the cpumask of the cpu's node is
 * empty, the node's shared array and alien caches are released as well.
 * The kmem_list3 itself is never freed here.  A second pass then shrinks
 * each cache's free list for that node.
 */
static void __cpuinit cpuup_canceled(long cpu)
{
	struct kmem_cache *cachep;
	struct kmem_list3 *l3 = NULL;
	int node = cpu_to_node(cpu);
	const struct cpumask *mask = cpumask_of_node(node);

	list_for_each_entry(cachep, &cache_chain, next) {
		/* cpu is dead; no one can alloc from it. */
		struct array_cache *dead_ac = cachep->array[cpu];
		struct array_cache *shared = NULL;
		struct array_cache **alien = NULL;

		cachep->array[cpu] = NULL;
		l3 = cachep->nodelists[node];

		if (l3) {
			spin_lock_irq(&l3->list_lock);

			/* This cpu no longer contributes to the free limit. */
			l3->free_limit -= cachep->batchcount;
			if (dead_ac)
				free_block(cachep, dead_ac->entry,
					   dead_ac->avail, node);

			/*
			 * Only when the node's cpumask is empty are the
			 * node-wide shared and alien caches detached.
			 */
			if (cpumask_empty(mask)) {
				shared = l3->shared;
				if (shared) {
					free_block(cachep, shared->entry,
						   shared->avail, node);
					l3->shared = NULL;
				}

				alien = l3->alien;
				l3->alien = NULL;
			}

			spin_unlock_irq(&l3->list_lock);

			/* Freed outside the lock; kfree(NULL) is harmless. */
			kfree(shared);
			if (alien) {
				drain_alien_cache(cachep, alien);
				free_alien_cache(alien);
			}
		}

		kfree(dead_ac);
	}

	/*
	 * The loop above returned all objects to their caches' slabs;
	 * now each node list can be shrunk.
	 */
	list_for_each_entry(cachep, &cache_chain, next) {
		l3 = cachep->nodelists[node];
		if (l3)
			drain_freelist(cachep, l3, l3->free_objects);
	}
}

1172

1173

/*
 * Set up the slab state that @cpu needs, for every cache on cache_chain.
 *
 * Pass 1 makes sure each cache has a kmem_list3 for the cpu's node and
 * recomputes that list's free_limit.  Pass 2 allocates the cpu's array
 * cache and, where the node does not have them yet, the shared array
 * and the alien caches.
 *
 * Returns 0 on success.  On any allocation failure everything is undone
 * through cpuup_canceled() and -ENOMEM is returned.
 */
static int __cpuinit cpuup_prepare(long cpu)
{
	struct kmem_cache *cachep;
	struct kmem_list3 *l3 = NULL;
	int node = cpu_to_node(cpu);

	/*
	 * The node lists have to exist before anything else, because the
	 * alloc_arraycache() calls further down rely on them.
	 * kmalloc_node() places each list on the target node rather than
	 * on the node of the cpu running this code.
	 */
	list_for_each_entry(cachep, &cache_chain, next) {
		l3 = cachep->nodelists[node];
		if (!l3) {
			/* No other cpu of this node has set it up yet. */
			l3 = kmalloc_node(sizeof(struct kmem_list3),
					  GFP_KERNEL, node);
			if (!l3)
				goto bad;
			kmem_list3_init(l3);
			l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
			    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;

			/*
			 * Node lists do not come and go with cpus, so
			 * (per the original note) cache_chain_mutex is
			 * enough protection for publishing it.
			 */
			cachep->nodelists[node] = l3;
		}

		spin_lock_irq(&l3->list_lock);
		l3->free_limit = (1 + nr_cpus_node(node)) *
				 cachep->batchcount + cachep->num;
		spin_unlock_irq(&l3->list_lock);
	}

	/* Now allocate the per-cpu array caches and the shared arrays. */
	list_for_each_entry(cachep, &cache_chain, next) {
		struct array_cache *cpu_ac;
		struct array_cache *shared = NULL;
		struct array_cache **alien = NULL;
		int oom = 0;

		cpu_ac = alloc_arraycache(node, cachep->limit,
					  cachep->batchcount, GFP_KERNEL);
		if (!cpu_ac)
			goto bad;

		if (cachep->shared) {
			shared = alloc_arraycache(node,
				cachep->shared * cachep->batchcount,
				0xbaadf00d, GFP_KERNEL);
			oom = !shared;
		}
		if (!oom && use_alien_caches) {
			alien = alloc_alien_cache(node, cachep->limit,
						  GFP_KERNEL);
			oom = !alien;
		}
		if (oom) {
			/* Neither array has been published yet: drop both. */
			kfree(shared);
			kfree(cpu_ac);
			goto bad;
		}

		cachep->array[cpu] = cpu_ac;
		l3 = cachep->nodelists[node];
		BUG_ON(!l3);

		spin_lock_irq(&l3->list_lock);
		if (!l3->shared) {
			/*
			 * Per the original note, the cpucontrol lock
			 * serialises this against CPU_DEAD and
			 * CPU_UP_CANCELLED.
			 */
			l3->shared = shared;
			shared = NULL;
		}
#ifdef CONFIG_NUMA
		if (!l3->alien) {
			l3->alien = alien;
			alien = NULL;
		}
#endif
		spin_unlock_irq(&l3->list_lock);

		/* Anything the node already had is surplus: release it. */
		kfree(shared);
		free_alien_cache(alien);
	}
	init_node_lock_keys(node);

	return 0;
bad:
	cpuup_canceled(cpu);
	return -ENOMEM;
}

1276

1277

/*
 * CPU hotplug notifier for the slab allocator.
 *
 * @nfb:    notifier block (unused)
 * @action: hotplug event being delivered
 * @hcpu:   the cpu number, passed as a pointer-sized value
 *
 * Returns NOTIFY_BAD if preparing the cpu failed, NOTIFY_OK otherwise
 * (including for events that are not handled here).
 */
static int __cpuinit cpuup_callback(struct notifier_block *nfb,
				    unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;
	int err = 0;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		mutex_lock(&cache_chain_mutex);
		err = cpuup_prepare(cpu);
		mutex_unlock(&cache_chain_mutex);
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		start_cpu_timer(cpu);
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		/*
		 * Shut down this cpu's cache reaper: cancel its delayed
		 * work, then clear the work function.
		 * NOTE(review): presumably start_cpu_timer() uses the NULL
		 * func to tell that the work must be set up again - confirm.
		 */
		cancel_rearming_delayed_work(&per_cpu(slab_reap_work, cpu));
		/* Now the cache_reaper is guaranteed to be not running. */
		per_cpu(slab_reap_work, cpu).work.func = NULL;
		break;
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
		/* The cpu stays up after all: restart its reaper. */
		start_cpu_timer(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		/*
		 * The kmem_list3 of a cache is kept even when every cpu of
		 * its node is down.  Per the original note this avoids a
		 * race between cpu_down and a kmalloc from another cpu for
		 * memory of the node going down; the list3 is usually
		 * created by kmem_cache_create() and only destroyed by
		 * kmem_cache_destroy().
		 */
		/* fall through */
#endif
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		mutex_lock(&cache_chain_mutex);
		cpuup_canceled(cpu);
		mutex_unlock(&cache_chain_mutex);
		break;
	default:
		break;
	}

	if (err)
		return NOTIFY_BAD;
	return NOTIFY_OK;
}

1332

1333

/* Notifier block that routes CPU hotplug events to cpuup_callback(). */
static struct notifier_block __cpuinitdata cpucache_notifier = {
	.notifier_call	= &cpuup_callback,
	.next		= NULL,
	.priority	= 0,
};

1336

1337

/*

1337

/*

1338

* swap the static kmem_list3 with kmalloced memory

1338

* swap the static kmem_list3 with kmalloced memory

1339

*/

1339

*/

1340

static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,

1340

static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,

1341

int nodeid)

1341

int nodeid)

1342

{

1342

{

1343

struct kmem_list3 *ptr;

1343

struct kmem_list3 *ptr;

1344

1345

ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid);

1345

ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid);

1346

BUG_ON(!ptr);

1346

BUG_ON(!ptr);

1347

1348

memcpy(ptr, list, sizeof(struct kmem_list3));

1348

memcpy(ptr, list, sizeof(struct kmem_list3));

1349

/*

1349

/*

1350

* Do not assume that spinlocks can be initialized via memcpy:

1350

* Do not assume that spinlocks can be initialized via memcpy:

1351

*/

1351

*/

1352

spin_lock_init(&ptr->list_lock);

1352

spin_lock_init(&ptr->list_lock);

1353

1354

MAKE_ALL_LISTS(cachep, ptr, nodeid);

1354

MAKE_ALL_LISTS(cachep, ptr, nodeid);

1355

cachep->nodelists[nodeid] = ptr;

1355

cachep->nodelists[nodeid] = ptr;

1356

}

1356

}

1357

1358

/*

1358

/*

1359

* For setting up all the kmem_list3s for cache whose buffer_size is same as

1359

* For setting up all the kmem_list3s for cache whose buffer_size is same as

1360

* size of kmem_list3.

1360

* size of kmem_list3.

1361

*/

1361

*/

1362

static void __init set_up_list3s(struct kmem_cache *cachep, int index)

1362

static void __init set_up_list3s(struct kmem_cache *cachep, int index)

1363

{

1363

{

1364

int node;

1364

int node;

1365

1366

for_each_online_node(node) {

1366

for_each_online_node(node) {

1367

cachep->nodelists[node] = &initkmem_list3[index + node];

1367

cachep->nodelists[node] = &initkmem_list3[index + node];

1368

cachep->nodelists[node]->next_reap = jiffies +

1368

cachep->nodelists[node]->next_reap = jiffies +

1369

REAPTIMEOUT_LIST3 +

1369

REAPTIMEOUT_LIST3 +

1370

((unsigned long)cachep) % REAPTIMEOUT_LIST3;

1370

((unsigned long)cachep) % REAPTIMEOUT_LIST3;

1371

}

1371

}

1372

}

1372

}

1373

1374

/*

1374

/*

1375

* Initialisation. Called after the page allocator have been initialised and

1375

* Initialisation. Called after the page allocator have been initialised and

1376

* before smp_init().

1376

* before smp_init().

1377

*/

1377

*/

1378

void __init kmem_cache_init(void)

1378

void __init kmem_cache_init(void)

1379

{

1379

{

1380

size_t left_over;

1380

size_t left_over;

1381

struct cache_sizes *sizes;

1381

struct cache_sizes *sizes;

1382

struct cache_names *names;

1382

struct cache_names *names;

1383

int i;

1383

int i;

1384

int order;

1384

int order;

1385

int node;

1385

int node;

1386

1387

if (num_possible_nodes() == 1)

1387

if (num_possible_nodes() == 1)

1388

use_alien_caches = 0;

1388

use_alien_caches = 0;

1389

1390

for (i = 0; i < NUM_INIT_LISTS; i++) {

1390

for (i = 0; i < NUM_INIT_LISTS; i++) {

1391

kmem_list3_init(&initkmem_list3[i]);

1391

kmem_list3_init(&initkmem_list3[i]);

1392

if (i < MAX_NUMNODES)

1392

if (i < MAX_NUMNODES)

1393

cache_cache.nodelists[i] = NULL;

1393

cache_cache.nodelists[i] = NULL;

1394

}

1394

}

1395

set_up_list3s(&cache_cache, CACHE_CACHE);

1395

set_up_list3s(&cache_cache, CACHE_CACHE);

1396

1397

/*

1397

/*

1398

* Fragmentation resistance on low memory - only use bigger

1398

* Fragmentation resistance on low memory - only use bigger

1399

* page orders on machines with more than 32MB of memory.

1399

* page orders on machines with more than 32MB of memory.

1400

*/

1400

*/

1401

if (totalram_pages > (32 << 20) >> PAGE_SHIFT)

1401

if (totalram_pages > (32 << 20) >> PAGE_SHIFT)

1402

slab_break_gfp_order = BREAK_GFP_ORDER_HI;

1402

slab_break_gfp_order = BREAK_GFP_ORDER_HI;

1403

1404

/* Bootstrap is tricky, because several objects are allocated

1404

/* Bootstrap is tricky, because several objects are allocated

1405

* from caches that do not exist yet:

1405

* from caches that do not exist yet:

1406

* 1) initialize the cache_cache cache: it contains the struct

1406

* 1) initialize the cache_cache cache: it contains the struct

1407

* kmem_cache structures of all caches, except cache_cache itself:

1407

* kmem_cache structures of all caches, except cache_cache itself:

1408

* cache_cache is statically allocated.

1408

* cache_cache is statically allocated.

1409

* Initially an __init data area is used for the head array and the

1409

* Initially an __init data area is used for the head array and the

1410

* kmem_list3 structures, it's replaced with a kmalloc allocated

1410

* kmem_list3 structures, it's replaced with a kmalloc allocated

1411

* array at the end of the bootstrap.

1411

* array at the end of the bootstrap.

1412

* 2) Create the first kmalloc cache.

1412

* 2) Create the first kmalloc cache.

1413

* The struct kmem_cache for the new cache is allocated normally.

1413

* The struct kmem_cache for the new cache is allocated normally.

1414

* An __init data area is used for the head array.

1414

* An __init data area is used for the head array.

1415

* 3) Create the remaining kmalloc caches, with minimally sized

1415

* 3) Create the remaining kmalloc caches, with minimally sized

1416

* head arrays.

1416

* head arrays.

1417

* 4) Replace the __init data head arrays for cache_cache and the first

1417

* 4) Replace the __init data head arrays for cache_cache and the first

1418

* kmalloc cache with kmalloc allocated arrays.

1418

* kmalloc cache with kmalloc allocated arrays.

1419

* 5) Replace the __init data for kmem_list3 for cache_cache and

1419

* 5) Replace the __init data for kmem_list3 for cache_cache and

1420

* the other cache's with kmalloc allocated memory.

1420

* the other cache's with kmalloc allocated memory.

1421

* 6) Resize the head arrays of the kmalloc caches to their final sizes.

1421

* 6) Resize the head arrays of the kmalloc caches to their final sizes.

1422

*/

1422

*/

1423

1424

node = numa_node_id();

1424

node = numa_node_id();

1425

1426

/* 1) create the cache_cache */

1426

/* 1) create the cache_cache */

1427

INIT_LIST_HEAD(&cache_chain);

1427

INIT_LIST_HEAD(&cache_chain);

1428

list_add(&cache_cache.next, &cache_chain);

1428

list_add(&cache_cache.next, &cache_chain);

1429

cache_cache.colour_off = cache_line_size();

1429

cache_cache.colour_off = cache_line_size();

1430

cache_cache.array[smp_processor_id()] = &initarray_cache.cache;

1430

cache_cache.array[smp_processor_id()] = &initarray_cache.cache;

1431

cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];

1431

cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];

1432

1433

/*

1433

/*

1434

* struct kmem_cache size depends on nr_node_ids, which

1434

* struct kmem_cache size depends on nr_node_ids, which

1435

* can be less than MAX_NUMNODES.

1435

* can be less than MAX_NUMNODES.

1436

*/

1436

*/

1437

cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +

1437

cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +

1438

nr_node_ids * sizeof(struct kmem_list3 *);

1438

nr_node_ids * sizeof(struct kmem_list3 *);

1439

#if DEBUG

1439

#if DEBUG

1440

cache_cache.obj_size = cache_cache.buffer_size;

1440

cache_cache.obj_size = cache_cache.buffer_size;

1441

#endif

1441

#endif

1442

cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,

1442

cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,

1443

cache_line_size());

1443

cache_line_size());

1444

cache_cache.reciprocal_buffer_size =

1444

cache_cache.reciprocal_buffer_size =

1445

reciprocal_value(cache_cache.buffer_size);

1445

reciprocal_value(cache_cache.buffer_size);

1446

1447

for (order = 0; order < MAX_ORDER; order++) {

1447

for (order = 0; order < MAX_ORDER; order++) {

1448

cache_estimate(order, cache_cache.buffer_size,

1448

cache_estimate(order, cache_cache.buffer_size,

1449

cache_line_size(), 0, &left_over, &cache_cache.num);

1449

cache_line_size(), 0, &left_over, &cache_cache.num);

1450

if (cache_cache.num)

1450

if (cache_cache.num)

1451

break;

1451

break;

1452

}

1452

}

1453

BUG_ON(!cache_cache.num);

1453

BUG_ON(!cache_cache.num);

1454

cache_cache.gfporder = order;

1454

cache_cache.gfporder = order;

1455

cache_cache.colour = left_over / cache_cache.colour_off;

1455

cache_cache.colour = left_over / cache_cache.colour_off;

1456

cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +

1456

cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +

1457

sizeof(struct slab), cache_line_size());

1457

sizeof(struct slab), cache_line_size());

1458

1459

/* 2+3) create the kmalloc caches */

1459

/* 2+3) create the kmalloc caches */

1460

sizes = malloc_sizes;

1460

sizes = malloc_sizes;

1461

names = cache_names;

1461

names = cache_names;

1462

1463

/*

1463

/*

1464

* Initialize the caches that provide memory for the array cache and the

1464

* Initialize the caches that provide memory for the array cache and the

1465

* kmem_list3 structures first. Without this, further allocations will

1465

* kmem_list3 structures first. Without this, further allocations will

1466

* bug.

1466

* bug.

1467

*/

1467

*/

1468

1469

sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,

1469

sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,

1470

sizes[INDEX_AC].cs_size,

1470

sizes[INDEX_AC].cs_size,

1471

ARCH_KMALLOC_MINALIGN,

1471

ARCH_KMALLOC_MINALIGN,

1472

ARCH_KMALLOC_FLAGS|SLAB_PANIC,

1472

ARCH_KMALLOC_FLAGS|SLAB_PANIC,

1473

NULL);

1473

NULL);

1474

1475

if (INDEX_AC != INDEX_L3) {

1475

if (INDEX_AC != INDEX_L3) {

1476

sizes[INDEX_L3].cs_cachep =

1476

sizes[INDEX_L3].cs_cachep =

1477

kmem_cache_create(names[INDEX_L3].name,

1477

kmem_cache_create(names[INDEX_L3].name,

1478

sizes[INDEX_L3].cs_size,

1478

sizes[INDEX_L3].cs_size,

1479

ARCH_KMALLOC_MINALIGN,

1479

ARCH_KMALLOC_MINALIGN,

1480

ARCH_KMALLOC_FLAGS|SLAB_PANIC,

1480

ARCH_KMALLOC_FLAGS|SLAB_PANIC,

1481

NULL);

1481

NULL);

1482

}

1482

}

1483

1484

slab_early_init = 0;

1484

slab_early_init = 0;

1485

1486

while (sizes->cs_size != ULONG_MAX) {

1486

while (sizes->cs_size != ULONG_MAX) {

1487

/*

1487

/*

1488

* For performance, all the general caches are L1 aligned.

1488

* For performance, all the general caches are L1 aligned.

1489

* This should be particularly beneficial on SMP boxes, as it

1489

* This should be particularly beneficial on SMP boxes, as it

1490

* eliminates "false sharing".

1490

* eliminates "false sharing".

1491

* Note for systems short on memory removing the alignment will

1491

* Note for systems short on memory removing the alignment will

1492

* allow tighter packing of the smaller caches.

1492

* allow tighter packing of the smaller caches.

1493

*/

1493

*/

1494

if (!sizes->cs_cachep) {

1494

if (!sizes->cs_cachep) {

1495

sizes->cs_cachep = kmem_cache_create(names->name,

1495

sizes->cs_cachep = kmem_cache_create(names->name,

1496

sizes->cs_size,

1496

sizes->cs_size,

1497

ARCH_KMALLOC_MINALIGN,

1497

ARCH_KMALLOC_MINALIGN,

1498

ARCH_KMALLOC_FLAGS|SLAB_PANIC,

1498

ARCH_KMALLOC_FLAGS|SLAB_PANIC,

1499

NULL);

1499

NULL);

1500

}

1500

}

1501

#ifdef CONFIG_ZONE_DMA

1501

#ifdef CONFIG_ZONE_DMA

1502

sizes->cs_dmacachep = kmem_cache_create(

1502

sizes->cs_dmacachep = kmem_cache_create(

1503

names->name_dma,

1503

names->name_dma,

1504

sizes->cs_size,

1504

sizes->cs_size,

1505

ARCH_KMALLOC_MINALIGN,

1505

ARCH_KMALLOC_MINALIGN,

1506

ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|

1506

ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|

1507

SLAB_PANIC,

1507

SLAB_PANIC,

1508

NULL);

1508

NULL);

1509

#endif

1509

#endif

1510

sizes++;

1510

sizes++;

1511

names++;

1511

names++;

1512

}

1512

}

1513

/* 4) Replace the bootstrap head arrays */

1513

/* 4) Replace the bootstrap head arrays */

1514

{

1514

{

1515

struct array_cache *ptr;

1515

struct array_cache *ptr;

1516

1517

ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);

1517

ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);

1518

1519

BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);

1519

BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);

1520

memcpy(ptr, cpu_cache_get(&cache_cache),

1520

memcpy(ptr, cpu_cache_get(&cache_cache),

1521

sizeof(struct arraycache_init));

1521

sizeof(struct arraycache_init));

1522

/*

1522

/*

1523

* Do not assume that spinlocks can be initialized via memcpy:

1523

* Do not assume that spinlocks can be initialized via memcpy:

1524

*/

1524

*/

1525

spin_lock_init(&ptr->lock);

1525

spin_lock_init(&ptr->lock);

1526

1527

cache_cache.array[smp_processor_id()] = ptr;

1527

cache_cache.array[smp_processor_id()] = ptr;

1528

1529

ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);

1529

ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);

1530

1531

BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)

1531

BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)

1532

!= &initarray_generic.cache);

1532

!= &initarray_generic.cache);

1533

memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),

1533

memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),

1534

sizeof(struct arraycache_init));

1534

sizeof(struct arraycache_init));

1535

/*

1535

/*

1536

* Do not assume that spinlocks can be initialized via memcpy:

1536

* Do not assume that spinlocks can be initialized via memcpy:

1537

*/

1537

*/

1538

spin_lock_init(&ptr->lock);

1538

spin_lock_init(&ptr->lock);

1539

1540

malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =

1540

malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =

1541

ptr;

1541

ptr;

1542

}

1542

}

1543

/* 5) Replace the bootstrap kmem_list3's */

1543

/* 5) Replace the bootstrap kmem_list3's */

1544

{

1544

{

1545

int nid;

1545

int nid;

1546

1547

for_each_online_node(nid) {

1547

for_each_online_node(nid) {

1548

init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);

1548

init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);

1549

1550

init_list(malloc_sizes[INDEX_AC].cs_cachep,

1550

init_list(malloc_sizes[INDEX_AC].cs_cachep,

1551

&initkmem_list3[SIZE_AC + nid], nid);

1551

&initkmem_list3[SIZE_AC + nid], nid);

1552

1553

if (INDEX_AC != INDEX_L3) {

1553

if (INDEX_AC != INDEX_L3) {

1554

init_list(malloc_sizes[INDEX_L3].cs_cachep,

1554

init_list(malloc_sizes[INDEX_L3].cs_cachep,

1555

&initkmem_list3[SIZE_L3 + nid], nid);

1555

&initkmem_list3[SIZE_L3 + nid], nid);

1556

}

1556

}

1557

}

1557

}

1558

}

1558

}

1559

1560

g_cpucache_up = EARLY;

1560

g_cpucache_up = EARLY;

1561

}

1561

}

1562

1563

/*
 * Late stage of slab bootstrap: give every cache on cache_chain its final
 * head arrays, flag the allocator as fully up, annotate the locks for
 * lockdep and register the cpu notifier.
 */
void __init kmem_cache_init_late(void)
{
	struct kmem_cache *cache;

	/* 6) resize the head arrays to their final sizes */
	mutex_lock(&cache_chain_mutex);
	list_for_each_entry(cache, &cache_chain, next) {
		/* A non-zero return from enable_cpucache() is fatal here. */
		if (enable_cpucache(cache, GFP_NOWAIT))
			BUG();
	}
	mutex_unlock(&cache_chain_mutex);

	/* Done! */
	g_cpucache_up = FULL;

	/* Annotate slab for lockdep -- annotate the malloc caches */
	init_lock_keys();

	/*
	 * Register a cpu startup notifier callback that initializes
	 * cpu_cache_get for all new cpus
	 */
	register_cpu_notifier(&cpucache_notifier);

	/*
	 * The reap timers are not started here; a module init call does that
	 * later, because that part of the kernel is not yet operational.
	 */
}

1591

1592

/*
 * Initcall: start the per-cpu timer on every cpu that is online at this
 * point. These are the timers that return unneeded pages to the page
 * allocator. Always returns 0.
 */
static int __init cpucache_init(void)
{
	int cpu_id;

	for_each_online_cpu(cpu_id) {
		start_cpu_timer(cpu_id);
	}

	return 0;
}
__initcall(cpucache_init);

1604

1605

/*

1605

/*

1606

* Interface to system's page allocator. No need to hold the cache-lock.

1606

* Interface to system's page allocator. No need to hold the cache-lock.

1607

*

1607

*

1608

* If we requested dmaable memory, we will get it. Even if we

1608

* If we requested dmaable memory, we will get it. Even if we

1609

* did not request dmaable memory, we might get it, but that

1609

* did not request dmaable memory, we might get it, but that

1610

* would be relatively rare and ignorable.

1610

* would be relatively rare and ignorable.

1611

*/

1611

*/

1612

static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)

1612

static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)

1613

{

1613

{

1614

struct page *page;

1614

struct page *page;

1615

int nr_pages;

1615

int nr_pages;

1616

int i;

1616

int i;

1617

1618

#ifndef CONFIG_MMU

1618

#ifndef CONFIG_MMU

1619

/*

1619

/*

1620

* Nommu uses slab's for process anonymous memory allocations, and thus

1620

* Nommu uses slab's for process anonymous memory allocations, and thus

1621

* requires __GFP_COMP to properly refcount higher order allocations

1621

* requires __GFP_COMP to properly refcount higher order allocations

1622

*/

1622

*/

1623

flags |= __GFP_COMP;

1623

flags |= __GFP_COMP;

1624

#endif

1624

#endif

1625

1626

flags |= cachep->gfpflags;

1626

flags |= cachep->gfpflags;

1627

if (cachep->flags & SLAB_RECLAIM_ACCOUNT)

1627

if (cachep->flags & SLAB_RECLAIM_ACCOUNT)

1628

flags |= __GFP_RECLAIMABLE;

1628

flags |= __GFP_RECLAIMABLE;

1629

1630

page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);

1630

page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);

1631

if (!page)

1631

if (!page)

1632

return NULL;

1632

return NULL;

1633

1634

nr_pages = (1 << cachep->gfporder);

1634

nr_pages = (1 << cachep->gfporder);

1635

if (cachep->flags & SLAB_RECLAIM_ACCOUNT)

1635

if (cachep->flags & SLAB_RECLAIM_ACCOUNT)

1636

add_zone_page_state(page_zone(page),

1636

add_zone_page_state(page_zone(page),

1637

NR_SLAB_RECLAIMABLE, nr_pages);

1637

NR_SLAB_RECLAIMABLE, nr_pages);

1638

else

1638

else

1639

add_zone_page_state(page_zone(page),

1639

add_zone_page_state(page_zone(page),

1640

NR_SLAB_UNRECLAIMABLE, nr_pages);

1640

NR_SLAB_UNRECLAIMABLE, nr_pages);

1641

for (i = 0; i < nr_pages; i++)

1641

for (i = 0; i < nr_pages; i++)

1642

__SetPageSlab(page + i);

1642

__SetPageSlab(page + i);

1643

1644

if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {

1644

if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {

1645

kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);

1645

kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);

1646

1647

if (cachep->ctor)

1647

if (cachep->ctor)

1648

kmemcheck_mark_uninitialized_pages(page, nr_pages);

1648

kmemcheck_mark_uninitialized_pages(page, nr_pages);

1649

else

1649

else

1650

kmemcheck_mark_unallocated_pages(page, nr_pages);

1650

kmemcheck_mark_unallocated_pages(page, nr_pages);

1651

}

1651

}

1652

1653

return page_address(page);

1653

return page_address(page);

1654

}

1654

}

1655

1656

/*

1656

/*

1657

* Interface to system's page release.

1657

* Interface to system's page release.

1658

*/

1658

*/

1659

static void kmem_freepages(struct kmem_cache *cachep, void *addr)

1659

static void kmem_freepages(struct kmem_cache *cachep, void *addr)

1660

{

1660

{

1661

unsigned long i = (1 << cachep->gfporder);

1661

unsigned long i = (1 << cachep->gfporder);

1662

struct page *page = virt_to_page(addr);

1662

struct page *page = virt_to_page(addr);

1663

const unsigned long nr_freed = i;

1663

const unsigned long nr_freed = i;

1664

1665

kmemcheck_free_shadow(page, cachep->gfporder);

1665

kmemcheck_free_shadow(page, cachep->gfporder);

1666

1667

if (cachep->flags & SLAB_RECLAIM_ACCOUNT)

1667

if (cachep->flags & SLAB_RECLAIM_ACCOUNT)

1668

sub_zone_page_state(page_zone(page),

1668

sub_zone_page_state(page_zone(page),

1669

NR_SLAB_RECLAIMABLE, nr_freed);

1669

NR_SLAB_RECLAIMABLE, nr_freed);

1670

else

1670

else

1671

sub_zone_page_state(page_zone(page),

1671

sub_zone_page_state(page_zone(page),

1672

NR_SLAB_UNRECLAIMABLE, nr_freed);

1672

NR_SLAB_UNRECLAIMABLE, nr_freed);

1673

while (i--) {

1673

while (i--) {

1674

BUG_ON(!PageSlab(page));

1674

BUG_ON(!PageSlab(page));

1675

__ClearPageSlab(page);

1675

__ClearPageSlab(page);

1676

page++;

1676

page++;

1677

}

1677

}

1678

if (current->reclaim_state)

1678

if (current->reclaim_state)

1679

current->reclaim_state->reclaimed_slab += nr_freed;

1679

current->reclaim_state->reclaimed_slab += nr_freed;

1680

free_pages((unsigned long)addr, cachep->gfporder);

1680

free_pages((unsigned long)addr, cachep->gfporder);

1681

}

1681

}

1682

1683

static void kmem_rcu_free(struct rcu_head *head)

1683

static void kmem_rcu_free(struct rcu_head *head)

1684

{

1684

{

1685

struct slab_rcu *slab_rcu = (struct slab_rcu *)head;

1685

struct slab_rcu *slab_rcu = (struct slab_rcu *)head;

1686

struct kmem_cache *cachep = slab_rcu->cachep;

1686

struct kmem_cache *cachep = slab_rcu->cachep;

1687

1688

kmem_freepages(cachep, slab_rcu->addr);

1688

kmem_freepages(cachep, slab_rcu->addr);

1689

if (OFF_SLAB(cachep))

1689

if (OFF_SLAB(cachep))

1690

kmem_cache_free(cachep->slabp_cache, slab_rcu);

1690

kmem_cache_free(cachep->slabp_cache, slab_rcu);

1691

}

1691

}

1692

1693

#if DEBUG

1693

#if DEBUG

1694

1695

#ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * Record a small trace inside the object at @addr: a start marker, @caller,
 * the current cpu id, then every word found on the kernel stack (scanning
 * upwards from the 'caller' argument slot) that is a kernel text address,
 * and finally an end marker.
 *
 * Objects smaller than five words are left untouched. The stack scan stops
 * at the end of the stack or when at most one word of room remains, which
 * is kept for the end marker.
 */
static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
				unsigned long caller)
{
	const size_t word = sizeof(unsigned long);
	int room = obj_size(cachep);
	unsigned long *out;
	unsigned long *sp;

	out = (unsigned long *)((char *)addr + obj_offset(cachep));

	if (room < 5 * word)
		return;

	/* Fixed three-word header. */
	out[0] = 0x12345678;
	out[1] = caller;
	out[2] = smp_processor_id();
	out += 3;
	room -= 3 * word;

	for (sp = &caller; !kstack_end(sp); sp++) {
		unsigned long val = *sp;

		if (!kernel_text_address(val))
			continue;

		*out++ = val;
		room -= word;
		if (room <= word)
			break;
	}

	*out = 0x87654321;
}
#endif

1728

1729

static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)

1729

static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)

1730

{

1730

{

1731

int size = obj_size(cachep);

1731

int size = obj_size(cachep);

1732

addr = &((char *)addr)[obj_offset(cachep)];

1732

addr = &((char *)addr)[obj_offset(cachep)];

1733

1734

memset(addr, val, size);

1734

memset(addr, val, size);

1735

*(unsigned char *)(addr + size - 1) = POISON_END;

1735

*(unsigned char *)(addr + size - 1) = POISON_END;

1736

}

1736

}

1737

1738

static void dump_line(char *data, int offset, int limit)

1738

static void dump_line(char *data, int offset, int limit)

1739

{

1739

{

1740

int i;

1740

int i;

1741

unsigned char error = 0;

1741

unsigned char error = 0;

1742

int bad_count = 0;

1742

int bad_count = 0;

1743

1744

printk(KERN_ERR "%03x:", offset);

1744

printk(KERN_ERR "%03x:", offset);

1745

for (i = 0; i < limit; i++) {

1745

for (i = 0; i < limit; i++) {

1746

if (data[offset + i] != POISON_FREE) {

1746

if (data[offset + i] != POISON_FREE) {

1747

error = data[offset + i];

1747

error = data[offset + i];

1748

bad_count++;

1748

bad_count++;

1749

}

1749

}

1750

printk(" %02x", (unsigned char)data[offset + i]);

1750

printk(" %02x", (unsigned char)data[offset + i]);

1751

}

1751

}

1752

printk("\n");

1752

printk("\n");

1753

1754

if (bad_count == 1) {

1754

if (bad_count == 1) {

1755

error ^= POISON_FREE;

1755

error ^= POISON_FREE;

1756

if (!(error & (error - 1))) {

1756

if (!(error & (error - 1))) {

1757

printk(KERN_ERR "Single bit error detected. Probably "

1757

printk(KERN_ERR "Single bit error detected. Probably "

1758

"bad RAM.\n");

1758

"bad RAM.\n");

1759

#ifdef CONFIG_X86

1759

#ifdef CONFIG_X86

1760

printk(KERN_ERR "Run memtest86+ or a similar memory "

1760

printk(KERN_ERR "Run memtest86+ or a similar memory "

1761

"test tool.\n");

1761

"test tool.\n");

1762

#else

1762

#else

1763

printk(KERN_ERR "Run a memory test tool.\n");

1763

printk(KERN_ERR "Run a memory test tool.\n");

1764

#endif

1764

#endif

1765

}

1765

}

1766

}

1766

}

1767

}

1767

}

1768

#endif

1768

#endif

1769

1770

#if DEBUG

1770

#if DEBUG

1771

1772

static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)

1772

static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)

1773

{

1773

{

1774

int i, size;

1774

int i, size;

1775

char *realobj;

1775

char *realobj;

1776

1777

if (cachep->flags & SLAB_RED_ZONE) {

1777

if (cachep->flags & SLAB_RED_ZONE) {

1778

printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",

1778

printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",

1779

*dbg_redzone1(cachep, objp),

1779

*dbg_redzone1(cachep, objp),

1780

*dbg_redzone2(cachep, objp));

1780

*dbg_redzone2(cachep, objp));

1781

}

1781

}

1782

1783

if (cachep->flags & SLAB_STORE_USER) {

1783

if (cachep->flags & SLAB_STORE_USER) {

1784

printk(KERN_ERR "Last user: [<%p>]",

1784

printk(KERN_ERR "Last user: [<%p>]",

1785

*dbg_userword(cachep, objp));

1785

*dbg_userword(cachep, objp));

1786

print_symbol("(%s)",

1786

print_symbol("(%s)",

1787

(unsigned long)*dbg_userword(cachep, objp));

1787

(unsigned long)*dbg_userword(cachep, objp));

1788

printk("\n");

1788

printk("\n");

1789

}

1789

}

1790

realobj = (char *)objp + obj_offset(cachep);

1790

realobj = (char *)objp + obj_offset(cachep);

1791

size = obj_size(cachep);

1791

size = obj_size(cachep);

1792

for (i = 0; i < size && lines; i += 16, lines--) {

1792

for (i = 0; i < size && lines; i += 16, lines--) {

1793

int limit;

1793

int limit;

1794

limit = 16;

1794

limit = 16;

1795

if (i + limit > size)

1795

if (i + limit > size)

1796

limit = size - i;

1796

limit = size - i;

1797

dump_line(realobj, i, limit);

1797

dump_line(realobj, i, limit);

1798

}

1798

}

1799

}

1799

}

1800

1801

static void check_poison_obj(struct kmem_cache *cachep, void *objp)

1801

static void check_poison_obj(struct kmem_cache *cachep, void *objp)

1802

{

1802

{

1803

char *realobj;

1803

char *realobj;

1804

int size, i;

1804

int size, i;

1805

int lines = 0;

1805

int lines = 0;

1806

1807

realobj = (char *)objp + obj_offset(cachep);

1807

realobj = (char *)objp + obj_offset(cachep);

1808

size = obj_size(cachep);

1808

size = obj_size(cachep);

1809

1810

for (i = 0; i < size; i++) {

1810

for (i = 0; i < size; i++) {

1811

char exp = POISON_FREE;

1811

char exp = POISON_FREE;

1812

if (i == size - 1)

1812

if (i == size - 1)

1813

exp = POISON_END;

1813

exp = POISON_END;

1814

if (realobj[i] != exp) {

1814

if (realobj[i] != exp) {

1815

int limit;

1815

int limit;

1816

/* Mismatch ! */

1816

/* Mismatch ! */

1817

/* Print header */

1817

/* Print header */

1818

if (lines == 0) {

1818

if (lines == 0) {

1819

printk(KERN_ERR

1819

printk(KERN_ERR

1820

"Slab corruption: %s start=%p, len=%d\n",

1820

"Slab corruption: %s start=%p, len=%d\n",

1821

cachep->name, realobj, size);

1821

cachep->name, realobj, size);

1822

print_objinfo(cachep, objp, 0);

1822

print_objinfo(cachep, objp, 0);

1823

}

1823

}

1824

/* Hexdump the affected line */

1824

/* Hexdump the affected line */

1825

i = (i / 16) * 16;

1825

i = (i / 16) * 16;

1826

limit = 16;

1826

limit = 16;

1827

if (i + limit > size)

1827

if (i + limit > size)

1828

limit = size - i;

1828

limit = size - i;

1829

dump_line(realobj, i, limit);

1829

dump_line(realobj, i, limit);

1830

i += 16;

1830

i += 16;

1831

lines++;

1831

lines++;

1832

/* Limit to 5 lines */

1832

/* Limit to 5 lines */

1833

if (lines > 5)

1833

if (lines > 5)

1834

break;

1834

break;

1835

}

1835

}

1836

}

1836

}

1837

if (lines != 0) {

1837

if (lines != 0) {

1838

/* Print some data about the neighboring objects, if they

1838

/* Print some data about the neighboring objects, if they

1839

* exist:

1839

* exist:

1840

*/

1840

*/

1841

struct slab *slabp = virt_to_slab(objp);

1841

struct slab *slabp = virt_to_slab(objp);

1842

unsigned int objnr;

1842

unsigned int objnr;

1843

1844

objnr = obj_to_index(cachep, slabp, objp);

1844

objnr = obj_to_index(cachep, slabp, objp);

1845

if (objnr) {

1845

if (objnr) {

1846

objp = index_to_obj(cachep, slabp, objnr - 1);

1846

objp = index_to_obj(cachep, slabp, objnr - 1);

1847

realobj = (char *)objp + obj_offset(cachep);

1847

realobj = (char *)objp + obj_offset(cachep);

1848

printk(KERN_ERR "Prev obj: start=%p, len=%d\n",

1848

printk(KERN_ERR "Prev obj: start=%p, len=%d\n",

1849

realobj, size);

1849

realobj, size);

1850

print_objinfo(cachep, objp, 2);

1850

print_objinfo(cachep, objp, 2);

1851

}

1851

}

1852

if (objnr + 1 < cachep->num) {

1852

if (objnr + 1 < cachep->num) {

1853

objp = index_to_obj(cachep, slabp, objnr + 1);

1853

objp = index_to_obj(cachep, slabp, objnr + 1);

1854

realobj = (char *)objp + obj_offset(cachep);

1854

realobj = (char *)objp + obj_offset(cachep);

1855

printk(KERN_ERR "Next obj: start=%p, len=%d\n",

1855

printk(KERN_ERR "Next obj: start=%p, len=%d\n",

1856

realobj, size);

1856

realobj, size);

1857

print_objinfo(cachep, objp, 2);

1857

print_objinfo(cachep, objp, 2);

1858

}

1858

}

1859

}

1859

}

1860

}

1860

}

1861

#endif

1861

#endif

1862

1863

#if DEBUG
/*
 * Debug checks run on every object of the slab @slabp before it is
 * destroyed.
 *
 * For SLAB_POISON caches the poison pattern of each object is verified.
 * The exception is CONFIG_DEBUG_PAGEALLOC with off-slab management and a
 * buffer size that is a whole number of pages: there the object's pages
 * are passed to kernel_map_pages() with enable == 1 instead.
 * For SLAB_RED_ZONE caches both red zone words must read RED_INACTIVE;
 * otherwise a slab error is reported.
 */
static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
{
	int idx;

	for (idx = 0; idx < cachep->num; idx++) {
		void *obj = index_to_obj(cachep, slabp, idx);

		if (cachep->flags & SLAB_POISON) {
			int remapped = 0;

#ifdef CONFIG_DEBUG_PAGEALLOC
			if (OFF_SLAB(cachep) &&
			    cachep->buffer_size % PAGE_SIZE == 0) {
				kernel_map_pages(virt_to_page(obj),
					cachep->buffer_size / PAGE_SIZE, 1);
				remapped = 1;
			}
#endif
			if (!remapped)
				check_poison_obj(cachep, obj);
		}

		if (!(cachep->flags & SLAB_RED_ZONE))
			continue;

		if (*dbg_redzone1(cachep, obj) != RED_INACTIVE)
			slab_error(cachep, "start of a freed object "
					"was overwritten");
		if (*dbg_redzone2(cachep, obj) != RED_INACTIVE)
			slab_error(cachep, "end of a freed object "
					"was overwritten");
	}
}
#else
/* Without DEBUG there is nothing to check. */
static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
{
}
#endif

1897

1898

/**

1898

/**

1899

* slab_destroy - destroy and release all objects in a slab

1899

* slab_destroy - destroy and release all objects in a slab

1900

* @cachep: cache pointer being destroyed

1900

* @cachep: cache pointer being destroyed

1901

* @slabp: slab pointer being destroyed

1901

* @slabp: slab pointer being destroyed

1902

*

1902

*

1903

* Destroy all the objs in a slab, and release the mem back to the system.

1903

* Destroy all the objs in a slab, and release the mem back to the system.

1904

* Before calling the slab must have been unlinked from the cache. The

1904

* Before calling the slab must have been unlinked from the cache. The

1905

* cache-lock is not held/needed.

1905

* cache-lock is not held/needed.

1906

*/

1906

*/

1907

static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)

1907

static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)

1908

{

1908

{

1909

void *addr = slabp->s_mem - slabp->colouroff;

1909

void *addr = slabp->s_mem - slabp->colouroff;

1910

1911

slab_destroy_debugcheck(cachep, slabp);

1911

slab_destroy_debugcheck(cachep, slabp);

1912

if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {

1912

if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {

1913

struct slab_rcu *slab_rcu;

1913

struct slab_rcu *slab_rcu;

1914

1915

slab_rcu = (struct slab_rcu *)slabp;

1915

slab_rcu = (struct slab_rcu *)slabp;

1916

slab_rcu->cachep = cachep;

1916

slab_rcu->cachep = cachep;

1917

slab_rcu->addr = addr;

1917

slab_rcu->addr = addr;

1918

call_rcu(&slab_rcu->head, kmem_rcu_free);

1918

call_rcu(&slab_rcu->head, kmem_rcu_free);

1919

} else {

1919

} else {

1920

kmem_freepages(cachep, addr);

1920

kmem_freepages(cachep, addr);

1921

if (OFF_SLAB(cachep))

1921

if (OFF_SLAB(cachep))

1922

kmem_cache_free(cachep->slabp_cache, slabp);

1922

kmem_cache_free(cachep->slabp_cache, slabp);

1923

}

1923

}

1924

}

1924

}

1925

1926

static void __kmem_cache_destroy(struct kmem_cache *cachep)

1926

static void __kmem_cache_destroy(struct kmem_cache *cachep)

1927

{

1927

{

1928

int i;

1928

int i;

1929

struct kmem_list3 *l3;

1929

struct kmem_list3 *l3;

1930

1931

for_each_online_cpu(i)

1931

for_each_online_cpu(i)

1932

kfree(cachep->array[i]);

1932

kfree(cachep->array[i]);

1933

1934

/* NUMA: free the list3 structures */

1934

/* NUMA: free the list3 structures */

1935

for_each_online_node(i) {

1935

for_each_online_node(i) {

1936

l3 = cachep->nodelists[i];

1936

l3 = cachep->nodelists[i];

1937

if (l3) {

1937

if (l3) {

1938

kfree(l3->shared);

1938

kfree(l3->shared);

1939

free_alien_cache(l3->alien);

1939

free_alien_cache(l3->alien);

1940

kfree(l3);

1940

kfree(l3);

1941

}

1941

}

1942

}

1942

}

1943

kmem_cache_free(&cache_cache, cachep);

1943

kmem_cache_free(&cache_cache, cachep);

1944

}

1944

}

1945

1946

1947

/**

1947

/**

1948

* calculate_slab_order - calculate size (page order) of slabs

1948

* calculate_slab_order - calculate size (page order) of slabs

1949

* @cachep: pointer to the cache that is being created

1949

* @cachep: pointer to the cache that is being created

1950

* @size: size of objects to be created in this cache.

1950

* @size: size of objects to be created in this cache.

1951

* @align: required alignment for the objects.

1951

* @align: required alignment for the objects.

1952

* @flags: slab allocation flags

1952

* @flags: slab allocation flags

1953

*

1953

*

1954

* Also calculates the number of objects per slab.

1954

* Also calculates the number of objects per slab.

1955

*

1955

*

1956

* This could be made much more intelligent. For now, try to avoid using

1956

* This could be made much more intelligent. For now, try to avoid using

1957

* high order pages for slabs. When the gfp() functions are more friendly

1957

* high order pages for slabs. When the gfp() functions are more friendly

1958

* towards high-order requests, this should be changed.

1958

* towards high-order requests, this should be changed.

1959

*/

1959

*/

1960

static size_t calculate_slab_order(struct kmem_cache *cachep,

1960

static size_t calculate_slab_order(struct kmem_cache *cachep,

1961

size_t size, size_t align, unsigned long flags)

1961

size_t size, size_t align, unsigned long flags)

1962

{

1962

{

1963

unsigned long offslab_limit;

1963

unsigned long offslab_limit;

1964

size_t left_over = 0;

1964

size_t left_over = 0;

1965

int gfporder;

1965

int gfporder;

1966

1967

for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {

1967

for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {

1968

unsigned int num;

1968

unsigned int num;

1969

size_t remainder;

1969

size_t remainder;

1970

1971

cache_estimate(gfporder, size, align, flags, &remainder, &num);

1971

cache_estimate(gfporder, size, align, flags, &remainder, &num);

1972

if (!num)

1972

if (!num)

1973

continue;

1973

continue;

1974

1975

if (flags & CFLGS_OFF_SLAB) {

1975

if (flags & CFLGS_OFF_SLAB) {

1976

/*

1976

/*

1977

* Max number of objs-per-slab for caches which

1977

* Max number of objs-per-slab for caches which

1978

* use off-slab slabs. Needed to avoid a possible

1978

* use off-slab slabs. Needed to avoid a possible

1979

* looping condition in cache_grow().

1979

* looping condition in cache_grow().

1980

*/

1980

*/

1981

offslab_limit = size - sizeof(struct slab);

1981

offslab_limit = size - sizeof(struct slab);

1982

offslab_limit /= sizeof(kmem_bufctl_t);

1982

offslab_limit /= sizeof(kmem_bufctl_t);

1983

1984

if (num > offslab_limit)

1984

if (num > offslab_limit)

1985

break;

1985

break;

1986

}

1986

}

1987

1988

/* Found something acceptable - save it away */

1988

/* Found something acceptable - save it away */

1989

cachep->num = num;

1989

cachep->num = num;

1990

cachep->gfporder = gfporder;

1990

cachep->gfporder = gfporder;

1991

left_over = remainder;

1991

left_over = remainder;

1992

1993

/*

1993

/*

1994

* A VFS-reclaimable slab tends to have most allocations

1994

* A VFS-reclaimable slab tends to have most allocations

1995

* as GFP_NOFS and we really don't want to have to be allocating

1995

* as GFP_NOFS and we really don't want to have to be allocating

1996

* higher-order pages when we are unable to shrink dcache.

1996

* higher-order pages when we are unable to shrink dcache.

1997

*/

1997

*/

1998

if (flags & SLAB_RECLAIM_ACCOUNT)

1998

if (flags & SLAB_RECLAIM_ACCOUNT)

1999

break;

1999

break;

2000

2001

/*

2001

/*

2002

* Large number of objects is good, but very large slabs are

2002

* Large number of objects is good, but very large slabs are

2003

* currently bad for the gfp()s.

2003

* currently bad for the gfp()s.

2004

*/

2004

*/

2005

if (gfporder >= slab_break_gfp_order)

2005

if (gfporder >= slab_break_gfp_order)

2006

break;

2006

break;

2007

2008

/*

2008

/*

2009

* Acceptable internal fragmentation?

2009

* Acceptable internal fragmentation?

2010

*/

2010

*/

2011

if (left_over * 8 <= (PAGE_SIZE << gfporder))

2011

if (left_over * 8 <= (PAGE_SIZE << gfporder))

2012

break;

2012

break;

2013

}

2013

}

2014

return left_over;

2014

return left_over;

2015

}

2015

}

2016

2017

/*
 * setup_cpu_cache - give a newly created cache its first per-cpu array
 * @cachep: the cache being set up
 * @gfp: allocation flags for any memory obtained here
 *
 * What happens depends on the bootstrap state in g_cpucache_up:
 *
 *  FULL       - hand over to enable_cpucache() and return its result.
 *  NONE       - use the static initarray_generic array and set up the
 *               list3s via set_up_list3s(); the state advances to
 *               PARTIAL_L3 or PARTIAL_AC.
 *  PARTIAL_AC - kmalloc() the array, set up the list3s via
 *               set_up_list3s(), advance to PARTIAL_L3.
 *  otherwise  - kmalloc() the array and a kmem_list3 for every online
 *               node.
 *
 * In all non-FULL cases the array for the current CPU is then initialised
 * with BOOT_CPUCACHE_ENTRIES as limit and a batchcount of 1.
 *
 * Returns 0 on success, the result of enable_cpucache() in the FULL state,
 * or -ENOMEM if the per-cpu array could not be allocated.  The caller
 * (kmem_cache_create()) destroys the cache on any non-zero return.
 */
static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
{
	struct array_cache *ac;

	if (g_cpucache_up == FULL)
		return enable_cpucache(cachep, gfp);

	if (g_cpucache_up == NONE) {
		/*
		 * Note: the first kmem_cache_create must create the cache
		 * that's used by kmalloc(24), otherwise the creation of
		 * further caches will BUG().
		 */
		cachep->array[smp_processor_id()] = &initarray_generic.cache;

		/*
		 * If the cache that's used by kmalloc(sizeof(kmem_list3)) is
		 * the first cache, then we need to set up all its list3s,
		 * otherwise the creation of further caches will BUG().
		 */
		set_up_list3s(cachep, SIZE_AC);
		if (INDEX_AC == INDEX_L3)
			g_cpucache_up = PARTIAL_L3;
		else
			g_cpucache_up = PARTIAL_AC;
	} else {
		ac = kmalloc(sizeof(struct arraycache_init), gfp);
		cachep->array[smp_processor_id()] = ac;
		/*
		 * The array is dereferenced below; fail cleanly instead of
		 * writing through a NULL pointer.  Nothing else has been
		 * set up for this cache yet at this point.
		 */
		if (!ac)
			return -ENOMEM;

		if (g_cpucache_up == PARTIAL_AC) {
			set_up_list3s(cachep, SIZE_L3);
			g_cpucache_up = PARTIAL_L3;
		} else {
			int node;

			for_each_online_node(node) {
				cachep->nodelists[node] =
				    kmalloc_node(sizeof(struct kmem_list3),
						gfp, node);
				BUG_ON(!cachep->nodelists[node]);
				kmem_list3_init(cachep->nodelists[node]);
			}
		}
	}

	/* Stagger the first reap by an offset derived from the cache address. */
	cachep->nodelists[numa_node_id()]->next_reap =
			jiffies + REAPTIMEOUT_LIST3 +
			((unsigned long)cachep) % REAPTIMEOUT_LIST3;

	/* Start with an empty array and the small boot-time limits. */
	ac = cpu_cache_get(cachep);
	ac->avail = 0;
	ac->limit = BOOT_CPUCACHE_ENTRIES;
	ac->batchcount = 1;
	ac->touched = 0;
	cachep->batchcount = 1;
	cachep->limit = BOOT_CPUCACHE_ENTRIES;
	return 0;
}

2070

2071

/**

2071

/**

2072

* kmem_cache_create - Create a cache.

2072

* kmem_cache_create - Create a cache.

2073

* @name: A string which is used in /proc/slabinfo to identify this cache.

2073

* @name: A string which is used in /proc/slabinfo to identify this cache.

2074

* @size: The size of objects to be created in this cache.

2074

* @size: The size of objects to be created in this cache.

2075

* @align: The required alignment for the objects.

2075

* @align: The required alignment for the objects.

2076

* @flags: SLAB flags

2076

* @flags: SLAB flags

2077

* @ctor: A constructor for the objects.

2077

* @ctor: A constructor for the objects.

2078

*

2078

*

2079

* Returns a ptr to the cache on success, NULL on failure.

2079

* Returns a ptr to the cache on success, NULL on failure.

2080

* Cannot be called within a int, but can be interrupted.

2080

* Cannot be called within a int, but can be interrupted.

2081

* The @ctor is run when new pages are allocated by the cache.

2081

* The @ctor is run when new pages are allocated by the cache.

2082

*

2082

*

2083

* @name must be valid until the cache is destroyed. This implies that

2083

* @name must be valid until the cache is destroyed. This implies that

2084

* the module calling this has to destroy the cache before getting unloaded.

2084

* the module calling this has to destroy the cache before getting unloaded.

2085

* Note that kmem_cache_name() is not guaranteed to return the same pointer,

2085

* Note that kmem_cache_name() is not guaranteed to return the same pointer,

2086

* therefore applications must manage it themselves.

2086

* therefore applications must manage it themselves.

2087

*

2087

*

2088

* The flags are

2088

* The flags are

2089

*

2089

*

2090

* %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)

2090

* %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)

2091

* to catch references to uninitialised memory.

2091

* to catch references to uninitialised memory.

2092

*

2092

*

2093

* %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check

2093

* %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check

2094

* for buffer overruns.

2094

* for buffer overruns.

2095

*

2095

*

2096

* %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware

2096

* %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware

2097

* cacheline. This can be beneficial if you're counting cycles as closely

2097

* cacheline. This can be beneficial if you're counting cycles as closely

2098

* as davem.

2098

* as davem.

2099

*/

2099

*/

2100

struct kmem_cache *

2100

struct kmem_cache *

2101

kmem_cache_create (const char *name, size_t size, size_t align,

2101

kmem_cache_create (const char *name, size_t size, size_t align,

2102

unsigned long flags, void (*ctor)(void *))

2102

unsigned long flags, void (*ctor)(void *))

2103

{

2103

{

2104

size_t left_over, slab_size, ralign;

2104

size_t left_over, slab_size, ralign;

2105

struct kmem_cache *cachep = NULL, *pc;

2105

struct kmem_cache *cachep = NULL, *pc;

2106

gfp_t gfp;

2106

gfp_t gfp;

2107

2108

/*

2108

/*

2109

* Sanity checks... these are all serious usage bugs.

2109

* Sanity checks... these are all serious usage bugs.

2110

*/

2110

*/

2111

if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||

2111

if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||

2112

size > KMALLOC_MAX_SIZE) {

2112

size > KMALLOC_MAX_SIZE) {

2113

printk(KERN_ERR "%s: Early error in slab %s\n", __func__,

2113

printk(KERN_ERR "%s: Early error in slab %s\n", __func__,

2114

name);

2114

name);

2115

BUG();

2115

BUG();

2116

}

2116

}

2117

2118

/*

2118

/*

2119

* We use cache_chain_mutex to ensure a consistent view of

2119

* We use cache_chain_mutex to ensure a consistent view of

2120

* cpu_online_mask as well. Please see cpuup_callback

2120

* cpu_online_mask as well. Please see cpuup_callback

2121

*/

2121

*/

2122

if (slab_is_available()) {

2122

if (slab_is_available()) {

2123

get_online_cpus();

2123

get_online_cpus();

2124

mutex_lock(&cache_chain_mutex);

2124

mutex_lock(&cache_chain_mutex);

2125

}

2125

}

2126

2127

list_for_each_entry(pc, &cache_chain, next) {

2127

list_for_each_entry(pc, &cache_chain, next) {

2128

char tmp;

2128

char tmp;

2129

int res;

2129

int res;

2130

2131

/*

2131

/*

2132

* This happens when the module gets unloaded and doesn't

2132

* This happens when the module gets unloaded and doesn't

2133

* destroy its slab cache and no-one else reuses the vmalloc

2133

* destroy its slab cache and no-one else reuses the vmalloc

2134

* area of the module. Print a warning.

2134

* area of the module. Print a warning.

2135

*/

2135

*/

2136

res = probe_kernel_address(pc->name, tmp);

2136

res = probe_kernel_address(pc->name, tmp);

2137

if (res) {

2137

if (res) {

2138

printk(KERN_ERR

2138

printk(KERN_ERR

2139

"SLAB: cache with size %d has lost its name\n",

2139

"SLAB: cache with size %d has lost its name\n",

2140

pc->buffer_size);

2140

pc->buffer_size);

2141

continue;

2141

continue;

2142

}

2142

}

2143

2144

if (!strcmp(pc->name, name)) {

2144

if (!strcmp(pc->name, name)) {

2145

printk(KERN_ERR

2145

printk(KERN_ERR

2146

"kmem_cache_create: duplicate cache %s\n", name);

2146

"kmem_cache_create: duplicate cache %s\n", name);

2147

dump_stack();

2147

dump_stack();

2148

goto oops;

2148

goto oops;

2149

}

2149

}

2150

}

2150

}

2151

2152

#if DEBUG

2152

#if DEBUG

2153

WARN_ON(strchr(name, ' ')); /* It confuses parsers */

2153

WARN_ON(strchr(name, ' ')); /* It confuses parsers */

2154

#if FORCED_DEBUG

2154

#if FORCED_DEBUG

2155

/*

2155

/*

2156

* Enable redzoning and last user accounting, except for caches with

2156

* Enable redzoning and last user accounting, except for caches with

2157

* large objects, if the increased size would increase the object size

2157

* large objects, if the increased size would increase the object size

2158

* above the next power of two: caches with object sizes just above a

2158

* above the next power of two: caches with object sizes just above a

2159

* power of two have a significant amount of internal fragmentation.

2159

* power of two have a significant amount of internal fragmentation.

2160

*/

2160

*/

2161

if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +

2161

if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +

2162

2 * sizeof(unsigned long long)))

2162

2 * sizeof(unsigned long long)))

2163

flags |= SLAB_RED_ZONE | SLAB_STORE_USER;

2163

flags |= SLAB_RED_ZONE | SLAB_STORE_USER;

2164

if (!(flags & SLAB_DESTROY_BY_RCU))

2164

if (!(flags & SLAB_DESTROY_BY_RCU))

2165

flags |= SLAB_POISON;

2165

flags |= SLAB_POISON;

2166

#endif

2166

#endif

2167

if (flags & SLAB_DESTROY_BY_RCU)

2167

if (flags & SLAB_DESTROY_BY_RCU)

2168

BUG_ON(flags & SLAB_POISON);

2168

BUG_ON(flags & SLAB_POISON);

2169

#endif

2169

#endif

2170

/*

2170

/*

2171

* Always checks flags, a caller might be expecting debug support which

2171

* Always checks flags, a caller might be expecting debug support which

2172

* isn't available.

2172

* isn't available.

2173

*/

2173

*/

2174

BUG_ON(flags & ~CREATE_MASK);

2174

BUG_ON(flags & ~CREATE_MASK);

2175

2176

/*

2176

/*

2177

* Check that size is in terms of words. This is needed to avoid

2177

* Check that size is in terms of words. This is needed to avoid

2178

* unaligned accesses for some archs when redzoning is used, and makes

2178

* unaligned accesses for some archs when redzoning is used, and makes

2179

* sure any on-slab bufctl's are also correctly aligned.

2179

* sure any on-slab bufctl's are also correctly aligned.

2180

*/

2180

*/

2181

if (size & (BYTES_PER_WORD - 1)) {

2181

if (size & (BYTES_PER_WORD - 1)) {

2182

size += (BYTES_PER_WORD - 1);

2182

size += (BYTES_PER_WORD - 1);

2183

size &= ~(BYTES_PER_WORD - 1);

2183

size &= ~(BYTES_PER_WORD - 1);

2184

}

2184

}

2185

2186

/* calculate the final buffer alignment: */

2186

/* calculate the final buffer alignment: */

2187

2188

/* 1) arch recommendation: can be overridden for debug */

2188

/* 1) arch recommendation: can be overridden for debug */

2189

if (flags & SLAB_HWCACHE_ALIGN) {

2189

if (flags & SLAB_HWCACHE_ALIGN) {

2190

/*

2190

/*

2191

* Default alignment: as specified by the arch code. Except if

2191

* Default alignment: as specified by the arch code. Except if

2192

* an object is really small, then squeeze multiple objects into

2192

* an object is really small, then squeeze multiple objects into

2193

* one cacheline.

2193

* one cacheline.

2194

*/

2194

*/

2195

ralign = cache_line_size();

2195

ralign = cache_line_size();

2196

while (size <= ralign / 2)

2196

while (size <= ralign / 2)

2197

ralign /= 2;

2197

ralign /= 2;

2198

} else {

2198

} else {

2199

ralign = BYTES_PER_WORD;

2199

ralign = BYTES_PER_WORD;

2200

}

2200

}

2201

2202

/*

2202

/*

2203

* Redzoning and user store require word alignment or possibly larger.

2203

* Redzoning and user store require word alignment or possibly larger.

2204

* Note this will be overridden by architecture or caller mandated

2204

* Note this will be overridden by architecture or caller mandated

2205

* alignment if either is greater than BYTES_PER_WORD.

2205

* alignment if either is greater than BYTES_PER_WORD.

2206

*/

2206

*/

2207

if (flags & SLAB_STORE_USER)

2207

if (flags & SLAB_STORE_USER)

2208

ralign = BYTES_PER_WORD;

2208

ralign = BYTES_PER_WORD;

2209

2210

if (flags & SLAB_RED_ZONE) {

2210

if (flags & SLAB_RED_ZONE) {

2211

ralign = REDZONE_ALIGN;

2211

ralign = REDZONE_ALIGN;

2212

/* If redzoning, ensure that the second redzone is suitably

2212

/* If redzoning, ensure that the second redzone is suitably

2213

* aligned, by adjusting the object size accordingly. */

2213

* aligned, by adjusting the object size accordingly. */

2214

size += REDZONE_ALIGN - 1;

2214

size += REDZONE_ALIGN - 1;

2215

size &= ~(REDZONE_ALIGN - 1);

2215

size &= ~(REDZONE_ALIGN - 1);

2216

}

2216

}

2217

2218

/* 2) arch mandated alignment */

2218

/* 2) arch mandated alignment */

2219

if (ralign < ARCH_SLAB_MINALIGN) {

2219

if (ralign < ARCH_SLAB_MINALIGN) {

2220

ralign = ARCH_SLAB_MINALIGN;

2220

ralign = ARCH_SLAB_MINALIGN;

2221

}

2221

}

2222

/* 3) caller mandated alignment */

2222

/* 3) caller mandated alignment */

2223

if (ralign < align) {

2223

if (ralign < align) {

2224

ralign = align;

2224

ralign = align;

2225

}

2225

}

2226

/* disable debug if necessary */

2226

/* disable debug if necessary */

2227

if (ralign > __alignof__(unsigned long long))

2227

if (ralign > __alignof__(unsigned long long))

2228

flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);

2228

flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);

2229

/*

2229

/*

2230

* 4) Store it.

2230

* 4) Store it.

2231

*/

2231

*/

2232

align = ralign;

2232

align = ralign;

2233

2234

if (slab_is_available())

2234

if (slab_is_available())

2235

gfp = GFP_KERNEL;

2235

gfp = GFP_KERNEL;

2236

else

2236

else

2237

gfp = GFP_NOWAIT;

2237

gfp = GFP_NOWAIT;

2238

2239

/* Get cache's description obj. */

2239

/* Get cache's description obj. */

2240

cachep = kmem_cache_zalloc(&cache_cache, gfp);

2240

cachep = kmem_cache_zalloc(&cache_cache, gfp);

2241

if (!cachep)

2241

if (!cachep)

2242

goto oops;

2242

goto oops;

2243

2244

#if DEBUG

2244

#if DEBUG

2245

cachep->obj_size = size;

2245

cachep->obj_size = size;

2246

2247

/*

2247

/*

2248

* Both debugging options require word-alignment which is calculated

2248

* Both debugging options require word-alignment which is calculated

2249

* into align above.

2249

* into align above.

2250

*/

2250

*/

2251

if (flags & SLAB_RED_ZONE) {

2251

if (flags & SLAB_RED_ZONE) {

2252

/* add space for red zone words */

2252

/* add space for red zone words */

2253

cachep->obj_offset += sizeof(unsigned long long);

2253

cachep->obj_offset += sizeof(unsigned long long);

2254

size += 2 * sizeof(unsigned long long);

2254

size += 2 * sizeof(unsigned long long);

2255

}

2255

}

2256

if (flags & SLAB_STORE_USER) {

2256

if (flags & SLAB_STORE_USER) {

2257

/* user store requires one word storage behind the end of

2257

/* user store requires one word storage behind the end of

2258

* the real object. But if the second red zone needs to be

2258

* the real object. But if the second red zone needs to be

2259

* aligned to 64 bits, we must allow that much space.

2259

* aligned to 64 bits, we must allow that much space.

2260

*/

2260

*/

2261

if (flags & SLAB_RED_ZONE)

2261

if (flags & SLAB_RED_ZONE)

2262

size += REDZONE_ALIGN;

2262

size += REDZONE_ALIGN;

2263

else

2263

else

2264

size += BYTES_PER_WORD;

2264

size += BYTES_PER_WORD;

2265

}

2265

}

2266

#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)

2266

#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)

2267

if (size >= malloc_sizes[INDEX_L3 + 1].cs_size

2267

if (size >= malloc_sizes[INDEX_L3 + 1].cs_size

2268

&& cachep->obj_size > cache_line_size() && size < PAGE_SIZE) {

2268

&& cachep->obj_size > cache_line_size() && size < PAGE_SIZE) {

2269

cachep->obj_offset += PAGE_SIZE - size;

2269

cachep->obj_offset += PAGE_SIZE - size;

2270

size = PAGE_SIZE;

2270

size = PAGE_SIZE;

2271

}

2271

}

2272

#endif

2272

#endif

2273

#endif

2273

#endif

2274

2275

/*

2275

/*

2276

* Determine if the slab management is 'on' or 'off' slab.

2276

* Determine if the slab management is 'on' or 'off' slab.

2277

* (bootstrapping cannot cope with offslab caches so don't do

2277

* (bootstrapping cannot cope with offslab caches so don't do

2278

* it too early on. Always use on-slab management when

2278

* it too early on. Always use on-slab management when

2279

* SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak)

2279

* SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak)

2280

*/

2280

*/

2281

if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init &&

2281

if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init &&

2282

!(flags & SLAB_NOLEAKTRACE))

2282

!(flags & SLAB_NOLEAKTRACE))

2283

/*

2283

/*

2284

* Size is large, assume best to place the slab management obj

2284

* Size is large, assume best to place the slab management obj

2285

* off-slab (should allow better packing of objs).

2285

* off-slab (should allow better packing of objs).

2286

*/

2286

*/

2287

flags |= CFLGS_OFF_SLAB;

2287

flags |= CFLGS_OFF_SLAB;

2288

2289

size = ALIGN(size, align);

2289

size = ALIGN(size, align);

2290

2291

left_over = calculate_slab_order(cachep, size, align, flags);

2291

left_over = calculate_slab_order(cachep, size, align, flags);

2292

2293

if (!cachep->num) {

2293

if (!cachep->num) {

2294

printk(KERN_ERR

2294

printk(KERN_ERR

2295

"kmem_cache_create: couldn't create cache %s.\n", name);

2295

"kmem_cache_create: couldn't create cache %s.\n", name);

2296

kmem_cache_free(&cache_cache, cachep);

2296

kmem_cache_free(&cache_cache, cachep);

2297

cachep = NULL;

2297

cachep = NULL;

2298

goto oops;

2298

goto oops;

2299

}

2299

}

2300

slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)

2300

slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)

2301

+ sizeof(struct slab), align);

2301

+ sizeof(struct slab), align);

2302

2303

/*

2303

/*

2304

* If the slab has been placed off-slab, and we have enough space then

2304

* If the slab has been placed off-slab, and we have enough space then

2305

* move it on-slab. This is at the expense of any extra colouring.

2305

* move it on-slab. This is at the expense of any extra colouring.

2306

*/

2306

*/

2307

if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {

2307

if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {

2308

flags &= ~CFLGS_OFF_SLAB;

2308

flags &= ~CFLGS_OFF_SLAB;

2309

left_over -= slab_size;

2309

left_over -= slab_size;

2310

}

2310

}

2311

2312

if (flags & CFLGS_OFF_SLAB) {

2312

if (flags & CFLGS_OFF_SLAB) {

2313

/* really off slab. No need for manual alignment */

2313

/* really off slab. No need for manual alignment */

2314

slab_size =

2314

slab_size =

2315

cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);

2315

cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);

2316

2317

#ifdef CONFIG_PAGE_POISONING

2317

#ifdef CONFIG_PAGE_POISONING

2318

/* If we're going to use the generic kernel_map_pages()

2318

/* If we're going to use the generic kernel_map_pages()

2319

* poisoning, then it's going to smash the contents of

2319

* poisoning, then it's going to smash the contents of

2320

* the redzone and userword anyhow, so switch them off.

2320

* the redzone and userword anyhow, so switch them off.

2321

*/

2321

*/

2322

if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)

2322

if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)

2323

flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);

2323

flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);

2324

#endif

2324

#endif

2325

}

2325

}

2326

2327

cachep->colour_off = cache_line_size();

2327

cachep->colour_off = cache_line_size();

2328

/* Offset must be a multiple of the alignment. */

2328

/* Offset must be a multiple of the alignment. */

2329

if (cachep->colour_off < align)

2329

if (cachep->colour_off < align)

2330

cachep->colour_off = align;

2330

cachep->colour_off = align;

2331

cachep->colour = left_over / cachep->colour_off;

2331

cachep->colour = left_over / cachep->colour_off;

2332

cachep->slab_size = slab_size;

2332

cachep->slab_size = slab_size;

2333

cachep->flags = flags;

2333

cachep->flags = flags;

2334

cachep->gfpflags = 0;

2334

cachep->gfpflags = 0;

2335

if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))

2335

if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))

2336

cachep->gfpflags |= GFP_DMA;

2336

cachep->gfpflags |= GFP_DMA;

2337

cachep->buffer_size = size;

2337

cachep->buffer_size = size;

2338

cachep->reciprocal_buffer_size = reciprocal_value(size);

2338

cachep->reciprocal_buffer_size = reciprocal_value(size);

2339

2340

if (flags & CFLGS_OFF_SLAB) {

2340

if (flags & CFLGS_OFF_SLAB) {

2341

cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);

2341

cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);

2342

/*

2342

/*

2343

* This is a possibility for one of the malloc_sizes caches.

2343

* This is a possibility for one of the malloc_sizes caches.

2344

* But since we go off slab only for object size greater than

2344

* But since we go off slab only for object size greater than

2345

* PAGE_SIZE/8, and malloc_sizes gets created in ascending order,

2345

* PAGE_SIZE/8, and malloc_sizes gets created in ascending order,

2346

* this should not happen at all.

2346

* this should not happen at all.

2347

* But leave a BUG_ON for some lucky dude.

2347

* But leave a BUG_ON for some lucky dude.

2348

*/

2348

*/

2349

BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));

2349

BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));

2350

}

2350

}

2351

cachep->ctor = ctor;

2351

cachep->ctor = ctor;

2352

cachep->name = name;

2352

cachep->name = name;

2353

2354

if (setup_cpu_cache(cachep, gfp)) {

2354

if (setup_cpu_cache(cachep, gfp)) {

2355

__kmem_cache_destroy(cachep);

2355

__kmem_cache_destroy(cachep);

2356

cachep = NULL;

2356

cachep = NULL;

2357

goto oops;

2357

goto oops;

2358

}

2358

}

2359

2360

/* cache setup completed, link it into the list */

2360

/* cache setup completed, link it into the list */

2361

list_add(&cachep->next, &cache_chain);

2361

list_add(&cachep->next, &cache_chain);

2362

oops:

2362

oops:

2363

if (!cachep && (flags & SLAB_PANIC))

2363

if (!cachep && (flags & SLAB_PANIC))

2364

panic("kmem_cache_create(): failed to create slab `%s'\n",

2364

panic("kmem_cache_create(): failed to create slab `%s'\n",

2365

name);

2365

name);

2366

if (slab_is_available()) {

2366

if (slab_is_available()) {

2367

mutex_unlock(&cache_chain_mutex);

2367

mutex_unlock(&cache_chain_mutex);

2368

put_online_cpus();

2368

put_online_cpus();

2369

}

2369

}

2370

return cachep;

2370

return cachep;

2371

}

2371

}

2372

EXPORT_SYMBOL(kmem_cache_create);

2372

EXPORT_SYMBOL(kmem_cache_create);

2373

2374

#if DEBUG
/* BUG unless local interrupts are disabled. */
static void check_irq_off(void)
{
	BUG_ON(!irqs_disabled());
}

/* BUG if local interrupts are disabled. */
static void check_irq_on(void)
{
	BUG_ON(irqs_disabled());
}

/*
 * On SMP builds, check that interrupts are off and that the list_lock of
 * the node list selected by numa_node_id() is held.  Does nothing otherwise.
 */
static void check_spinlock_acquired(struct kmem_cache *cachep)
{
#ifdef CONFIG_SMP
	struct kmem_list3 *l3;

	check_irq_off();
	l3 = cachep->nodelists[numa_node_id()];
	assert_spin_locked(&l3->list_lock);
#endif
}

/*
 * Same check as check_spinlock_acquired(), but for the node list of the
 * explicitly given @node.
 */
static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
{
#ifdef CONFIG_SMP
	struct kmem_list3 *l3;

	check_irq_off();
	l3 = cachep->nodelists[node];
	assert_spin_locked(&l3->list_lock);
#endif
}

#else
/* Without DEBUG all of the checks above compile away to nothing. */
#define check_irq_off()				do { } while (0)
#define check_irq_on()				do { } while (0)
#define check_spinlock_acquired(x)		do { } while (0)
#define check_spinlock_acquired_node(x, y)	do { } while (0)
#endif

2407

2408

/*
 * Forward declaration, so that drain_cpu_caches() below can call
 * drain_array() ahead of its definition.
 */
static void drain_array(struct kmem_cache *cachep,
			struct kmem_list3 *l3,
			struct array_cache *ac,
			int force,
			int node);

2411

2412

static void do_drain(void *arg)

2412

static void do_drain(void *arg)

2413

{

2413

{

2414

struct kmem_cache *cachep = arg;

2414

struct kmem_cache *cachep = arg;

2415

struct array_cache *ac;

2415

struct array_cache *ac;

2416

int node = numa_node_id();

2416

int node = numa_node_id();

2417

2418

check_irq_off();

2418

check_irq_off();

2419

ac = cpu_cache_get(cachep);

2419

ac = cpu_cache_get(cachep);

2420

spin_lock(&cachep->nodelists[node]->list_lock);

2420

spin_lock(&cachep->nodelists[node]->list_lock);

2421

free_block(cachep, ac->entry, ac->avail, node);

2421

free_block(cachep, ac->entry, ac->avail, node);

2422

spin_unlock(&cachep->nodelists[node]->list_lock);

2422

spin_unlock(&cachep->nodelists[node]->list_lock);

2423

ac->avail = 0;

2423

ac->avail = 0;

2424

}

2424

}

2425

2426

static void drain_cpu_caches(struct kmem_cache *cachep)

2426

static void drain_cpu_caches(struct kmem_cache *cachep)

2427

{

2427

{

2428

struct kmem_list3 *l3;

2428

struct kmem_list3 *l3;

2429

int node;

2429

int node;

2430

2431

on_each_cpu(do_drain, cachep, 1);

2431

on_each_cpu(do_drain, cachep, 1);

2432

check_irq_on();

2432

check_irq_on();

2433

for_each_online_node(node) {

2433

for_each_online_node(node) {

2434

l3 = cachep->nodelists[node];

2434

l3 = cachep->nodelists[node];

2435

if (l3 && l3->alien)

2435

if (l3 && l3->alien)

2436

drain_alien_cache(cachep, l3->alien);

2436

drain_alien_cache(cachep, l3->alien);

2437

}

2437

}

2438

2439

for_each_online_node(node) {

2439

for_each_online_node(node) {

2440

l3 = cachep->nodelists[node];

2440

l3 = cachep->nodelists[node];

2441

if (l3)

2441

if (l3)

2442

drain_array(cachep, l3, l3->shared, 1, node);

2442

drain_array(cachep, l3, l3->shared, 1, node);

2443

}

2443

}

2444

}

2444

}

2445

2446

/*

2446

/*

2447

* Remove slabs from the list of free slabs.

2447

* Remove slabs from the list of free slabs.

2448

* Specify the number of slabs to drain in tofree.

2448

* Specify the number of slabs to drain in tofree.

2449

*

2449

*

2450

* Returns the actual number of slabs released.

2450

* Returns the actual number of slabs released.

2451

*/

2451

*/

2452

static int drain_freelist(struct kmem_cache *cache,

2452

static int drain_freelist(struct kmem_cache *cache,

2453

struct kmem_list3 *l3, int tofree)

2453

struct kmem_list3 *l3, int tofree)

2454

{

2454

{

2455

struct list_head *p;

2455

struct list_head *p;

2456

int nr_freed;

2456

int nr_freed;

2457

struct slab *slabp;

2457

struct slab *slabp;

2458

2459

nr_freed = 0;

2459

nr_freed = 0;

2460

while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {

2460

while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {

2461

2462

spin_lock_irq(&l3->list_lock);

2462

spin_lock_irq(&l3->list_lock);

2463

p = l3->slabs_free.prev;

2463

p = l3->slabs_free.prev;

2464

if (p == &l3->slabs_free) {

2464

if (p == &l3->slabs_free) {

2465

spin_unlock_irq(&l3->list_lock);

2465

spin_unlock_irq(&l3->list_lock);

2466

goto out;

2466

goto out;

2467

}

2467

}

2468

2469

slabp = list_entry(p, struct slab, list);

2469

slabp = list_entry(p, struct slab, list);

2470

#if DEBUG

2470

#if DEBUG

2471

BUG_ON(slabp->inuse);

2471

BUG_ON(slabp->inuse);

2472

#endif

2472

#endif

2473

list_del(&slabp->list);

2473

list_del(&slabp->list);

2474

/*

2474

/*

2475

* Safe to drop the lock. The slab is no longer linked

2475

* Safe to drop the lock. The slab is no longer linked

2476

* to the cache.

2476

* to the cache.

2477

*/

2477

*/

2478

l3->free_objects -= cache->num;

2478

l3->free_objects -= cache->num;

2479

spin_unlock_irq(&l3->list_lock);

2479

spin_unlock_irq(&l3->list_lock);

2480

slab_destroy(cache, slabp);

2480

slab_destroy(cache, slabp);

2481

nr_freed++;

2481

nr_freed++;

2482

}

2482

}

2483

out:

2483

out:

2484

return nr_freed;

2484

return nr_freed;

2485

}

2485

}

2486

2487

/* Called with cache_chain_mutex held to protect against cpu hotplug */

2487

/* Called with cache_chain_mutex held to protect against cpu hotplug */

2488

static int __cache_shrink(struct kmem_cache *cachep)

2488

static int __cache_shrink(struct kmem_cache *cachep)

2489

{

2489

{

2490

int ret = 0, i = 0;

2490

int ret = 0, i = 0;

2491

struct kmem_list3 *l3;

2491

struct kmem_list3 *l3;

2492

2493

drain_cpu_caches(cachep);

2493

drain_cpu_caches(cachep);

2494

2495

check_irq_on();

2495

check_irq_on();

2496

for_each_online_node(i) {

2496

for_each_online_node(i) {

2497

l3 = cachep->nodelists[i];

2497

l3 = cachep->nodelists[i];

2498

if (!l3)

2498

if (!l3)

2499

continue;

2499

continue;

2500

2501

drain_freelist(cachep, l3, l3->free_objects);

2501

drain_freelist(cachep, l3, l3->free_objects);

2502

2503

ret += !list_empty(&l3->slabs_full) ||

2503

ret += !list_empty(&l3->slabs_full) ||

2504

!list_empty(&l3->slabs_partial);

2504

!list_empty(&l3->slabs_partial);

2505

}

2505

}

2506

return (ret ? 1 : 0);

2506

return (ret ? 1 : 0);

2507

}

2507

}

2508

2509

/**

2509

/**

2510

* kmem_cache_shrink - Shrink a cache.

2510

* kmem_cache_shrink - Shrink a cache.

2511

* @cachep: The cache to shrink.

2511

* @cachep: The cache to shrink.

2512

*

2512

*

2513

* Releases as many slabs as possible for a cache.

2513

* Releases as many slabs as possible for a cache.

2514

* To help debugging, a zero exit status indicates all slabs were released.

2514

* To help debugging, a zero exit status indicates all slabs were released.

2515

*/

2515

*/

2516

int kmem_cache_shrink(struct kmem_cache *cachep)

2516

int kmem_cache_shrink(struct kmem_cache *cachep)

2517

{

2517

{

2518

int ret;

2518

int ret;

2519

BUG_ON(!cachep || in_interrupt());

2519

BUG_ON(!cachep || in_interrupt());

2520

2521

get_online_cpus();

2521

get_online_cpus();

2522

mutex_lock(&cache_chain_mutex);

2522

mutex_lock(&cache_chain_mutex);

2523

ret = __cache_shrink(cachep);

2523

ret = __cache_shrink(cachep);

2524

mutex_unlock(&cache_chain_mutex);

2524

mutex_unlock(&cache_chain_mutex);

2525

put_online_cpus();

2525

put_online_cpus();

2526

return ret;

2526

return ret;

2527

}

2527

}

2528

EXPORT_SYMBOL(kmem_cache_shrink);

2528

EXPORT_SYMBOL(kmem_cache_shrink);

2529

2530

/**

2530

/**

2531

* kmem_cache_destroy - delete a cache

2531

* kmem_cache_destroy - delete a cache

2532

* @cachep: the cache to destroy

2532

* @cachep: the cache to destroy

2533

*

2533

*

2534

* Remove a &struct kmem_cache object from the slab cache.

2534

* Remove a &struct kmem_cache object from the slab cache.

2535

*

2535

*

2536

* It is expected this function will be called by a module when it is

2536

* It is expected this function will be called by a module when it is

2537

* unloaded. This will remove the cache completely, and avoid a duplicate

2537

* unloaded. This will remove the cache completely, and avoid a duplicate

2538

* cache being allocated each time a module is loaded and unloaded, if the

2538

* cache being allocated each time a module is loaded and unloaded, if the

2539

* module doesn't have persistent in-kernel storage across loads and unloads.

2539

* module doesn't have persistent in-kernel storage across loads and unloads.

2540

*

2540

*

2541

* The cache must be empty before calling this function.

2541

* The cache must be empty before calling this function.

2542

*

2542

*

2543

* The caller must guarantee that noone will allocate memory from the cache

2543

* The caller must guarantee that noone will allocate memory from the cache

2544

* during the kmem_cache_destroy().

2544

* during the kmem_cache_destroy().

2545

*/

2545

*/

2546

void kmem_cache_destroy(struct kmem_cache *cachep)

2546

void kmem_cache_destroy(struct kmem_cache *cachep)

2547

{

2547

{

2548

BUG_ON(!cachep || in_interrupt());

2548

BUG_ON(!cachep || in_interrupt());

2549

2550

/* Find the cache in the chain of caches. */

2550

/* Find the cache in the chain of caches. */

2551

get_online_cpus();

2551

get_online_cpus();

2552

mutex_lock(&cache_chain_mutex);

2552

mutex_lock(&cache_chain_mutex);

2553

/*

2553

/*

2554

* the chain is never empty, cache_cache is never destroyed

2554

* the chain is never empty, cache_cache is never destroyed

2555

*/

2555

*/

2556

list_del(&cachep->next);

2556

list_del(&cachep->next);

2557

if (__cache_shrink(cachep)) {

2557

if (__cache_shrink(cachep)) {

2558

slab_error(cachep, "Can't free all objects");

2558

slab_error(cachep, "Can't free all objects");

2559

list_add(&cachep->next, &cache_chain);

2559

list_add(&cachep->next, &cache_chain);

2560

mutex_unlock(&cache_chain_mutex);

2560

mutex_unlock(&cache_chain_mutex);

2561

put_online_cpus();

2561

put_online_cpus();

2562

return;

2562

return;

2563

}

2563

}

2564

2565

if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))

2565

if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))

2566

rcu_barrier();

2566

rcu_barrier();

2567

2568

__kmem_cache_destroy(cachep);

2568

__kmem_cache_destroy(cachep);

2569

mutex_unlock(&cache_chain_mutex);

2569

mutex_unlock(&cache_chain_mutex);

2570

put_online_cpus();

2570

put_online_cpus();

2571

}

2571

}

2572

EXPORT_SYMBOL(kmem_cache_destroy);

2572

EXPORT_SYMBOL(kmem_cache_destroy);

2573

2574

/*

2574

/*

2575

* Get the memory for a slab management obj.

2575

* Get the memory for a slab management obj.

2576

* For a slab cache when the slab descriptor is off-slab, slab descriptors

2576

* For a slab cache when the slab descriptor is off-slab, slab descriptors

2577

* always come from malloc_sizes caches. The slab descriptor cannot

2577

* always come from malloc_sizes caches. The slab descriptor cannot

2578

* come from the same cache which is getting created because,

2578

* come from the same cache which is getting created because,

2579

* when we are searching for an appropriate cache for these

2579

* when we are searching for an appropriate cache for these

2580

* descriptors in kmem_cache_create, we search through the malloc_sizes array.

2580

* descriptors in kmem_cache_create, we search through the malloc_sizes array.

2581

* If we are creating a malloc_sizes cache here it would not be visible to

2581

* If we are creating a malloc_sizes cache here it would not be visible to

2582

* kmem_find_general_cachep till the initialization is complete.

2582

* kmem_find_general_cachep till the initialization is complete.

2583

* Hence we cannot have slabp_cache same as the original cache.

2583

* Hence we cannot have slabp_cache same as the original cache.

2584

*/

2584

*/

2585

static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,

2585

static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,

2586

int colour_off, gfp_t local_flags,

2586

int colour_off, gfp_t local_flags,

2587

int nodeid)

2587

int nodeid)

2588

{

2588

{

2589

struct slab *slabp;

2589

struct slab *slabp;

2590

2591

if (OFF_SLAB(cachep)) {

2591

if (OFF_SLAB(cachep)) {

2592

/* Slab management obj is off-slab. */

2592

/* Slab management obj is off-slab. */

2593

slabp = kmem_cache_alloc_node(cachep->slabp_cache,

2593

slabp = kmem_cache_alloc_node(cachep->slabp_cache,

2594

local_flags, nodeid);

2594

local_flags, nodeid);

2595

/*

2595

/*

2596

* If the first object in the slab is leaked (it's allocated

2596

* If the first object in the slab is leaked (it's allocated

2597

* but no one has a reference to it), we want to make sure

2597

* but no one has a reference to it), we want to make sure

2598

* kmemleak does not treat the ->s_mem pointer as a reference

2598

* kmemleak does not treat the ->s_mem pointer as a reference

2599

* to the object. Otherwise we will not report the leak.

2599

* to the object. Otherwise we will not report the leak.

2600

*/

2600

*/

2601

kmemleak_scan_area(&slabp->list, sizeof(struct list_head),

2601

kmemleak_scan_area(&slabp->list, sizeof(struct list_head),

2602

local_flags);

2602

local_flags);

2603

if (!slabp)

2603

if (!slabp)

2604

return NULL;

2604

return NULL;

2605

} else {

2605

} else {

2606

slabp = objp + colour_off;

2606

slabp = objp + colour_off;

2607

colour_off += cachep->slab_size;

2607

colour_off += cachep->slab_size;

2608

}

2608

}

2609

slabp->inuse = 0;

2609

slabp->inuse = 0;

2610

slabp->colouroff = colour_off;

2610

slabp->colouroff = colour_off;

2611

slabp->s_mem = objp + colour_off;

2611

slabp->s_mem = objp + colour_off;

2612

slabp->nodeid = nodeid;

2612

slabp->nodeid = nodeid;

2613

slabp->free = 0;

2613

slabp->free = 0;

2614

return slabp;

2614

return slabp;

2615

}

2615

}

2616

2617

static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)

2617

static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)

2618

{

2618

{

2619

return (kmem_bufctl_t *) (slabp + 1);

2619

return (kmem_bufctl_t *) (slabp + 1);

2620

}

2620

}

2621

2622

static void cache_init_objs(struct kmem_cache *cachep,

2622

static void cache_init_objs(struct kmem_cache *cachep,

2623

struct slab *slabp)

2623

struct slab *slabp)

2624

{

2624

{

2625

int i;

2625

int i;

2626

2627

for (i = 0; i < cachep->num; i++) {

2627

for (i = 0; i < cachep->num; i++) {

2628

void *objp = index_to_obj(cachep, slabp, i);

2628

void *objp = index_to_obj(cachep, slabp, i);

2629

#if DEBUG

2629

#if DEBUG

2630

/* need to poison the objs? */

2630

/* need to poison the objs? */

2631

if (cachep->flags & SLAB_POISON)

2631

if (cachep->flags & SLAB_POISON)

2632

poison_obj(cachep, objp, POISON_FREE);

2632

poison_obj(cachep, objp, POISON_FREE);

2633

if (cachep->flags & SLAB_STORE_USER)

2633

if (cachep->flags & SLAB_STORE_USER)

2634

*dbg_userword(cachep, objp) = NULL;

2634

*dbg_userword(cachep, objp) = NULL;

2635

2636

if (cachep->flags & SLAB_RED_ZONE) {

2636

if (cachep->flags & SLAB_RED_ZONE) {

2637

*dbg_redzone1(cachep, objp) = RED_INACTIVE;

2637

*dbg_redzone1(cachep, objp) = RED_INACTIVE;

2638

*dbg_redzone2(cachep, objp) = RED_INACTIVE;

2638

*dbg_redzone2(cachep, objp) = RED_INACTIVE;

2639

}

2639

}

2640

/*

2640

/*

2641

* Constructors are not allowed to allocate memory from the same

2641

* Constructors are not allowed to allocate memory from the same

2642

* cache which they are a constructor for. Otherwise, deadlock.

2642

* cache which they are a constructor for. Otherwise, deadlock.

2643

* They must also be threaded.

2643

* They must also be threaded.

2644

*/

2644

*/

2645

if (cachep->ctor && !(cachep->flags & SLAB_POISON))

2645

if (cachep->ctor && !(cachep->flags & SLAB_POISON))

2646

cachep->ctor(objp + obj_offset(cachep));

2646

cachep->ctor(objp + obj_offset(cachep));

2647

2648

if (cachep->flags & SLAB_RED_ZONE) {

2648

if (cachep->flags & SLAB_RED_ZONE) {

2649

if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)

2649

if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)

2650

slab_error(cachep, "constructor overwrote the"

2650

slab_error(cachep, "constructor overwrote the"

2651

" end of an object");

2651

" end of an object");

2652

if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)

2652

if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)

2653

slab_error(cachep, "constructor overwrote the"

2653

slab_error(cachep, "constructor overwrote the"

2654

" start of an object");

2654

" start of an object");

2655

}

2655

}

2656

if ((cachep->buffer_size % PAGE_SIZE) == 0 &&

2656

if ((cachep->buffer_size % PAGE_SIZE) == 0 &&

2657

OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)

2657

OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)

2658

kernel_map_pages(virt_to_page(objp),

2658

kernel_map_pages(virt_to_page(objp),

2659

cachep->buffer_size / PAGE_SIZE, 0);

2659

cachep->buffer_size / PAGE_SIZE, 0);

2660

#else

2660

#else

2661

if (cachep->ctor)

2661

if (cachep->ctor)

2662

cachep->ctor(objp);

2662

cachep->ctor(objp);

2663

#endif

2663

#endif

2664

slab_bufctl(slabp)[i] = i + 1;

2664

slab_bufctl(slabp)[i] = i + 1;

2665

}

2665

}

2666

slab_bufctl(slabp)[i - 1] = BUFCTL_END;

2666

slab_bufctl(slabp)[i - 1] = BUFCTL_END;

2667

}

2667

}

2668

2669

/*
 * Check that the GFP_DMA bit of an allocation request agrees with the
 * GFP_DMA bit of the cache's own gfpflags; a mismatch in either direction
 * is a BUG. Nothing is checked when CONFIG_ZONE_DMA_FLAG is zero.
 */
static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
{
	if (!CONFIG_ZONE_DMA_FLAG)
		return;

	if (flags & GFP_DMA)
		BUG_ON(!(cachep->gfpflags & GFP_DMA));
	else
		BUG_ON(cachep->gfpflags & GFP_DMA);
}

2678

2679

static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,

2679

static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,

2680

int nodeid)

2680

int nodeid)

2681

{

2681

{

2682

void *objp = index_to_obj(cachep, slabp, slabp->free);

2682

void *objp = index_to_obj(cachep, slabp, slabp->free);

2683

kmem_bufctl_t next;

2683

kmem_bufctl_t next;

2684

2685

slabp->inuse++;

2685

slabp->inuse++;

2686

next = slab_bufctl(slabp)[slabp->free];

2686

next = slab_bufctl(slabp)[slabp->free];

2687

#if DEBUG

2687

#if DEBUG

2688

slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;

2688

slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;

2689

WARN_ON(slabp->nodeid != nodeid);

2689

WARN_ON(slabp->nodeid != nodeid);

2690

#endif

2690

#endif

2691

slabp->free = next;

2691

slabp->free = next;

2692

2693

return objp;

2693

return objp;

2694

}

2694

}

2695

2696

static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,

2696

static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,

2697

void *objp, int nodeid)

2697

void *objp, int nodeid)

2698

{

2698

{

2699

unsigned int objnr = obj_to_index(cachep, slabp, objp);

2699

unsigned int objnr = obj_to_index(cachep, slabp, objp);

2700

2701

#if DEBUG

2701

#if DEBUG

2702

/* Verify that the slab belongs to the intended node */

2702

/* Verify that the slab belongs to the intended node */

2703

WARN_ON(slabp->nodeid != nodeid);

2703

WARN_ON(slabp->nodeid != nodeid);

2704

2705

if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {

2705

if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {

2706

printk(KERN_ERR "slab: double free detected in cache "

2706

printk(KERN_ERR "slab: double free detected in cache "

2707

"'%s', objp %p\n", cachep->name, objp);

2707

"'%s', objp %p\n", cachep->name, objp);

2708

BUG();

2708

BUG();

2709

}

2709

}

2710

#endif

2710

#endif

2711

slab_bufctl(slabp)[objnr] = slabp->free;

2711

slab_bufctl(slabp)[objnr] = slabp->free;

2712

slabp->free = objnr;

2712

slabp->free = objnr;

2713

slabp->inuse--;

2713

slabp->inuse--;

2714

}

2714

}

2715

2716

/*

2716

/*

2717

* Map pages beginning at addr to the given cache and slab. This is required

2717

* Map pages beginning at addr to the given cache and slab. This is required

2718

* for the slab allocator to be able to lookup the cache and slab of a

2718

* for the slab allocator to be able to lookup the cache and slab of a

2719

* virtual address for kfree, ksize, kmem_ptr_validate, and slab debugging.

2719

* virtual address for kfree, ksize, kmem_ptr_validate, and slab debugging.

2720

*/

2720

*/

2721

static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,

2721

static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,

2722

void *addr)

2722

void *addr)

2723

{

2723

{

2724

int nr_pages;

2724

int nr_pages;

2725

struct page *page;

2725

struct page *page;

2726

2727

page = virt_to_page(addr);

2727

page = virt_to_page(addr);

2728

2729

nr_pages = 1;

2729

nr_pages = 1;

2730

if (likely(!PageCompound(page)))

2730

if (likely(!PageCompound(page)))

2731

nr_pages <<= cache->gfporder;

2731

nr_pages <<= cache->gfporder;

2732

2733

do {

2733

do {

2734

page_set_cache(page, cache);

2734

page_set_cache(page, cache);

2735

page_set_slab(page, slab);

2735

page_set_slab(page, slab);

2736

page++;

2736

page++;

2737

} while (--nr_pages);

2737

} while (--nr_pages);

2738

}

2738

}

2739

2740

/*

2740

/*

2741

* Grow (by 1) the number of slabs within a cache. This is called by

2741

* Grow (by 1) the number of slabs within a cache. This is called by

2742

* kmem_cache_alloc() when there are no active objs left in a cache.

2742

* kmem_cache_alloc() when there are no active objs left in a cache.

2743

*/

2743

*/

2744

static int cache_grow(struct kmem_cache *cachep,

2744

static int cache_grow(struct kmem_cache *cachep,

2745

gfp_t flags, int nodeid, void *objp)

2745

gfp_t flags, int nodeid, void *objp)

2746

{

2746

{

2747

struct slab *slabp;

2747

struct slab *slabp;

2748

size_t offset;

2748

size_t offset;

2749

gfp_t local_flags;

2749

gfp_t local_flags;

2750

struct kmem_list3 *l3;

2750

struct kmem_list3 *l3;

2751

2752

/*

2752

/*

2753

* Be lazy and only check for valid flags here, keeping it out of the

2753

* Be lazy and only check for valid flags here, keeping it out of the

2754

* critical path in kmem_cache_alloc().

2754

* critical path in kmem_cache_alloc().

2755

*/

2755

*/

2756

BUG_ON(flags & GFP_SLAB_BUG_MASK);

2756

BUG_ON(flags & GFP_SLAB_BUG_MASK);

2757

local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

2757

local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

2758

2759

/* Take the l3 list lock to change the colour_next on this node */

2759

/* Take the l3 list lock to change the colour_next on this node */

2760

check_irq_off();

2760

check_irq_off();

2761

l3 = cachep->nodelists[nodeid];

2761

l3 = cachep->nodelists[nodeid];

2762

spin_lock(&l3->list_lock);

2762

spin_lock(&l3->list_lock);

2763

2764

/* Get colour for the slab, and cal the next value. */

2764

/* Get colour for the slab, and cal the next value. */

2765

offset = l3->colour_next;

2765

offset = l3->colour_next;

2766

l3->colour_next++;

2766

l3->colour_next++;

2767

if (l3->colour_next >= cachep->colour)

2767

if (l3->colour_next >= cachep->colour)

2768

l3->colour_next = 0;

2768

l3->colour_next = 0;

2769

spin_unlock(&l3->list_lock);

2769

spin_unlock(&l3->list_lock);

2770

2771

offset *= cachep->colour_off;

2771

offset *= cachep->colour_off;

2772

2773

if (local_flags & __GFP_WAIT)

2773

if (local_flags & __GFP_WAIT)

2774

local_irq_enable();

2774

local_irq_enable();

2775

2776

/*

2776

/*

2777

* The test for missing atomic flag is performed here, rather than

2777

* The test for missing atomic flag is performed here, rather than

2778

* the more obvious place, simply to reduce the critical path length

2778

* the more obvious place, simply to reduce the critical path length

2779

* in kmem_cache_alloc(). If a caller is seriously mis-behaving they

2779

* in kmem_cache_alloc(). If a caller is seriously mis-behaving they

2780

* will eventually be caught here (where it matters).

2780

* will eventually be caught here (where it matters).

2781

*/

2781

*/

2782

kmem_flagcheck(cachep, flags);

2782

kmem_flagcheck(cachep, flags);

2783

2784

/*

2784

/*

2785

* Get mem for the objs. Attempt to allocate a physical page from

2785

* Get mem for the objs. Attempt to allocate a physical page from

2786

* 'nodeid'.

2786

* 'nodeid'.

2787

*/

2787

*/

2788

if (!objp)

2788

if (!objp)

2789

objp = kmem_getpages(cachep, local_flags, nodeid);

2789

objp = kmem_getpages(cachep, local_flags, nodeid);

2790

if (!objp)

2790

if (!objp)

2791

goto failed;

2791

goto failed;

2792

2793

/* Get slab management. */

2793

/* Get slab management. */

2794

slabp = alloc_slabmgmt(cachep, objp, offset,

2794

slabp = alloc_slabmgmt(cachep, objp, offset,

2795

local_flags & ~GFP_CONSTRAINT_MASK, nodeid);

2795

local_flags & ~GFP_CONSTRAINT_MASK, nodeid);

2796

if (!slabp)

2796

if (!slabp)

2797

goto opps1;

2797

goto opps1;

2798

2799

slab_map_pages(cachep, slabp, objp);

2799

slab_map_pages(cachep, slabp, objp);

2800

2801

cache_init_objs(cachep, slabp);

2801

cache_init_objs(cachep, slabp);

2802

2803

if (local_flags & __GFP_WAIT)

2803

if (local_flags & __GFP_WAIT)

2804

local_irq_disable();

2804

local_irq_disable();

2805

check_irq_off();

2805

check_irq_off();

2806

spin_lock(&l3->list_lock);

2806

spin_lock(&l3->list_lock);

2807

2808

/* Make slab active. */

2808

/* Make slab active. */

2809

list_add_tail(&slabp->list, &(l3->slabs_free));

2809

list_add_tail(&slabp->list, &(l3->slabs_free));

2810

STATS_INC_GROWN(cachep);

2810

STATS_INC_GROWN(cachep);

2811

l3->free_objects += cachep->num;

2811

l3->free_objects += cachep->num;

2812

spin_unlock(&l3->list_lock);

2812

spin_unlock(&l3->list_lock);

2813

return 1;

2813

return 1;

2814

opps1:

2814

opps1:

2815

kmem_freepages(cachep, objp);

2815

kmem_freepages(cachep, objp);

2816

failed:

2816

failed:

2817

if (local_flags & __GFP_WAIT)

2817

if (local_flags & __GFP_WAIT)

2818

local_irq_disable();

2818

local_irq_disable();

2819

return 0;

2819

return 0;

2820

}

2820

}

2821

2822

#if DEBUG

2822

#if DEBUG

2823

2824

/*

2824

/*

2825

* Perform extra freeing checks:

2825

* Perform extra freeing checks:

2826

* - detect bad pointers.

2826

* - detect bad pointers.

2827

* - POISON/RED_ZONE checking

2827

* - POISON/RED_ZONE checking

2828

*/

2828

*/

2829

static void kfree_debugcheck(const void *objp)

2829

static void kfree_debugcheck(const void *objp)

2830

{

2830

{

2831

if (!virt_addr_valid(objp)) {

2831

if (!virt_addr_valid(objp)) {

2832

printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",

2832

printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",

2833

(unsigned long)objp);

2833

(unsigned long)objp);

2834

BUG();

2834

BUG();

2835

}

2835

}

2836

}

2836

}

2837

2838

static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)

2838

static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)

2839

{

2839

{

2840

unsigned long long redzone1, redzone2;

2840

unsigned long long redzone1, redzone2;

2841

2842

redzone1 = *dbg_redzone1(cache, obj);

2842

redzone1 = *dbg_redzone1(cache, obj);

2843

redzone2 = *dbg_redzone2(cache, obj);

2843

redzone2 = *dbg_redzone2(cache, obj);

2844

2845

/*

2845

/*

2846

* Redzone is ok.

2846

* Redzone is ok.

2847

*/

2847

*/

2848

if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)

2848

if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)

2849

return;

2849

return;

2850

2851

if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)

2851

if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)

2852

slab_error(cache, "double free detected");

2852

slab_error(cache, "double free detected");

2853

else

2853

else

2854

slab_error(cache, "memory outside object was overwritten");

2854

slab_error(cache, "memory outside object was overwritten");

2855

2856

printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",

2856

printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",

2857

obj, redzone1, redzone2);

2857

obj, redzone1, redzone2);

2858

}

2858

}

2859

2860

static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,

2860

static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,

2861

void *caller)

2861

void *caller)

2862

{

2862

{

2863

struct page *page;

2863

struct page *page;

2864

unsigned int objnr;

2864

unsigned int objnr;

2865

struct slab *slabp;

2865

struct slab *slabp;

2866

2867

BUG_ON(virt_to_cache(objp) != cachep);

2867

BUG_ON(virt_to_cache(objp) != cachep);

2868

2869

objp -= obj_offset(cachep);

2869

objp -= obj_offset(cachep);

2870

kfree_debugcheck(objp);

2870

kfree_debugcheck(objp);

2871

page = virt_to_head_page(objp);

2871

page = virt_to_head_page(objp);

2872

2873

slabp = page_get_slab(page);

2873

slabp = page_get_slab(page);

2874

2875

if (cachep->flags & SLAB_RED_ZONE) {

2875

if (cachep->flags & SLAB_RED_ZONE) {

2876

verify_redzone_free(cachep, objp);

2876

verify_redzone_free(cachep, objp);

2877

*dbg_redzone1(cachep, objp) = RED_INACTIVE;

2877

*dbg_redzone1(cachep, objp) = RED_INACTIVE;

2878

*dbg_redzone2(cachep, objp) = RED_INACTIVE;

2878

*dbg_redzone2(cachep, objp) = RED_INACTIVE;

2879

}

2879

}

2880

if (cachep->flags & SLAB_STORE_USER)

2880

if (cachep->flags & SLAB_STORE_USER)

2881

*dbg_userword(cachep, objp) = caller;

2881

*dbg_userword(cachep, objp) = caller;

2882

2883

objnr = obj_to_index(cachep, slabp, objp);

2883

objnr = obj_to_index(cachep, slabp, objp);

2884

2885

BUG_ON(objnr >= cachep->num);

2885

BUG_ON(objnr >= cachep->num);

2886

BUG_ON(objp != index_to_obj(cachep, slabp, objnr));

2886

BUG_ON(objp != index_to_obj(cachep, slabp, objnr));

2887

2888

#ifdef CONFIG_DEBUG_SLAB_LEAK

2888

#ifdef CONFIG_DEBUG_SLAB_LEAK

2889

slab_bufctl(slabp)[objnr] = BUFCTL_FREE;

2889

slab_bufctl(slabp)[objnr] = BUFCTL_FREE;

2890

#endif

2890

#endif

2891

if (cachep->flags & SLAB_POISON) {

2891

if (cachep->flags & SLAB_POISON) {

2892

#ifdef CONFIG_DEBUG_PAGEALLOC

2892

#ifdef CONFIG_DEBUG_PAGEALLOC

2893

if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {

2893

if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {

2894

store_stackinfo(cachep, objp, (unsigned long)caller);

2894

store_stackinfo(cachep, objp, (unsigned long)caller);

2895

kernel_map_pages(virt_to_page(objp),

2895

kernel_map_pages(virt_to_page(objp),

2896

cachep->buffer_size / PAGE_SIZE, 0);

2896

cachep->buffer_size / PAGE_SIZE, 0);

2897

} else {

2897

} else {

2898

poison_obj(cachep, objp, POISON_FREE);

2898

poison_obj(cachep, objp, POISON_FREE);

2899

}

2899

}

2900

#else

2900

#else

2901

poison_obj(cachep, objp, POISON_FREE);

2901

poison_obj(cachep, objp, POISON_FREE);

2902

#endif

2902

#endif

2903

}

2903

}

2904

return objp;

2904

return objp;

2905

}

2905

}

2906

2907

static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)

2907

static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)

2908

{

2908

{

2909

kmem_bufctl_t i;

2909

kmem_bufctl_t i;

2910

int entries = 0;

2910

int entries = 0;

2911

2912

/* Check slab's freelist to see if this obj is there. */

2912

/* Check slab's freelist to see if this obj is there. */

2913

for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {

2913

for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {

2914

entries++;

2914

entries++;

2915

if (entries > cachep->num || i >= cachep->num)

2915

if (entries > cachep->num || i >= cachep->num)

2916

goto bad;

2916

goto bad;

2917

}

2917

}

2918

if (entries != cachep->num - slabp->inuse) {

2918

if (entries != cachep->num - slabp->inuse) {

2919

bad:

2919

bad:

2920

printk(KERN_ERR "slab: Internal list corruption detected in "

2920

printk(KERN_ERR "slab: Internal list corruption detected in "

2921

"cache '%s'(%d), slabp %p(%d). Hexdump:\n",

2921

"cache '%s'(%d), slabp %p(%d). Hexdump:\n",

2922

cachep->name, cachep->num, slabp, slabp->inuse);

2922

cachep->name, cachep->num, slabp, slabp->inuse);

2923

for (i = 0;

2923

for (i = 0;

2924

i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);

2924

i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);

2925

i++) {

2925

i++) {

2926

if (i % 16 == 0)

2926

if (i % 16 == 0)

2927

printk("\n%03x:", i);

2927

printk("\n%03x:", i);

2928

printk(" %02x", ((unsigned char *)slabp)[i]);

2928

printk(" %02x", ((unsigned char *)slabp)[i]);

2929

}

2929

}

2930

printk("\n");

2930

printk("\n");

2931

BUG();

2931

BUG();

2932

}

2932

}

2933

}

2933

}

2934

#else

2934

#else

2935

/* Stubs: the checks expand to nothing and the object pointer passes through. */
#define kfree_debugcheck(ptr) do { } while (0)
#define cache_free_debugcheck(cache, objp, caller) (objp)
#define check_slabp(cache, slab) do { } while (0)

2938

#endif

2938

#endif

2939

2940

/*
 * Refill the calling CPU's array cache for @cachep and hand back one object.
 *
 * Objects are taken, in order of preference, from the node's shared array
 * (via transfer_objects()), then from slabs on the node's partial list, then
 * from slabs on its free list.  If the array is still empty afterwards the
 * cache is grown with cache_grow() and the procedure is retried.
 *
 * check_irq_off() is invoked on every pass, so the caller is presumably
 * expected to run with local interrupts disabled.
 *
 * Returns an object popped from the per-CPU array, or NULL when cache_grow()
 * reports failure and the array is still empty.
 */
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
{
	struct array_cache *cpu_ac;
	struct kmem_list3 *lists;
	int want;
	int nid;

retry:
	check_irq_off();
	nid = numa_node_id();
	cpu_ac = cpu_cache_get(cachep);

	/*
	 * An array that saw little recent activity only gets a partial
	 * refill; a full one could generate refill bouncing.
	 */
	want = cpu_ac->batchcount;
	if (want > BATCHREFILL_LIMIT && !cpu_ac->touched)
		want = BATCHREFILL_LIMIT;

	lists = cachep->nodelists[nid];
	BUG_ON(cpu_ac->avail > 0 || !lists);

	spin_lock(&lists->list_lock);

	/* First choice: move objects over from the node's shared array. */
	if (lists->shared && transfer_objects(cpu_ac, lists->shared, want))
		goto alloc_done;

	while (want > 0) {
		struct list_head *pos = lists->slabs_partial.next;
		struct slab *slabp;

		/* Prefer a partial slab; otherwise try a completely free one. */
		if (pos == &lists->slabs_partial) {
			lists->free_touched = 1;
			pos = lists->slabs_free.next;
			if (pos == &lists->slabs_free)
				goto must_grow;
		}

		slabp = list_entry(pos, struct slab, list);
		check_slabp(cachep, slabp);
		check_spinlock_acquired(cachep);

		/*
		 * The slab came off the partial or free list, so at least
		 * one of its objects must still be available.
		 */
		BUG_ON(slabp->inuse >= cachep->num);

		/* Drain this slab until it is full or the batch is complete. */
		while (slabp->inuse < cachep->num && want--) {
			void *obj;

			STATS_INC_ALLOCED(cachep);
			STATS_INC_ACTIVE(cachep);
			STATS_SET_HIGH(cachep);

			obj = slab_get_obj(cachep, slabp, nid);
			cpu_ac->entry[cpu_ac->avail++] = obj;
		}
		check_slabp(cachep, slabp);

		/* Re-file the slab on the list matching its new fill level. */
		list_del(&slabp->list);
		list_add(&slabp->list, slabp->free == BUFCTL_END ?
			 &lists->slabs_full : &lists->slabs_partial);
	}

must_grow:
	/*
	 * Everything now sitting in the array was pulled out of slabs above;
	 * the shared-array path jumps past this adjustment.
	 */
	lists->free_objects -= cpu_ac->avail;
alloc_done:
	spin_unlock(&lists->list_lock);

	if (unlikely(!cpu_ac->avail)) {
		int grown = cache_grow(cachep, flags | GFP_THISNODE, nid, NULL);

		/* cache_grow can reenable interrupts, then ac could change. */
		cpu_ac = cpu_cache_get(cachep);
		if (!cpu_ac->avail) {
			/* Still empty: give up if growing failed, else retry. */
			if (!grown)
				return NULL;
			goto retry;
		}
	}
	cpu_ac->touched = 1;
	return cpu_ac->entry[--cpu_ac->avail];
}

3030

3031

static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,

3031

static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,

3032

gfp_t flags)

3032

gfp_t flags)

3033

{

3033

{

3034

might_sleep_if(flags & __GFP_WAIT);

3034

might_sleep_if(flags & __GFP_WAIT);

3035

#if DEBUG

3035

#if DEBUG

3036

kmem_flagcheck(cachep, flags);

3036

kmem_flagcheck(cachep, flags);

3037

#endif

3037

#endif

3038

}

3038

}

3039

3040

#if DEBUG
/*
 * Debug processing applied to an object right after it has been allocated.
 *
 * @cachep: cache the object was taken from
 * @flags:  allocation flags (not inspected here)
 * @objp:   raw object pointer as stored in the slab, may be NULL
 * @caller: value recorded in the object's user word when SLAB_STORE_USER
 *          is set on the cache
 *
 * Depending on the cache's debug flags this
 *  - verifies the poison pattern (or, under CONFIG_DEBUG_PAGEALLOC for
 *    page-multiple off-slab objects, maps the pages back in instead) and
 *    re-poisons the object with POISON_INUSE,
 *  - records @caller,
 *  - checks that both red zones are RED_INACTIVE, reports an error if not,
 *    and marks them RED_ACTIVE,
 *  - under CONFIG_DEBUG_SLAB_LEAK, marks the object's bufctl slot
 *    BUFCTL_ACTIVE.
 *
 * The pointer is then advanced by obj_offset(), the constructor is run for
 * poisoned caches that have one, and the resulting alignment is checked
 * against ARCH_SLAB_MINALIGN.
 *
 * Returns the adjusted object pointer, or @objp unchanged when it is NULL.
 */
static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
				gfp_t flags, void *objp, void *caller)
{
	if (!objp)
		return objp;
	if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
		if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
			kernel_map_pages(virt_to_page(objp),
					 cachep->buffer_size / PAGE_SIZE, 1);
		else
			check_poison_obj(cachep, objp);
#else
		check_poison_obj(cachep, objp);
#endif
		poison_obj(cachep, objp, POISON_INUSE);
	}
	if (cachep->flags & SLAB_STORE_USER)
		*dbg_userword(cachep, objp) = caller;

	if (cachep->flags & SLAB_RED_ZONE) {
		if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
				*dbg_redzone2(cachep, objp) != RED_INACTIVE) {
			slab_error(cachep, "double free, or memory outside"
						" object was overwritten");
			printk(KERN_ERR
				"%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
				objp, *dbg_redzone1(cachep, objp),
				*dbg_redzone2(cachep, objp));
		}
		*dbg_redzone1(cachep, objp) = RED_ACTIVE;
		*dbg_redzone2(cachep, objp) = RED_ACTIVE;
	}
#ifdef CONFIG_DEBUG_SLAB_LEAK
	{
		struct slab *slabp;
		unsigned objnr;

		/* Index of the object within its slab. */
		slabp = page_get_slab(virt_to_head_page(objp));
		objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
		slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
	}
#endif
	objp += obj_offset(cachep);
	if (cachep->ctor && (cachep->flags & SLAB_POISON))
		cachep->ctor(objp);
#if ARCH_SLAB_MINALIGN
	/*
	 * Test the low address bits using a pointer-sized integer; a 32-bit
	 * cast would truncate the pointer on 64-bit builds.  The cast on the
	 * printk argument keeps it in step with the %d conversion whatever
	 * integer type ARCH_SLAB_MINALIGN expands to.
	 */
	if ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1)) {
		printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
		       objp, (int)ARCH_SLAB_MINALIGN);
	}
#endif
	return objp;
}
#else
#define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
#endif

3098

3099

/*
 * Decide whether this allocation should be made to fail artificially.
 *
 * Allocations from cache_cache are never failed; for every other cache the
 * decision is delegated to should_failslab() with the cache's object size
 * and the allocation flags.
 */
static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
{
	return cachep != &cache_cache &&
	       should_failslab(obj_size(cachep), flags);
}

3106

3107

/*
 * Allocate one object from the calling CPU's array cache.
 *
 * When the array holds objects the most recently stored one is popped;
 * otherwise cache_alloc_refill() is asked to refill the array and supply
 * an object.  check_irq_off() is invoked first, so the caller is presumably
 * expected to have local interrupts disabled.
 *
 * Returns the object, or NULL if the refill produced nothing.
 */
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	struct array_cache *cpu_ac;
	void *obj;

	check_irq_off();

	cpu_ac = cpu_cache_get(cachep);
	if (unlikely(!cpu_ac->avail)) {
		STATS_INC_ALLOCMISS(cachep);
		obj = cache_alloc_refill(cachep, flags);
		/*
		 * cache_alloc_refill() may have updated the array cache;
		 * re-read it because kmemleak_erase() below needs the
		 * current one.
		 */
		cpu_ac = cpu_cache_get(cachep);
	} else {
		STATS_INC_ALLOCHIT(cachep);
		cpu_ac->touched = 1;
		obj = cpu_ac->entry[--cpu_ac->avail];
	}

	/*
	 * Clear the array slot the object came from so that kmemleak does
	 * not treat the stale array pointer as a reference to the object,
	 * which would hide a leak of it.
	 */
	if (obj)
		kmemleak_erase(&cpu_ac->entry[cpu_ac->avail]);
	return obj;
}

3137

3138

#ifdef CONFIG_NUMA

3138

#ifdef CONFIG_NUMA

3139

/*

3139

/*

3140

* Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY.

3140

* Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY.

3141

*

3141

*

3142

* If we are in_interrupt, then process context, including cpusets and

3142

* If we are in_interrupt, then process context, including cpusets and

3143

* mempolicy, may not apply and should not be used for allocation policy.

3143

* mempolicy, may not apply and should not be used for allocation policy.

3144

*/

3144

*/

3145

static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)

3145

static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)

3146

{

3146

{

3147

int nid_alloc, nid_here;

3147

int nid_alloc, nid_here;

3148

3149

if (in_interrupt() || (flags & __GFP_THISNODE))

3149

if (in_interrupt() || (flags & __GFP_THISNODE))

3150

return NULL;

3150

return NULL;

3151

nid_alloc = nid_here = numa_node_id();

3151

nid_alloc = nid_here = numa_node_id();

3152

if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))

3152

if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))

3153

nid_alloc = cpuset_mem_spread_node();

3153

nid_alloc = cpuset_mem_spread_node();

3154

else if (current->mempolicy)

3154

else if (current->mempolicy)

3155

nid_alloc = slab_node(current->mempolicy);

3155

nid_alloc = slab_node(current->mempolicy);

3156

if (nid_alloc != nid_here)

3156

if (nid_alloc != nid_here)

3157

return ____cache_alloc_node(cachep, flags, nid_alloc);

3157

return ____cache_alloc_node(cachep, flags, nid_alloc);

3158

return NULL;

3158

return NULL;

3159

}

3159

}

3160

3161

/*

3161

/*

3162

* Fallback function if there was no memory available and no objects on a

3162

* Fallback function if there was no memory available and no objects on a

3163

* certain node and fall back is permitted. First we scan all the

3163

* certain node and fall back is permitted. First we scan all the

3164

* available nodelists for available objects. If that fails then we

3164

* available nodelists for available objects. If that fails then we

3165

* perform an allocation without specifying a node. This allows the page

3165

* perform an allocation without specifying a node. This allows the page

3166

* allocator to do its reclaim / fallback magic. We then insert the

3166

* allocator to do its reclaim / fallback magic. We then insert the

3167

* slab into the proper nodelist and then allocate from it.

3167

* slab into the proper nodelist and then allocate from it.

3168

*/

3168

*/

3169

static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)

3169

static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)

3170

{

3170

{

3171

struct zonelist *zonelist;

3171

struct zonelist *zonelist;

3172

gfp_t local_flags;

3172

gfp_t local_flags;

3173

struct zoneref *z;

3173

struct zoneref *z;

3174

struct zone *zone;

3174

struct zone *zone;

3175

enum zone_type high_zoneidx = gfp_zone(flags);

3175

enum zone_type high_zoneidx = gfp_zone(flags);

3176

void *obj = NULL;

3176

void *obj = NULL;

3177

int nid;

3177

int nid;

3178

3179

if (flags & __GFP_THISNODE)

3179

if (flags & __GFP_THISNODE)

3180

return NULL;

3180

return NULL;

3181

3182

zonelist = node_zonelist(slab_node(current->mempolicy), flags);

3182

zonelist = node_zonelist(slab_node(current->mempolicy), flags);

3183

local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

3183

local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

3184

3185

retry:

3185

retry:

3186

/*

3186

/*

3187

* Look through allowed nodes for objects available

3187

* Look through allowed nodes for objects available

3188

* from existing per node queues.

3188

* from existing per node queues.

3189

*/

3189

*/

3190

for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {

3190

for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {

3191

nid = zone_to_nid(zone);

3191

nid = zone_to_nid(zone);

3192

3193

if (cpuset_zone_allowed_hardwall(zone, flags) &&

3193

if (cpuset_zone_allowed_hardwall(zone, flags) &&

3194

cache->nodelists[nid] &&

3194

cache->nodelists[nid] &&

3195

cache->nodelists[nid]->free_objects) {

3195

cache->nodelists[nid]->free_objects) {

3196

obj = ____cache_alloc_node(cache,

3196

obj = ____cache_alloc_node(cache,

3197

flags | GFP_THISNODE, nid);

3197

flags | GFP_THISNODE, nid);

3198

if (obj)

3198

if (obj)

3199

break;

3199

break;

3200

}

3200

}

3201

}

3201

}

3202

3203

if (!obj) {

3203

if (!obj) {

3204

/*

3204

/*

3205

* This allocation will be performed within the constraints

3205

* This allocation will be performed within the constraints

3206

* of the current cpuset / memory policy requirements.

3206

* of the current cpuset / memory policy requirements.

3207

* We may trigger various forms of reclaim on the allowed

3207

* We may trigger various forms of reclaim on the allowed

3208

* set and go into memory reserves if necessary.

3208

* set and go into memory reserves if necessary.

3209

*/

3209

*/

3210

if (local_flags & __GFP_WAIT)

3210

if (local_flags & __GFP_WAIT)

3211

local_irq_enable();

3211

local_irq_enable();

3212

kmem_flagcheck(cache, flags);

3212

kmem_flagcheck(cache, flags);

3213

obj = kmem_getpages(cache, local_flags, numa_node_id());

3213

obj = kmem_getpages(cache, local_flags, numa_node_id());

3214

if (local_flags & __GFP_WAIT)

3214

if (local_flags & __GFP_WAIT)

3215

local_irq_disable();

3215

local_irq_disable();

3216

if (obj) {

3216

if (obj) {

3217

/*

3217

/*

3218

* Insert into the appropriate per node queues

3218

* Insert into the appropriate per node queues

3219

*/

3219

*/

3220

nid = page_to_nid(virt_to_page(obj));

3220

nid = page_to_nid(virt_to_page(obj));

3221

if (cache_grow(cache, flags, nid, obj)) {

3221

if (cache_grow(cache, flags, nid, obj)) {

3222

obj = ____cache_alloc_node(cache,

3222

obj = ____cache_alloc_node(cache,

3223

flags | GFP_THISNODE, nid);

3223

flags | GFP_THISNODE, nid);

3224

if (!obj)

3224

if (!obj)

3225

/*

3225

/*

3226

* Another processor may allocate the

3226

* Another processor may allocate the

3227

* objects in the slab since we are

3227

* objects in the slab since we are

3228

* not holding any locks.

3228

* not holding any locks.

3229

*/

3229

*/

3230

goto retry;

3230

goto retry;

3231

} else {

3231

} else {

3232

/* cache_grow already freed obj */

3232

/* cache_grow already freed obj */

3233

obj = NULL;

3233

obj = NULL;

3234

}

3234

}

3235

}

3235

}

3236

}

3236

}

3237

return obj;

3237

return obj;

3238

}

3238

}

3239

3240

/*

3240

/*

3241

* A interface to enable slab creation on nodeid

3241

* A interface to enable slab creation on nodeid

3242

*/

3242

*/

3243

static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,

3243

static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,

3244

int nodeid)

3244

int nodeid)

3245

{

3245

{

3246

struct list_head *entry;

3246

struct list_head *entry;

3247

struct slab *slabp;

3247

struct slab *slabp;

3248

struct kmem_list3 *l3;

3248

struct kmem_list3 *l3;

3249

void *obj;

3249

void *obj;

3250

int x;

3250

int x;

3251

3252

l3 = cachep->nodelists[nodeid];

3252

l3 = cachep->nodelists[nodeid];

3253

BUG_ON(!l3);

3253

BUG_ON(!l3);

3254

3255

retry:

3255

retry:

3256

check_irq_off();

3256

check_irq_off();

3257

spin_lock(&l3->list_lock);

3257

spin_lock(&l3->list_lock);

3258

entry = l3->slabs_partial.next;

3258

entry = l3->slabs_partial.next;

3259

if (entry == &l3->slabs_partial) {

3259

if (entry == &l3->slabs_partial) {

3260

l3->free_touched = 1;

3260

l3->free_touched = 1;

3261

entry = l3->slabs_free.next;

3261

entry = l3->slabs_free.next;

3262

if (entry == &l3->slabs_free)

3262

if (entry == &l3->slabs_free)

3263

goto must_grow;

3263

goto must_grow;

3264

}

3264

}

3265

3266

slabp = list_entry(entry, struct slab, list);

3266

slabp = list_entry(entry, struct slab, list);

3267

check_spinlock_acquired_node(cachep, nodeid);

3267

check_spinlock_acquired_node(cachep, nodeid);

3268

check_slabp(cachep, slabp);

3268

check_slabp(cachep, slabp);

3269

3270

STATS_INC_NODEALLOCS(cachep);

3270

STATS_INC_NODEALLOCS(cachep);

3271

STATS_INC_ACTIVE(cachep);

3271

STATS_INC_ACTIVE(cachep);

3272

STATS_SET_HIGH(cachep);

3272

STATS_SET_HIGH(cachep);

3273

3274

BUG_ON(slabp->inuse == cachep->num);

3274

BUG_ON(slabp->inuse == cachep->num);

3275

3276

obj = slab_get_obj(cachep, slabp, nodeid);

3276

obj = slab_get_obj(cachep, slabp, nodeid);

3277

check_slabp(cachep, slabp);

3277

check_slabp(cachep, slabp);

3278

l3->free_objects--;

3278

l3->free_objects--;

3279

/* move slabp to correct slabp list: */

3279

/* move slabp to correct slabp list: */

3280

list_del(&slabp->list);

3280

list_del(&slabp->list);

3281

3282

if (slabp->free == BUFCTL_END)

3282

if (slabp->free == BUFCTL_END)

3283

list_add(&slabp->list, &l3->slabs_full);

3283

list_add(&slabp->list, &l3->slabs_full);

3284

else

3284

else

3285

list_add(&slabp->list, &l3->slabs_partial);

3285

list_add(&slabp->list, &l3->slabs_partial);

3286

3287

spin_unlock(&l3->list_lock);

3287

spin_unlock(&l3->list_lock);

3288

goto done;

3288

goto done;

3289

3290

must_grow:

3290

must_grow:

3291

spin_unlock(&l3->list_lock);

3291

spin_unlock(&l3->list_lock);

3292

x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);

3292

x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);

3293

if (x)

3293

if (x)

3294

goto retry;

3294

goto retry;

3295

3296

return fallback_alloc(cachep, flags);

3296

return fallback_alloc(cachep, flags);

3297

3298

done:

3298

done:

3299

return obj;

3299

return obj;

3300

}

3300

}

3301

3302

/**

3302

/**

3303

* kmem_cache_alloc_node - Allocate an object on the specified node

3303

* kmem_cache_alloc_node - Allocate an object on the specified node

3304

* @cachep: The cache to allocate from.

3304

* @cachep: The cache to allocate from.

3305

* @flags: See kmalloc().

3305

* @flags: See kmalloc().

3306

* @nodeid: node number of the target node.

3306

* @nodeid: node number of the target node.

3307

* @caller: return address of caller, used for debug information

3307

* @caller: return address of caller, used for debug information

3308

*

3308

*

3309

* Identical to kmem_cache_alloc but it will allocate memory on the given

3309

* Identical to kmem_cache_alloc but it will allocate memory on the given

3310

* node, which can improve the performance for cpu bound structures.

3310

* node, which can improve the performance for cpu bound structures.

3311

*

3311

*

3312

* Fallback to other node is possible if __GFP_THISNODE is not set.

3312

* Fallback to other node is possible if __GFP_THISNODE is not set.

3313

*/

3313

*/

3314

static __always_inline void *

3314

static __always_inline void *

3315

__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,

3315

__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,

3316

void *caller)

3316

void *caller)

3317

{

3317

{

3318

unsigned long save_flags;

3318

unsigned long save_flags;

3319

void *ptr;

3319

void *ptr;

3320

3321

flags &= gfp_allowed_mask;

3321

flags &= gfp_allowed_mask;

3322

3323

lockdep_trace_alloc(flags);

3323

lockdep_trace_alloc(flags);

3324

3325

if (slab_should_failslab(cachep, flags))

3325

if (slab_should_failslab(cachep, flags))

3326

return NULL;

3326

return NULL;

3327

3328

cache_alloc_debugcheck_before(cachep, flags);

3328

cache_alloc_debugcheck_before(cachep, flags);

3329

local_irq_save(save_flags);

3329

local_irq_save(save_flags);

3330

3331

if (nodeid == -1)

3331

if (nodeid == -1)

3332

nodeid = numa_node_id();

3332

nodeid = numa_node_id();

3333

3334

if (unlikely(!cachep->nodelists[nodeid])) {

3334

if (unlikely(!cachep->nodelists[nodeid])) {

3335

/* Node not bootstrapped yet */

3335

/* Node not bootstrapped yet */

3336

ptr = fallback_alloc(cachep, flags);

3336

ptr = fallback_alloc(cachep, flags);

3337

goto out;

3337

goto out;

3338

}

3338

}

3339

3340

if (nodeid == numa_node_id()) {

3340

if (nodeid == numa_node_id()) {

3341

/*

3341

/*

3342

* Use the locally cached objects if possible.

3342

* Use the locally cached objects if possible.

3343

* However ____cache_alloc does not allow fallback

3343

* However ____cache_alloc does not allow fallback

3344

* to other nodes. It may fail while we still have

3344

* to other nodes. It may fail while we still have

3345

* objects on other nodes available.

3345

* objects on other nodes available.

3346

*/

3346

*/

3347

ptr = ____cache_alloc(cachep, flags);

3347

ptr = ____cache_alloc(cachep, flags);

3348

if (ptr)

3348

if (ptr)

3349

goto out;

3349

goto out;

3350

}

3350

}

3351

/* ___cache_alloc_node can fall back to other nodes */

3351

/* ___cache_alloc_node can fall back to other nodes */

3352

ptr = ____cache_alloc_node(cachep, flags, nodeid);

3352

ptr = ____cache_alloc_node(cachep, flags, nodeid);

3353

out:

3353

out:

3354

local_irq_restore(save_flags);

3354

local_irq_restore(save_flags);

3355

ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);

3355

ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);

3356

kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,

3356

kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,

3357

flags);

3357

flags);

3358

3359

if (likely(ptr))

3359

if (likely(ptr))

3360

kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep));

3360

kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep));

3361

3362

if (unlikely((flags & __GFP_ZERO) && ptr))

3362

if (unlikely((flags & __GFP_ZERO) && ptr))

3363

memset(ptr, 0, obj_size(cachep));

3363

memset(ptr, 0, obj_size(cachep));

3364

3365

return ptr;

3365

return ptr;

3366

}

3366

}

3367

3368

static __always_inline void *

3368

static __always_inline void *

3369

__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)

3369

__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)

3370

{

3370

{

3371

void *objp;

3371

void *objp;

3372

3373

if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {

3373

if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {

3374

objp = alternate_node_alloc(cache, flags);

3374

objp = alternate_node_alloc(cache, flags);

3375

if (objp)

3375

if (objp)

3376

goto out;

3376

goto out;

3377

}

3377

}

3378

objp = ____cache_alloc(cache, flags);

3378

objp = ____cache_alloc(cache, flags);

3379

3380

/*

3380

/*

3381

* We may just have run out of memory on the local node.

3381

* We may just have run out of memory on the local node.

3382

* ____cache_alloc_node() knows how to locate memory on other nodes

3382

* ____cache_alloc_node() knows how to locate memory on other nodes

3383

*/

3383

*/

3384

if (!objp)

3384

if (!objp)

3385

objp = ____cache_alloc_node(cache, flags, numa_node_id());

3385

objp = ____cache_alloc_node(cache, flags, numa_node_id());

3386

3387

out:

3387

out:

3388

return objp;

3388

return objp;

3389

}

3389

}

3390

#else

3390

#else

3391

3392

static __always_inline void *

3392

static __always_inline void *

3393

__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)

3393

__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)

3394

{

3394

{

3395

return ____cache_alloc(cachep, flags);

3395

return ____cache_alloc(cachep, flags);

3396

}

3396

}

3397

3398

#endif /* CONFIG_NUMA */

3398

#endif /* CONFIG_NUMA */

3399

3400

static __always_inline void *

3400

static __always_inline void *

3401

__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)

3401

__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)

3402

{

3402

{

3403

unsigned long save_flags;

3403

unsigned long save_flags;

3404

void *objp;

3404

void *objp;

3405

3406

flags &= gfp_allowed_mask;

3406

flags &= gfp_allowed_mask;

3407

3408

lockdep_trace_alloc(flags);

3408

lockdep_trace_alloc(flags);

3409

3410

if (slab_should_failslab(cachep, flags))

3410

if (slab_should_failslab(cachep, flags))

3411

return NULL;

3411

return NULL;

3412

3413

cache_alloc_debugcheck_before(cachep, flags);

3413

cache_alloc_debugcheck_before(cachep, flags);

3414

local_irq_save(save_flags);

3414

local_irq_save(save_flags);

3415

objp = __do_cache_alloc(cachep, flags);

3415

objp = __do_cache_alloc(cachep, flags);

3416

local_irq_restore(save_flags);

3416

local_irq_restore(save_flags);

3417

objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);

3417

objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);

3418

kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags,

3418

kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags,

3419

flags);

3419

flags);

3420

prefetchw(objp);

3420

prefetchw(objp);

3421

3422

if (likely(objp))

3422

if (likely(objp))

3423

kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep));

3423

kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep));

3424

3425

if (unlikely((flags & __GFP_ZERO) && objp))

3425

if (unlikely((flags & __GFP_ZERO) && objp))

3426

memset(objp, 0, obj_size(cachep));

3426

memset(objp, 0, obj_size(cachep));

3427

3428

return objp;

3428

return objp;

3429

}

3429

}

3430

3431

/*

3431

/*

3432

* Caller needs to acquire correct kmem_list's list_lock

3432

* Caller needs to acquire correct kmem_list's list_lock

3433

*/

3433

*/

3434

static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,

3434

static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,

3435

int node)

3435

int node)

3436

{

3436

{

3437

int i;

3437

int i;

3438

struct kmem_list3 *l3;

3438

struct kmem_list3 *l3;

3439

3440

for (i = 0; i < nr_objects; i++) {

3440

for (i = 0; i < nr_objects; i++) {

3441

void *objp = objpp[i];

3441

void *objp = objpp[i];

3442

struct slab *slabp;

3442

struct slab *slabp;

3443

3444

slabp = virt_to_slab(objp);

3444

slabp = virt_to_slab(objp);

3445

l3 = cachep->nodelists[node];

3445

l3 = cachep->nodelists[node];

3446

list_del(&slabp->list);

3446

list_del(&slabp->list);

3447

check_spinlock_acquired_node(cachep, node);

3447

check_spinlock_acquired_node(cachep, node);

3448

check_slabp(cachep, slabp);

3448

check_slabp(cachep, slabp);

3449

slab_put_obj(cachep, slabp, objp, node);

3449

slab_put_obj(cachep, slabp, objp, node);

3450

STATS_DEC_ACTIVE(cachep);

3450

STATS_DEC_ACTIVE(cachep);

3451

l3->free_objects++;

3451

l3->free_objects++;

3452

check_slabp(cachep, slabp);

3452

check_slabp(cachep, slabp);

3453

3454

/* fixup slab chains */

3454

/* fixup slab chains */

3455

if (slabp->inuse == 0) {

3455

if (slabp->inuse == 0) {

3456

if (l3->free_objects > l3->free_limit) {

3456

if (l3->free_objects > l3->free_limit) {

3457

l3->free_objects -= cachep->num;

3457

l3->free_objects -= cachep->num;

3458

/* No need to drop any previously held

3458

/* No need to drop any previously held

3459

* lock here, even if we have a off-slab slab

3459

* lock here, even if we have a off-slab slab

3460

* descriptor it is guaranteed to come from

3460

* descriptor it is guaranteed to come from

3461

* a different cache, refer to comments before

3461

* a different cache, refer to comments before

3462

* alloc_slabmgmt.

3462

* alloc_slabmgmt.

3463

*/

3463

*/

3464

slab_destroy(cachep, slabp);

3464

slab_destroy(cachep, slabp);

3465

} else {

3465

} else {

3466

list_add(&slabp->list, &l3->slabs_free);

3466

list_add(&slabp->list, &l3->slabs_free);

3467

}

3467

}

3468

} else {

3468

} else {

3469

/* Unconditionally move a slab to the end of the

3469

/* Unconditionally move a slab to the end of the

3470

* partial list on free - maximum time for the

3470

* partial list on free - maximum time for the

3471

* other objects to be freed, too.

3471

* other objects to be freed, too.

3472

*/

3472

*/

3473

list_add_tail(&slabp->list, &l3->slabs_partial);

3473

list_add_tail(&slabp->list, &l3->slabs_partial);

3474

}

3474

}

3475

}

3475

}

3476

}

3476

}

3477

3478

static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)

3478

static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)

3479

{

3479

{

3480

int batchcount;

3480

int batchcount;

3481

struct kmem_list3 *l3;

3481

struct kmem_list3 *l3;

3482

int node = numa_node_id();

3482

int node = numa_node_id();

3483

3484

batchcount = ac->batchcount;

3484

batchcount = ac->batchcount;

3485

#if DEBUG

3485

#if DEBUG

3486

BUG_ON(!batchcount || batchcount > ac->avail);

3486

BUG_ON(!batchcount || batchcount > ac->avail);

3487

#endif

3487

#endif

3488

check_irq_off();

3488

check_irq_off();

3489

l3 = cachep->nodelists[node];

3489

l3 = cachep->nodelists[node];

3490

spin_lock(&l3->list_lock);

3490

spin_lock(&l3->list_lock);

3491

if (l3->shared) {

3491

if (l3->shared) {

3492

struct array_cache *shared_array = l3->shared;

3492

struct array_cache *shared_array = l3->shared;

3493

int max = shared_array->limit - shared_array->avail;

3493

int max = shared_array->limit - shared_array->avail;

3494

if (max) {

3494

if (max) {

3495

if (batchcount > max)

3495

if (batchcount > max)

3496

batchcount = max;

3496

batchcount = max;

3497

memcpy(&(shared_array->entry[shared_array->avail]),

3497

memcpy(&(shared_array->entry[shared_array->avail]),

3498

ac->entry, sizeof(void *) * batchcount);

3498

ac->entry, sizeof(void *) * batchcount);

3499

shared_array->avail += batchcount;

3499

shared_array->avail += batchcount;

3500

goto free_done;

3500

goto free_done;

3501

}

3501

}

3502

}

3502

}

3503

3504

free_block(cachep, ac->entry, batchcount, node);

3504

free_block(cachep, ac->entry, batchcount, node);

3505

free_done:

3505

free_done:

3506

#if STATS

3506

#if STATS

3507

{

3507

{

3508

int i = 0;

3508

int i = 0;

3509

struct list_head *p;

3509

struct list_head *p;

3510

3511

p = l3->slabs_free.next;

3511

p = l3->slabs_free.next;

3512

while (p != &(l3->slabs_free)) {

3512

while (p != &(l3->slabs_free)) {

3513

struct slab *slabp;

3513

struct slab *slabp;

3514

3515

slabp = list_entry(p, struct slab, list);

3515

slabp = list_entry(p, struct slab, list);

3516

BUG_ON(slabp->inuse);

3516

BUG_ON(slabp->inuse);

3517

3518

i++;

3518

i++;

3519

p = p->next;

3519

p = p->next;

3520

}

3520

}

3521

STATS_SET_FREEABLE(cachep, i);

3521

STATS_SET_FREEABLE(cachep, i);

3522

}

3522

}

3523

#endif

3523

#endif

3524

spin_unlock(&l3->list_lock);

3524

spin_unlock(&l3->list_lock);

3525

ac->avail -= batchcount;

3525

ac->avail -= batchcount;

3526

memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);

3526

memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);

3527

}

3527

}

3528

3529

/*

3529

/*

3530

* Release an obj back to its cache. If the obj has a constructed state, it must

3530

* Release an obj back to its cache. If the obj has a constructed state, it must

3531

* be in this state _before_ it is released. Called with disabled ints.

3531

* be in this state _before_ it is released. Called with disabled ints.

3532

*/

3532

*/

3533

static inline void __cache_free(struct kmem_cache *cachep, void *objp)

3533

static inline void __cache_free(struct kmem_cache *cachep, void *objp)

3534

{

3534

{

3535

struct array_cache *ac = cpu_cache_get(cachep);

3535

struct array_cache *ac = cpu_cache_get(cachep);

3536

3537

check_irq_off();

3537

check_irq_off();

3538

kmemleak_free_recursive(objp, cachep->flags);

3538

kmemleak_free_recursive(objp, cachep->flags);

3539

objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));

3539

objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));

3540

3541

kmemcheck_slab_free(cachep, objp, obj_size(cachep));

3541

kmemcheck_slab_free(cachep, objp, obj_size(cachep));

3542

3543

/*

3543

/*

3544

* Skip calling cache_free_alien() when the platform is not numa.

3544

* Skip calling cache_free_alien() when the platform is not numa.

3545

* This will avoid cache misses that happen while accessing slabp (which

3545

* This will avoid cache misses that happen while accessing slabp (which

3546

* is per page memory reference) to get nodeid. Instead use a global

3546

* is per page memory reference) to get nodeid. Instead use a global

3547

* variable to skip the call, which is mostly likely to be present in

3547

* variable to skip the call, which is mostly likely to be present in

3548

* the cache.

3548

* the cache.

3549

*/

3549

*/

3550

if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))

3550

if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))

3551

return;

3551

return;

3552

3553

if (likely(ac->avail < ac->limit)) {

3553

if (likely(ac->avail < ac->limit)) {

3554

STATS_INC_FREEHIT(cachep);

3554

STATS_INC_FREEHIT(cachep);

3555

ac->entry[ac->avail++] = objp;

3555

ac->entry[ac->avail++] = objp;

3556

return;

3556

return;

3557

} else {

3557

} else {

3558

STATS_INC_FREEMISS(cachep);

3558

STATS_INC_FREEMISS(cachep);

3559

cache_flusharray(cachep, ac);

3559

cache_flusharray(cachep, ac);

3560

ac->entry[ac->avail++] = objp;

3560

ac->entry[ac->avail++] = objp;

3561

}

3561

}

3562

}

3562

}

3563

3564

/**

3564

/**

3565

* kmem_cache_alloc - Allocate an object

3565

* kmem_cache_alloc - Allocate an object

3566

* @cachep: The cache to allocate from.

3566

* @cachep: The cache to allocate from.

3567

* @flags: See kmalloc().

3567

* @flags: See kmalloc().

3568

*

3568

*

3569

* Allocate an object from this cache. The flags are only relevant

3569

* Allocate an object from this cache. The flags are only relevant

3570

* if the cache has no available objects.

3570

* if the cache has no available objects.

3571

*/

3571

*/

3572

void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)

3572

void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)

3573

{

3573

{

3574

void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));

3574

void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));

3575

3576

trace_kmem_cache_alloc(_RET_IP_, ret,

3576

trace_kmem_cache_alloc(_RET_IP_, ret,

3577

obj_size(cachep), cachep->buffer_size, flags);

3577

obj_size(cachep), cachep->buffer_size, flags);

3578

3579

return ret;

3579

return ret;

3580

}

3580

}

3581

EXPORT_SYMBOL(kmem_cache_alloc);

3581

EXPORT_SYMBOL(kmem_cache_alloc);

3582

3583

#ifdef CONFIG_TRACING
/*
 * Variant of kmem_cache_alloc() that does not emit a trace event; it
 * only forwards to __cache_alloc() with the caller's return address.
 */
void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
{
	void *caller = __builtin_return_address(0);

	return __cache_alloc(cachep, flags, caller);
}
EXPORT_SYMBOL(kmem_cache_alloc_notrace);
#endif

3590

3591

/**

3591

/**

3592

* kmem_ptr_validate - check if an untrusted pointer might be a slab entry.

3592

* kmem_ptr_validate - check if an untrusted pointer might be a slab entry.

3593

* @cachep: the cache we're checking against

3593

* @cachep: the cache we're checking against

3594

* @ptr: pointer to validate

3594

* @ptr: pointer to validate

3595

*

3595

*

3596

* This verifies that the untrusted pointer looks sane;

3596

* This verifies that the untrusted pointer looks sane;

3597

* it is _not_ a guarantee that the pointer is actually

3597

* it is _not_ a guarantee that the pointer is actually

3598

* part of the slab cache in question, but it at least

3598

* part of the slab cache in question, but it at least

3599

* validates that the pointer can be dereferenced and

3599

* validates that the pointer can be dereferenced and

3600

* looks half-way sane.

3600

* looks half-way sane.

3601

*

3601

*

3602

* Currently only used for dentry validation.

3602

* Currently only used for dentry validation.

3603

*/

3603

*/

3604

int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)

3604

int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)

3605

{

3605

{

3606

unsigned long addr = (unsigned long)ptr;

3606

unsigned long addr = (unsigned long)ptr;

3607

unsigned long min_addr = PAGE_OFFSET;

3607

unsigned long min_addr = PAGE_OFFSET;

3608

unsigned long align_mask = BYTES_PER_WORD - 1;

3608

unsigned long align_mask = BYTES_PER_WORD - 1;

3609

unsigned long size = cachep->buffer_size;

3609

unsigned long size = cachep->buffer_size;

3610

struct page *page;

3610

struct page *page;

3611

3612

if (unlikely(addr < min_addr))

3612

if (unlikely(addr < min_addr))

3613

goto out;

3613

goto out;

3614

if (unlikely(addr > (unsigned long)high_memory - size))

3614

if (unlikely(addr > (unsigned long)high_memory - size))

3615

goto out;

3615

goto out;

3616

if (unlikely(addr & align_mask))

3616

if (unlikely(addr & align_mask))

3617

goto out;

3617

goto out;

3618

if (unlikely(!kern_addr_valid(addr)))

3618

if (unlikely(!kern_addr_valid(addr)))

3619

goto out;

3619

goto out;

3620

if (unlikely(!kern_addr_valid(addr + size - 1)))

3620

if (unlikely(!kern_addr_valid(addr + size - 1)))

3621

goto out;

3621

goto out;

3622

page = virt_to_page(ptr);

3622

page = virt_to_page(ptr);

3623

if (unlikely(!PageSlab(page)))

3623

if (unlikely(!PageSlab(page)))

3624

goto out;

3624

goto out;

3625

if (unlikely(page_get_cache(page) != cachep))

3625

if (unlikely(page_get_cache(page) != cachep))

3626

goto out;

3626

goto out;

3627

return 1;

3627

return 1;

3628

out:

3628

out:

3629

return 0;

3629

return 0;

3630

}

3630

}

3631

3632

#ifdef CONFIG_NUMA

3632

#ifdef CONFIG_NUMA

3633

/*
 * Allocate an object from @cachep through __cache_alloc_node(), passing
 * @nodeid along, and report the allocation through
 * trace_kmem_cache_alloc_node().
 */
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
	void *objp;

	objp = __cache_alloc_node(cachep, flags, nodeid,
				  __builtin_return_address(0));

	trace_kmem_cache_alloc_node(_RET_IP_, objp, obj_size(cachep),
				    cachep->buffer_size, flags, nodeid);

	return objp;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);

3645

3646

#ifdef CONFIG_TRACING
/*
 * Variant of kmem_cache_alloc_node() that does not emit a trace event;
 * it only forwards to __cache_alloc_node() with the caller's return
 * address.
 */
void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
				    gfp_t flags,
				    int nodeid)
{
	void *caller = __builtin_return_address(0);

	return __cache_alloc_node(cachep, flags, nodeid, caller);
}
EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
#endif

3656

3657

static __always_inline void *

3657

static __always_inline void *

3658

__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)

3658

__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)

3659

{

3659

{

3660

struct kmem_cache *cachep;

3660

struct kmem_cache *cachep;

3661

void *ret;

3661

void *ret;

3662

3663

cachep = kmem_find_general_cachep(size, flags);

3663

cachep = kmem_find_general_cachep(size, flags);

3664

if (unlikely(ZERO_OR_NULL_PTR(cachep)))

3664

if (unlikely(ZERO_OR_NULL_PTR(cachep)))

3665

return cachep;

3665

return cachep;

3666

ret = kmem_cache_alloc_node_notrace(cachep, flags, node);

3666

ret = kmem_cache_alloc_node_notrace(cachep, flags, node);

3667

3668

trace_kmalloc_node((unsigned long) caller, ret,

3668

trace_kmalloc_node((unsigned long) caller, ret,

3669

size, cachep->buffer_size, flags, node);

3669

size, cachep->buffer_size, flags, node);

3670

3671

return ret;

3671

return ret;

3672

}

3672

}

3673

3674

#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)

3674

#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)

3675

void *__kmalloc_node(size_t size, gfp_t flags, int node)

3675

void *__kmalloc_node(size_t size, gfp_t flags, int node)

3676

{

3676

{

3677

return __do_kmalloc_node(size, flags, node,

3677

return __do_kmalloc_node(size, flags, node,

3678

__builtin_return_address(0));

3678

__builtin_return_address(0));

3679

}

3679

}

3680

EXPORT_SYMBOL(__kmalloc_node);

3680

EXPORT_SYMBOL(__kmalloc_node);

3681

3682

void *__kmalloc_node_track_caller(size_t size, gfp_t flags,

3682

void *__kmalloc_node_track_caller(size_t size, gfp_t flags,

3683

int node, unsigned long caller)

3683

int node, unsigned long caller)

3684

{

3684

{

3685

return __do_kmalloc_node(size, flags, node, (void *)caller);

3685

return __do_kmalloc_node(size, flags, node, (void *)caller);

3686

}

3686

}

3687

EXPORT_SYMBOL(__kmalloc_node_track_caller);

3687

EXPORT_SYMBOL(__kmalloc_node_track_caller);

3688

#else

3688

#else

3689

void *__kmalloc_node(size_t size, gfp_t flags, int node)

3689

void *__kmalloc_node(size_t size, gfp_t flags, int node)

3690

{

3690

{

3691

return __do_kmalloc_node(size, flags, node, NULL);

3691

return __do_kmalloc_node(size, flags, node, NULL);

3692

}

3692

}

3693

EXPORT_SYMBOL(__kmalloc_node);

3693

EXPORT_SYMBOL(__kmalloc_node);

3694

#endif /* CONFIG_DEBUG_SLAB || CONFIG_TRACING */

3694

#endif /* CONFIG_DEBUG_SLAB || CONFIG_TRACING */

3695

#endif /* CONFIG_NUMA */

3695

#endif /* CONFIG_NUMA */

3696

3697

/**

3697

/**

3698

* __do_kmalloc - allocate memory

3698

* __do_kmalloc - allocate memory

3699

* @size: how many bytes of memory are required.

3699

* @size: how many bytes of memory are required.

3700

* @flags: the type of memory to allocate (see kmalloc).

3700

* @flags: the type of memory to allocate (see kmalloc).

3701

* @caller: function caller for debug tracking of the caller

3701

* @caller: function caller for debug tracking of the caller

3702

*/

3702

*/

3703

static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,

3703

static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,

3704

void *caller)

3704

void *caller)

3705

{

3705

{

3706

struct kmem_cache *cachep;

3706

struct kmem_cache *cachep;

3707

void *ret;

3707

void *ret;

3708

3709

/* If you want to save a few bytes .text space: replace

3709

/* If you want to save a few bytes .text space: replace

3710

* __ with kmem_.

3710

* __ with kmem_.

3711

* Then kmalloc uses the uninlined functions instead of the inline

3711

* Then kmalloc uses the uninlined functions instead of the inline

3712

* functions.

3712

* functions.

3713

*/

3713

*/

3714

cachep = __find_general_cachep(size, flags);

3714

cachep = __find_general_cachep(size, flags);

3715

if (unlikely(ZERO_OR_NULL_PTR(cachep)))

3715

if (unlikely(ZERO_OR_NULL_PTR(cachep)))

3716

return cachep;

3716

return cachep;

3717

ret = __cache_alloc(cachep, flags, caller);

3717

ret = __cache_alloc(cachep, flags, caller);

3718

3719

trace_kmalloc((unsigned long) caller, ret,

3719

trace_kmalloc((unsigned long) caller, ret,

3720

size, cachep->buffer_size, flags);

3720

size, cachep->buffer_size, flags);

3721

3722

return ret;

3722

return ret;

3723

}

3723

}

3724

3725

3726

#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)

3726

#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)

3727

void *__kmalloc(size_t size, gfp_t flags)

3727

void *__kmalloc(size_t size, gfp_t flags)

3728

{

3728

{

3729

return __do_kmalloc(size, flags, __builtin_return_address(0));

3729

return __do_kmalloc(size, flags, __builtin_return_address(0));

3730

}

3730

}

3731

EXPORT_SYMBOL(__kmalloc);

3731

EXPORT_SYMBOL(__kmalloc);

3732

3733

void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)

3733

void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)

3734

{

3734

{

3735

return __do_kmalloc(size, flags, (void *)caller);

3735

return __do_kmalloc(size, flags, (void *)caller);

3736

}

3736

}

3737

EXPORT_SYMBOL(__kmalloc_track_caller);

3737

EXPORT_SYMBOL(__kmalloc_track_caller);

3738

3739

#else

3739

#else

3740

void *__kmalloc(size_t size, gfp_t flags)

3740

void *__kmalloc(size_t size, gfp_t flags)

3741

{

3741

{

3742

return __do_kmalloc(size, flags, NULL);

3742

return __do_kmalloc(size, flags, NULL);

3743

}

3743

}

3744

EXPORT_SYMBOL(__kmalloc);

3744

EXPORT_SYMBOL(__kmalloc);

3745

#endif

3745

#endif

3746

3747

/**

3747

/**

3748

* kmem_cache_free - Deallocate an object

3748

* kmem_cache_free - Deallocate an object

3749

* @cachep: The cache the allocation was from.

3749

* @cachep: The cache the allocation was from.

3750

* @objp: The previously allocated object.

3750

* @objp: The previously allocated object.

3751

*

3751

*

3752

* Free an object which was previously allocated from this

3752

* Free an object which was previously allocated from this

3753

* cache.

3753

* cache.

3754

*/

3754

*/

3755

void kmem_cache_free(struct kmem_cache *cachep, void *objp)

3755

void kmem_cache_free(struct kmem_cache *cachep, void *objp)

3756

{

3756

{

3757

unsigned long flags;

3757

unsigned long flags;

3758

3759

local_irq_save(flags);

3759

local_irq_save(flags);

3760

debug_check_no_locks_freed(objp, obj_size(cachep));

3760

debug_check_no_locks_freed(objp, obj_size(cachep));

3761

if (!(cachep->flags & SLAB_DEBUG_OBJECTS))

3761

if (!(cachep->flags & SLAB_DEBUG_OBJECTS))

3762

debug_check_no_obj_freed(objp, obj_size(cachep));

3762

debug_check_no_obj_freed(objp, obj_size(cachep));

3763

__cache_free(cachep, objp);

3763

__cache_free(cachep, objp);

3764

local_irq_restore(flags);

3764

local_irq_restore(flags);

3765

3766

trace_kmem_cache_free(_RET_IP_, objp);

3766

trace_kmem_cache_free(_RET_IP_, objp);

3767

}

3767

}

3768

EXPORT_SYMBOL(kmem_cache_free);

3768

EXPORT_SYMBOL(kmem_cache_free);

3769

3770

/**

3770

/**

3771

* kfree - free previously allocated memory

3771

* kfree - free previously allocated memory

3772

* @objp: pointer returned by kmalloc.

3772

* @objp: pointer returned by kmalloc.

3773

*

3773

*

3774

* If @objp is NULL, no operation is performed.

3774

* If @objp is NULL, no operation is performed.

3775

*

3775

*

3776

* Don't free memory not originally allocated by kmalloc()

3776

* Don't free memory not originally allocated by kmalloc()

3777

* or you will run into trouble.

3777

* or you will run into trouble.

3778

*/

3778

*/

3779

void kfree(const void *objp)

3779

void kfree(const void *objp)

3780

{

3780

{

3781

struct kmem_cache *c;

3781

struct kmem_cache *c;

3782

unsigned long flags;

3782

unsigned long flags;

3783

3784

trace_kfree(_RET_IP_, objp);

3784

trace_kfree(_RET_IP_, objp);

3785

3786

if (unlikely(ZERO_OR_NULL_PTR(objp)))

3786

if (unlikely(ZERO_OR_NULL_PTR(objp)))

3787

return;

3787

return;

3788

local_irq_save(flags);

3788

local_irq_save(flags);

3789

kfree_debugcheck(objp);

3789

kfree_debugcheck(objp);

3790

c = virt_to_cache(objp);

3790

c = virt_to_cache(objp);

3791

debug_check_no_locks_freed(objp, obj_size(c));

3791

debug_check_no_locks_freed(objp, obj_size(c));

3792

debug_check_no_obj_freed(objp, obj_size(c));

3792

debug_check_no_obj_freed(objp, obj_size(c));

3793

__cache_free(c, (void *)objp);

3793

__cache_free(c, (void *)objp);

3794

local_irq_restore(flags);

3794

local_irq_restore(flags);

3795

}

3795

}

3796

EXPORT_SYMBOL(kfree);

3796

EXPORT_SYMBOL(kfree);

3797

3798

/* Return obj_size() of @cachep. */
unsigned int kmem_cache_size(struct kmem_cache *cachep)
{
	unsigned int size = obj_size(cachep);

	return size;
}
EXPORT_SYMBOL(kmem_cache_size);

3803

3804

const char *kmem_cache_name(struct kmem_cache *cachep)

3804

const char *kmem_cache_name(struct kmem_cache *cachep)

3805

{

3805

{

3806

return cachep->name;

3806

return cachep->name;

3807

}

3807

}

3808

EXPORT_SYMBOL_GPL(kmem_cache_name);

3808

EXPORT_SYMBOL_GPL(kmem_cache_name);

3809

3810

/*

3810

/*

3811

* This initializes kmem_list3 or resizes various caches for all nodes.

3811

* This initializes kmem_list3 or resizes various caches for all nodes.

3812

*/

3812

*/

3813

static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)

3813

static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)

3814

{

3814

{

3815

int node;

3815

int node;

3816

struct kmem_list3 *l3;

3816

struct kmem_list3 *l3;

3817

struct array_cache *new_shared;

3817

struct array_cache *new_shared;

3818

struct array_cache **new_alien = NULL;

3818

struct array_cache **new_alien = NULL;

3819

3820

for_each_online_node(node) {

3820

for_each_online_node(node) {

3821

3822

if (use_alien_caches) {

3822

if (use_alien_caches) {

3823

new_alien = alloc_alien_cache(node, cachep->limit, gfp);

3823

new_alien = alloc_alien_cache(node, cachep->limit, gfp);

3824

if (!new_alien)

3824

if (!new_alien)

3825

goto fail;

3825

goto fail;

3826

}

3826

}

3827

3828

new_shared = NULL;

3828

new_shared = NULL;

3829

if (cachep->shared) {

3829

if (cachep->shared) {

3830

new_shared = alloc_arraycache(node,

3830

new_shared = alloc_arraycache(node,

3831

cachep->shared*cachep->batchcount,

3831

cachep->shared*cachep->batchcount,

3832

0xbaadf00d, gfp);

3832

0xbaadf00d, gfp);

3833

if (!new_shared) {

3833

if (!new_shared) {

3834

free_alien_cache(new_alien);

3834

free_alien_cache(new_alien);

3835

goto fail;

3835

goto fail;

3836

}

3836

}

3837

}

3837

}

3838

3839

l3 = cachep->nodelists[node];

3839

l3 = cachep->nodelists[node];

3840

if (l3) {

3840

if (l3) {

3841

struct array_cache *shared = l3->shared;

3841

struct array_cache *shared = l3->shared;

3842

3843

spin_lock_irq(&l3->list_lock);

3843

spin_lock_irq(&l3->list_lock);

3844

3845

if (shared)

3845

if (shared)

3846

free_block(cachep, shared->entry,

3846

free_block(cachep, shared->entry,

3847

shared->avail, node);

3847

shared->avail, node);

3848

3849

l3->shared = new_shared;

3849

l3->shared = new_shared;

3850

if (!l3->alien) {

3850

if (!l3->alien) {

3851

l3->alien = new_alien;

3851

l3->alien = new_alien;

3852

new_alien = NULL;

3852

new_alien = NULL;

3853

}

3853

}

3854

l3->free_limit = (1 + nr_cpus_node(node)) *

3854

l3->free_limit = (1 + nr_cpus_node(node)) *

3855

cachep->batchcount + cachep->num;

3855

cachep->batchcount + cachep->num;

3856

spin_unlock_irq(&l3->list_lock);

3856

spin_unlock_irq(&l3->list_lock);

3857

kfree(shared);

3857

kfree(shared);

3858

free_alien_cache(new_alien);

3858

free_alien_cache(new_alien);

3859

continue;

3859

continue;

3860

}

3860

}

3861

l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);

3861

l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);

3862

if (!l3) {

3862

if (!l3) {

3863

free_alien_cache(new_alien);

3863

free_alien_cache(new_alien);

3864

kfree(new_shared);

3864

kfree(new_shared);

3865

goto fail;

3865

goto fail;

3866

}

3866

}

3867

3868

kmem_list3_init(l3);

3868

kmem_list3_init(l3);

3869

l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +

3869

l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +

3870

((unsigned long)cachep) % REAPTIMEOUT_LIST3;

3870

((unsigned long)cachep) % REAPTIMEOUT_LIST3;

3871

l3->shared = new_shared;

3871

l3->shared = new_shared;

3872

l3->alien = new_alien;

3872

l3->alien = new_alien;

3873

l3->free_limit = (1 + nr_cpus_node(node)) *

3873

l3->free_limit = (1 + nr_cpus_node(node)) *

3874

cachep->batchcount + cachep->num;

3874

cachep->batchcount + cachep->num;

3875

cachep->nodelists[node] = l3;

3875

cachep->nodelists[node] = l3;

3876

}

3876

}

3877

return 0;

3877

return 0;

3878

3879

fail:

3879

fail:

3880

if (!cachep->next.next) {

3880

if (!cachep->next.next) {

3881

/* Cache is not active yet. Roll back what we did */

3881

/* Cache is not active yet. Roll back what we did */

3882

node--;

3882

node--;

3883

while (node >= 0) {

3883

while (node >= 0) {

3884

if (cachep->nodelists[node]) {

3884

if (cachep->nodelists[node]) {

3885

l3 = cachep->nodelists[node];

3885

l3 = cachep->nodelists[node];

3886

3887

kfree(l3->shared);

3887

kfree(l3->shared);

3888

free_alien_cache(l3->alien);

3888

free_alien_cache(l3->alien);

3889

kfree(l3);

3889

kfree(l3);

3890

cachep->nodelists[node] = NULL;

3890

cachep->nodelists[node] = NULL;

3891

}

3891

}

3892

node--;

3892

node--;

3893

}

3893

}

3894

}

3894

}

3895

return -ENOMEM;

3895

return -ENOMEM;

3896

}

3896

}

3897

3898

struct ccupdate_struct {

3898

struct ccupdate_struct {

3899

struct kmem_cache *cachep;

3899

struct kmem_cache *cachep;

3900

struct array_cache *new[NR_CPUS];

3900

struct array_cache *new[NR_CPUS];

3901

};

3901

};

3902

3903

static void do_ccupdate_local(void *info)

3903

static void do_ccupdate_local(void *info)

3904

{

3904

{

3905

struct ccupdate_struct *new = info;

3905

struct ccupdate_struct *new = info;

3906

struct array_cache *old;

3906

struct array_cache *old;

3907

3908

check_irq_off();

3908

check_irq_off();

3909

old = cpu_cache_get(new->cachep);

3909

old = cpu_cache_get(new->cachep);

3910

3911

new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];

3911

new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];

3912

new->new[smp_processor_id()] = old;

3912

new->new[smp_processor_id()] = old;

3913

}

3913

}

3914

3915

/* Always called with the cache_chain_mutex held */

3915

/* Always called with the cache_chain_mutex held */

3916

static int do_tune_cpucache(struct kmem_cache *cachep, int limit,

3916

static int do_tune_cpucache(struct kmem_cache *cachep, int limit,

3917

int batchcount, int shared, gfp_t gfp)

3917

int batchcount, int shared, gfp_t gfp)

3918

{

3918

{

3919

struct ccupdate_struct *new;

3919

struct ccupdate_struct *new;

3920

int i;

3920

int i;

3921

3922

new = kzalloc(sizeof(*new), gfp);

3922

new = kzalloc(sizeof(*new), gfp);

3923

if (!new)

3923

if (!new)

3924

return -ENOMEM;

3924

return -ENOMEM;

3925

3926

for_each_online_cpu(i) {

3926

for_each_online_cpu(i) {

3927

new->new[i] = alloc_arraycache(cpu_to_node(i), limit,

3927

new->new[i] = alloc_arraycache(cpu_to_node(i), limit,

3928

batchcount, gfp);

3928

batchcount, gfp);

3929

if (!new->new[i]) {

3929

if (!new->new[i]) {

3930

for (i--; i >= 0; i--)

3930

for (i--; i >= 0; i--)

3931

kfree(new->new[i]);

3931

kfree(new->new[i]);

3932

kfree(new);

3932

kfree(new);

3933

return -ENOMEM;

3933

return -ENOMEM;

3934

}

3934

}

3935

}

3935

}

3936

new->cachep = cachep;

3936

new->cachep = cachep;

3937

3938

on_each_cpu(do_ccupdate_local, (void *)new, 1);

3938

on_each_cpu(do_ccupdate_local, (void *)new, 1);

3939

3940

check_irq_on();

3940

check_irq_on();

3941

cachep->batchcount = batchcount;

3941

cachep->batchcount = batchcount;

3942

cachep->limit = limit;

3942

cachep->limit = limit;

3943

cachep->shared = shared;

3943

cachep->shared = shared;

3944

3945

for_each_online_cpu(i) {

3945

for_each_online_cpu(i) {

3946

struct array_cache *ccold = new->new[i];

3946

struct array_cache *ccold = new->new[i];

3947

if (!ccold)

3947

if (!ccold)

3948

continue;

3948

continue;

3949

spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);

3949

spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);

3950

free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));

3950

free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));

3951

spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);

3951

spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);

3952

kfree(ccold);

3952

kfree(ccold);

3953

}

3953

}

3954

kfree(new);

3954

kfree(new);

3955

return alloc_kmemlist(cachep, gfp);

3955

return alloc_kmemlist(cachep, gfp);

3956

}

3956

}

3957

3958

/* Called with cache_chain_mutex held always */

3958

/* Called with cache_chain_mutex held always */

3959

static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)

3959

static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)

3960

{

3960

{

3961

int err;

3961

int err;

3962

int limit, shared;

3962

int limit, shared;

3963

3964

/*

3964

/*

3965

* The head array serves three purposes:

3965

* The head array serves three purposes:

3966

* - create a LIFO ordering, i.e. return objects that are cache-warm

3966

* - create a LIFO ordering, i.e. return objects that are cache-warm

3967

* - reduce the number of spinlock operations.

3967

* - reduce the number of spinlock operations.

3968

* - reduce the number of linked list operations on the slab and

3968

* - reduce the number of linked list operations on the slab and

3969

* bufctl chains: array operations are cheaper.

3969

* bufctl chains: array operations are cheaper.

3970

* The numbers are guessed, we should auto-tune as described by

3970

* The numbers are guessed, we should auto-tune as described by

3971

* Bonwick.

3971

* Bonwick.

3972

*/

3972

*/

3973

if (cachep->buffer_size > 131072)

3973

if (cachep->buffer_size > 131072)

3974

limit = 1;

3974

limit = 1;

3975

else if (cachep->buffer_size > PAGE_SIZE)

3975

else if (cachep->buffer_size > PAGE_SIZE)

3976

limit = 8;

3976

limit = 8;

3977

else if (cachep->buffer_size > 1024)

3977

else if (cachep->buffer_size > 1024)

3978

limit = 24;

3978

limit = 24;

3979

else if (cachep->buffer_size > 256)

3979

else if (cachep->buffer_size > 256)

3980

limit = 54;

3980

limit = 54;

3981

else

3981

else

3982

limit = 120;

3982

limit = 120;

3983

3984

/*

3984

/*

3985

* CPU bound tasks (e.g. network routing) can exhibit cpu bound

3985

* CPU bound tasks (e.g. network routing) can exhibit cpu bound

3986

* allocation behaviour: Most allocs on one cpu, most free operations

3986

* allocation behaviour: Most allocs on one cpu, most free operations

3987

* on another cpu. For these cases, an efficient object passing between

3987

* on another cpu. For these cases, an efficient object passing between

3988

* cpus is necessary. This is provided by a shared array. The array

3988

* cpus is necessary. This is provided by a shared array. The array

3989

* replaces Bonwick's magazine layer.

3989

* replaces Bonwick's magazine layer.

3990

* On uniprocessor, it's functionally equivalent (but less efficient)

3990

* On uniprocessor, it's functionally equivalent (but less efficient)

3991

* to a larger limit. Thus disabled by default.

3991

* to a larger limit. Thus disabled by default.

3992

*/

3992

*/

3993

shared = 0;

3993

shared = 0;

3994

if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)

3994

if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)

3995

shared = 8;

3995

shared = 8;

3996

3997

#if DEBUG

3997

#if DEBUG

3998

/*

3998

/*

3999

* With debugging enabled, large batchcount lead to excessively long

3999

* With debugging enabled, large batchcount lead to excessively long

4000

* periods with disabled local interrupts. Limit the batchcount

4000

* periods with disabled local interrupts. Limit the batchcount

4001

*/

4001

*/

4002

if (limit > 32)

4002

if (limit > 32)

4003

limit = 32;

4003

limit = 32;

4004

#endif

4004

#endif

4005

err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);

4005

err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);

4006

if (err)

4006

if (err)

4007

printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",

4007

printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",

4008

cachep->name, -err);

4008

cachep->name, -err);

4009

return err;

4009

return err;

4010

}

4010

}

4011

4012

/*

4012

/*

4013

* Drain an array if it contains any elements taking the l3 lock only if

4013

* Drain an array if it contains any elements taking the l3 lock only if

4014

* necessary. Note that the l3 listlock also protects the array_cache

4014

* necessary. Note that the l3 listlock also protects the array_cache

4015

* if drain_array() is used on the shared array.

4015

* if drain_array() is used on the shared array.

4016

*/

4016

*/

4017

void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,

4017

void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,

4018

struct array_cache *ac, int force, int node)

4018

struct array_cache *ac, int force, int node)

4019

{

4019

{

4020

int tofree;

4020

int tofree;

4021

4022

if (!ac || !ac->avail)

4022

if (!ac || !ac->avail)

4023

return;

4023

return;

4024

if (ac->touched && !force) {

4024

if (ac->touched && !force) {

4025

ac->touched = 0;

4025

ac->touched = 0;

4026

} else {

4026

} else {

4027

spin_lock_irq(&l3->list_lock);

4027

spin_lock_irq(&l3->list_lock);

4028

if (ac->avail) {

4028

if (ac->avail) {

4029

tofree = force ? ac->avail : (ac->limit + 4) / 5;

4029

tofree = force ? ac->avail : (ac->limit + 4) / 5;

4030

if (tofree > ac->avail)

4030

if (tofree > ac->avail)

4031

tofree = (ac->avail + 1) / 2;

4031

tofree = (ac->avail + 1) / 2;

4032

free_block(cachep, ac->entry, tofree, node);

4032

free_block(cachep, ac->entry, tofree, node);

4033

ac->avail -= tofree;

4033

ac->avail -= tofree;

4034

memmove(ac->entry, &(ac->entry[tofree]),

4034

memmove(ac->entry, &(ac->entry[tofree]),

4035

sizeof(void *) * ac->avail);

4035

sizeof(void *) * ac->avail);

4036

}

4036

}

4037

spin_unlock_irq(&l3->list_lock);

4037

spin_unlock_irq(&l3->list_lock);

4038

}

4038

}

4039

}

4039

}

4040

4041

/**

4041

/**

4042

* cache_reap - Reclaim memory from caches.

4042

* cache_reap - Reclaim memory from caches.

4043

* @w: work descriptor

4043

* @w: work descriptor

4044

*

4044

*

4045

* Called from workqueue/eventd every few seconds.

4045

* Called from workqueue/eventd every few seconds.

4046

* Purpose:

4046

* Purpose:

4047

* - clear the per-cpu caches for this CPU.

4047

* - clear the per-cpu caches for this CPU.

4048

* - return freeable pages to the main free memory pool.

4048

* - return freeable pages to the main free memory pool.

4049

*

4049

*

4050

* If we cannot acquire the cache chain mutex then just give up - we'll try

4050

* If we cannot acquire the cache chain mutex then just give up - we'll try

4051

* again on the next iteration.

4051

* again on the next iteration.

4052

*/

4052

*/

4053

static void cache_reap(struct work_struct *w)

4053

static void cache_reap(struct work_struct *w)

4054

{

4054

{

4055

struct kmem_cache *searchp;

4055

struct kmem_cache *searchp;

4056

struct kmem_list3 *l3;

4056

struct kmem_list3 *l3;

4057

int node = numa_node_id();

4057

int node = numa_node_id();

4058

struct delayed_work *work = to_delayed_work(w);

4058

struct delayed_work *work = to_delayed_work(w);

4059

4060

if (!mutex_trylock(&cache_chain_mutex))

4060

if (!mutex_trylock(&cache_chain_mutex))

4061

/* Give up. Setup the next iteration. */

4061

/* Give up. Setup the next iteration. */

4062

goto out;

4062

goto out;

4063

4064

list_for_each_entry(searchp, &cache_chain, next) {

4064

list_for_each_entry(searchp, &cache_chain, next) {

4065

check_irq_on();

4065

check_irq_on();

4066

4067

/*

4067

/*

4068

* We only take the l3 lock if absolutely necessary and we

4068

* We only take the l3 lock if absolutely necessary and we

4069

* have established with reasonable certainty that

4069

* have established with reasonable certainty that

4070

* we can do some work if the lock was obtained.

4070

* we can do some work if the lock was obtained.

4071

*/

4071

*/

4072

l3 = searchp->nodelists[node];

4072

l3 = searchp->nodelists[node];

4073

4074

reap_alien(searchp, l3);

4074

reap_alien(searchp, l3);

4075

4076

drain_array(searchp, l3, cpu_cache_get(searchp), 0, node);

4076

drain_array(searchp, l3, cpu_cache_get(searchp), 0, node);

4077

4078

/*

4078

/*

4079

* These are racy checks but it does not matter

4079

* These are racy checks but it does not matter

4080

* if we skip one check or scan twice.

4080

* if we skip one check or scan twice.

4081

*/

4081

*/

4082

if (time_after(l3->next_reap, jiffies))

4082

if (time_after(l3->next_reap, jiffies))

4083

goto next;

4083

goto next;

4084

4085

l3->next_reap = jiffies + REAPTIMEOUT_LIST3;

4085

l3->next_reap = jiffies + REAPTIMEOUT_LIST3;

4086

4087

drain_array(searchp, l3, l3->shared, 0, node);

4087

drain_array(searchp, l3, l3->shared, 0, node);

4088

4089

if (l3->free_touched)

4089

if (l3->free_touched)

4090

l3->free_touched = 0;

4090

l3->free_touched = 0;

4091

else {

4091

else {

4092

int freed;

4092

int freed;

4093

4094

freed = drain_freelist(searchp, l3, (l3->free_limit +

4094

freed = drain_freelist(searchp, l3, (l3->free_limit +

4095

5 * searchp->num - 1) / (5 * searchp->num));

4095

5 * searchp->num - 1) / (5 * searchp->num));

4096

STATS_ADD_REAPED(searchp, freed);

4096

STATS_ADD_REAPED(searchp, freed);

4097

}

4097

}

4098

cond_resched();

4099

cond_resched();

4100

}

4100

}

4101

check_irq_on();

4101

check_irq_on();

4102

mutex_unlock(&cache_chain_mutex);

4102

mutex_unlock(&cache_chain_mutex);

4103

next_reap_node();

4103

next_reap_node();

4104

out:

4104

out:

4105

/* Set up the next iteration */

4105

/* Set up the next iteration */

4106

schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));

4106

schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));

4107

}

4107

}

4108

4109

#ifdef CONFIG_SLABINFO

4109

#ifdef CONFIG_SLABINFO

4110

4111

/*
 * Emit the header lines of /proc/slabinfo into @m: a version banner
 * followed by one line naming every column. Extra column groups are
 * named when STATS is enabled.
 */
static void print_slabinfo_header(struct seq_file *m)
{
	/*
	 * Output format version, so at least we can change it
	 * without _too_ many complaints.
	 */
#if STATS
	static const char version[] = "slabinfo - version: 2.1 (statistics)\n";
#else
	static const char version[] = "slabinfo - version: 2.1\n";
#endif

	seq_puts(m, version);
	seq_puts(m, "# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#if STATS
	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
		    "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
	seq_putc(m, '\n');
}

4133

4134

/*
 * seq_file ->start: take cache_chain_mutex, print the header when the
 * read begins at position 0, and return the cache_chain element at *pos.
 * The mutex stays held on return; s_stop() drops it.
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&cache_chain_mutex);

	if (*pos == 0)
		print_slabinfo_header(m);

	return seq_list_start(&cache_chain, *pos);
}

4144

4145

/*
 * seq_file ->next: step from element @p of cache_chain to the following
 * one, letting seq_list_next() update *pos.
 */
static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
	void *following = seq_list_next(p, &cache_chain, pos);

	return following;
}

4149

4150

static void s_stop(struct seq_file *m, void *p)

4150

static void s_stop(struct seq_file *m, void *p)

4151

{

4151

{

4152

mutex_unlock(&cache_chain_mutex);

4152

mutex_unlock(&cache_chain_mutex);

4153

}

4153

}

4154

4155

static int s_show(struct seq_file *m, void *p)

4155

static int s_show(struct seq_file *m, void *p)

4156

{

4156

{

4157

struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);

4157

struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);

4158

struct slab *slabp;

4158

struct slab *slabp;

4159

unsigned long active_objs;

4159

unsigned long active_objs;

4160

unsigned long num_objs;

4160

unsigned long num_objs;

4161

unsigned long active_slabs = 0;

4161

unsigned long active_slabs = 0;

4162

unsigned long num_slabs, free_objects = 0, shared_avail = 0;

4162

unsigned long num_slabs, free_objects = 0, shared_avail = 0;

4163

const char *name;

4163

const char *name;

4164

char *error = NULL;

4164

char *error = NULL;

4165

int node;

4165

int node;

4166

struct kmem_list3 *l3;

4166

struct kmem_list3 *l3;

4167

4168

active_objs = 0;

4168

active_objs = 0;

4169

num_slabs = 0;

4169

num_slabs = 0;

4170

for_each_online_node(node) {

4170

for_each_online_node(node) {

4171

l3 = cachep->nodelists[node];

4171

l3 = cachep->nodelists[node];

4172

if (!l3)

4172

if (!l3)

4173

continue;

4173

continue;

4174

4175

check_irq_on();

4175

check_irq_on();

4176

spin_lock_irq(&l3->list_lock);

4176

spin_lock_irq(&l3->list_lock);

4177

4178

list_for_each_entry(slabp, &l3->slabs_full, list) {

4178

list_for_each_entry(slabp, &l3->slabs_full, list) {

4179

if (slabp->inuse != cachep->num && !error)

4179

if (slabp->inuse != cachep->num && !error)

4180

error = "slabs_full accounting error";

4180

error = "slabs_full accounting error";

4181

active_objs += cachep->num;

4181

active_objs += cachep->num;

4182

active_slabs++;

4182

active_slabs++;

4183

}

4183

}

4184

list_for_each_entry(slabp, &l3->slabs_partial, list) {

4184

list_for_each_entry(slabp, &l3->slabs_partial, list) {

4185

if (slabp->inuse == cachep->num && !error)

4185

if (slabp->inuse == cachep->num && !error)

4186

error = "slabs_partial inuse accounting error";

4186

error = "slabs_partial inuse accounting error";

4187

if (!slabp->inuse && !error)

4187

if (!slabp->inuse && !error)

4188

error = "slabs_partial/inuse accounting error";

4188

error = "slabs_partial/inuse accounting error";

4189

active_objs += slabp->inuse;

4189

active_objs += slabp->inuse;

4190

active_slabs++;

4190

active_slabs++;

4191

}

4191

}

4192

list_for_each_entry(slabp, &l3->slabs_free, list) {

4192

list_for_each_entry(slabp, &l3->slabs_free, list) {

4193

if (slabp->inuse && !error)

4193

if (slabp->inuse && !error)

4194

error = "slabs_free/inuse accounting error";

4194

error = "slabs_free/inuse accounting error";

4195

num_slabs++;

4195

num_slabs++;

4196

}

4196

}

4197

free_objects += l3->free_objects;

4197

free_objects += l3->free_objects;

4198

if (l3->shared)

4198

if (l3->shared)

4199

shared_avail += l3->shared->avail;

4199

shared_avail += l3->shared->avail;

4200

4201

spin_unlock_irq(&l3->list_lock);

4201

spin_unlock_irq(&l3->list_lock);

4202

}

4202

}

4203

num_slabs += active_slabs;

4203

num_slabs += active_slabs;

4204

num_objs = num_slabs * cachep->num;

4204

num_objs = num_slabs * cachep->num;

4205

if (num_objs - active_objs != free_objects && !error)

4205

if (num_objs - active_objs != free_objects && !error)

4206

error = "free_objects accounting error";

4206

error = "free_objects accounting error";

4207

4208

name = cachep->name;

4208

name = cachep->name;

4209

if (error)

4209

if (error)

4210

printk(KERN_ERR "slab: cache %s error: %s\n", name, error);

4210

printk(KERN_ERR "slab: cache %s error: %s\n", name, error);

4211

4212

seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",

4212

seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",

4213

name, active_objs, num_objs, cachep->buffer_size,

4213

name, active_objs, num_objs, cachep->buffer_size,

4214

cachep->num, (1 << cachep->gfporder));

4214

cachep->num, (1 << cachep->gfporder));

4215

seq_printf(m, " : tunables %4u %4u %4u",

4215

seq_printf(m, " : tunables %4u %4u %4u",

4216

cachep->limit, cachep->batchcount, cachep->shared);

4216

cachep->limit, cachep->batchcount, cachep->shared);

4217

seq_printf(m, " : slabdata %6lu %6lu %6lu",

4217

seq_printf(m, " : slabdata %6lu %6lu %6lu",

4218

active_slabs, num_slabs, shared_avail);

4218

active_slabs, num_slabs, shared_avail);

4219

#if STATS

4219

#if STATS

4220

{ /* list3 stats */

4220

{ /* list3 stats */

4221

unsigned long high = cachep->high_mark;

4221

unsigned long high = cachep->high_mark;

4222

unsigned long allocs = cachep->num_allocations;

4222

unsigned long allocs = cachep->num_allocations;

4223

unsigned long grown = cachep->grown;

4223

unsigned long grown = cachep->grown;

4224

unsigned long reaped = cachep->reaped;

4224

unsigned long reaped = cachep->reaped;

4225

unsigned long errors = cachep->errors;

4225

unsigned long errors = cachep->errors;

4226

unsigned long max_freeable = cachep->max_freeable;

4226

unsigned long max_freeable = cachep->max_freeable;

4227

unsigned long node_allocs = cachep->node_allocs;

4227

unsigned long node_allocs = cachep->node_allocs;

4228

unsigned long node_frees = cachep->node_frees;

4228

unsigned long node_frees = cachep->node_frees;

4229

unsigned long overflows = cachep->node_overflow;

4229

unsigned long overflows = cachep->node_overflow;

4230

4231

seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \

4231

seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \

4232

%4lu %4lu %4lu %4lu %4lu", allocs, high, grown,

4232

%4lu %4lu %4lu %4lu %4lu", allocs, high, grown,

4233

reaped, errors, max_freeable, node_allocs,

4233

reaped, errors, max_freeable, node_allocs,

4234

node_frees, overflows);

4234

node_frees, overflows);

4235

}

4235

}

4236

/* cpu stats */

4236

/* cpu stats */

4237

{

4237

{

4238

unsigned long allochit = atomic_read(&cachep->allochit);

4238

unsigned long allochit = atomic_read(&cachep->allochit);

4239

unsigned long allocmiss = atomic_read(&cachep->allocmiss);

4239

unsigned long allocmiss = atomic_read(&cachep->allocmiss);

4240

unsigned long freehit = atomic_read(&cachep->freehit);

4240

unsigned long freehit = atomic_read(&cachep->freehit);

4241

unsigned long freemiss = atomic_read(&cachep->freemiss);

4241

unsigned long freemiss = atomic_read(&cachep->freemiss);

4242

4243

seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",

4243

seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",

4244

allochit, allocmiss, freehit, freemiss);

4244

allochit, allocmiss, freehit, freemiss);

4245

}

4245

}

4246

#endif

4246

#endif

4247

seq_putc(m, '\n');

4247

seq_putc(m, '\n');

4248

return 0;

4248

return 0;

4249

}

4249

}

4250

4251

/*

4251

/*

4252

* slabinfo_op - iterator that generates /proc/slabinfo

4252

* slabinfo_op - iterator that generates /proc/slabinfo

4253

*

4253

*

4254

* Output layout:

4254

* Output layout:

4255

* cache-name

4255

* cache-name

4256

* num-active-objs

4256

* num-active-objs

4257

* total-objs

4257

* total-objs

4258

* object size

4258

* object size

4259

* num-active-slabs

4259

* num-active-slabs

4260

* total-slabs

4260

* total-slabs

4261

* num-pages-per-slab

4261

* num-pages-per-slab

4262

* + further values on SMP and with statistics enabled

4262

* + further values on SMP and with statistics enabled

4263

*/

4263

*/

4264

4265

static const struct seq_operations slabinfo_op = {

4265

static const struct seq_operations slabinfo_op = {

4266

.start = s_start,

4266

.start = s_start,

4267

.next = s_next,

4267

.next = s_next,

4268

.stop = s_stop,

4268

.stop = s_stop,

4269

.show = s_show,

4269

.show = s_show,

4270

};

4270

};

4271

4272

#define MAX_SLABINFO_WRITE 128

4272

#define MAX_SLABINFO_WRITE 128

4273

/**

4273

/**

4274

* slabinfo_write - Tuning for the slab allocator

4274

* slabinfo_write - Tuning for the slab allocator

4275

* @file: unused

4275

* @file: unused

4276

* @buffer: user buffer

4276

* @buffer: user buffer

4277

* @count: data length

4277

* @count: data length

4278

* @ppos: unused

4278

* @ppos: unused

4279

*/

4279

*/

4280

ssize_t slabinfo_write(struct file *file, const char __user * buffer,

4280

ssize_t slabinfo_write(struct file *file, const char __user * buffer,

4281

size_t count, loff_t *ppos)

4281

size_t count, loff_t *ppos)

4282

{

4282

{

4283

char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;

4283

char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;

4284

int limit, batchcount, shared, res;

4284

int limit, batchcount, shared, res;

4285

struct kmem_cache *cachep;

4285

struct kmem_cache *cachep;

4286

4287

if (count > MAX_SLABINFO_WRITE)

4287

if (count > MAX_SLABINFO_WRITE)

4288

return -EINVAL;

4288

return -EINVAL;

4289

if (copy_from_user(&kbuf, buffer, count))

4289

if (copy_from_user(&kbuf, buffer, count))

4290

return -EFAULT;

4290

return -EFAULT;

4291

kbuf[MAX_SLABINFO_WRITE] = '\0';

4291

kbuf[MAX_SLABINFO_WRITE] = '\0';

4292

4293

tmp = strchr(kbuf, ' ');

4293

tmp = strchr(kbuf, ' ');

4294

if (!tmp)

4294

if (!tmp)

4295

return -EINVAL;

4295

return -EINVAL;

4296

*tmp = '\0';

4296

*tmp = '\0';

4297

tmp++;

4297

tmp++;

4298

if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)

4298

if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)

4299

return -EINVAL;

4299

return -EINVAL;

4300

4301

/* Find the cache in the chain of caches. */

4301

/* Find the cache in the chain of caches. */

4302

mutex_lock(&cache_chain_mutex);

4302

mutex_lock(&cache_chain_mutex);

4303

res = -EINVAL;

4303

res = -EINVAL;

4304

list_for_each_entry(cachep, &cache_chain, next) {

4304

list_for_each_entry(cachep, &cache_chain, next) {

4305

if (!strcmp(cachep->name, kbuf)) {

4305

if (!strcmp(cachep->name, kbuf)) {

4306

if (limit < 1 || batchcount < 1 ||

4306

if (limit < 1 || batchcount < 1 ||

4307

batchcount > limit || shared < 0) {

4307

batchcount > limit || shared < 0) {

4308

res = 0;

4308

res = 0;

4309

} else {

4309

} else {

4310

res = do_tune_cpucache(cachep, limit,

4310

res = do_tune_cpucache(cachep, limit,

4311

batchcount, shared,

4311

batchcount, shared,

4312

GFP_KERNEL);

4312

GFP_KERNEL);

4313

}

4313

}

4314

break;

4314

break;

4315

}

4315

}

4316

}

4316

}

4317

mutex_unlock(&cache_chain_mutex);

4317

mutex_unlock(&cache_chain_mutex);

4318

if (res >= 0)

4318

if (res >= 0)

4319

res = count;

4319

res = count;

4320

return res;

4320

return res;

4321

}

4321

}

4322

4323

static int slabinfo_open(struct inode *inode, struct file *file)

4323

static int slabinfo_open(struct inode *inode, struct file *file)

4324

{

4324

{

4325

return seq_open(file, &slabinfo_op);

4325

return seq_open(file, &slabinfo_op);

4326

}

4326

}

4327

4328

static const struct file_operations proc_slabinfo_operations = {

4328

static const struct file_operations proc_slabinfo_operations = {

4329

.open = slabinfo_open,

4329

.open = slabinfo_open,

4330

.read = seq_read,

4330

.read = seq_read,

4331

.write = slabinfo_write,

4331

.write = slabinfo_write,

4332

.llseek = seq_lseek,

4332

.llseek = seq_lseek,

4333

.release = seq_release,

4333

.release = seq_release,

4334

};

4334

};

4335

4336

#ifdef CONFIG_DEBUG_SLAB_LEAK

4336

#ifdef CONFIG_DEBUG_SLAB_LEAK

4337

4338

/*
 * seq_file ->start for the leak report: take cache_chain_mutex and
 * return the cache_chain element at *pos. The mutex stays held on
 * return; s_stop() drops it.
 */
static void *leaks_start(struct seq_file *m, loff_t *pos)
{
	void *first;

	mutex_lock(&cache_chain_mutex);
	first = seq_list_start(&cache_chain, *pos);
	return first;
}

4343

4344

/*
 * Record one occurrence of caller address @v in the table @n.
 *
 * Table layout: n[0] is the capacity, n[1] the number of entries in use,
 * followed from n[2] on by (address, hit count) pairs kept sorted by
 * ascending address.
 *
 * A zero @v is ignored. A known address has its count bumped; an unknown
 * one is inserted at its sorted position with a count of 1.
 *
 * Returns 1 on success, or 0 when the table is full. In that case n[1]
 * has been raised to equal n[0] and nothing was stored.
 */
static inline int add_caller(unsigned long *n, unsigned long v)
{
	unsigned long *base;
	unsigned long *mid;
	int remaining;
	int half;

	if (!v)
		return 1;

	/* Binary search over the sorted pairs; base ends at the insert slot. */
	base = n + 2;
	for (remaining = n[1]; remaining; ) {
		half = remaining / 2;
		mid = base + 2 * half;

		if (*mid == v) {
			mid[1]++;
			return 1;
		}

		if (*mid > v) {
			remaining = half;
		} else {
			base = mid + 2;
			remaining -= half + 1;
		}
	}

	if (++n[1] == n[0])
		return 0;

	/* Open a one-pair gap at base by shifting the tail up. */
	memmove(base + 2, base,
		n[1] * 2 * sizeof(unsigned long) - ((char *)base - (char *)n));
	base[0] = v;
	base[1] = 1;
	return 1;
}

4373

4374

static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)

4374

static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)

4375

{

4375

{

4376

void *p;

4376

void *p;

4377

int i;

4377

int i;

4378

if (n[0] == n[1])

4378

if (n[0] == n[1])

4379

return;

4379

return;

4380

for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) {

4380

for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) {

4381

if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)

4381

if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)

4382

continue;

4382

continue;

4383

if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))

4383

if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))

4384

return;

4384

return;

4385

}

4385

}

4386

}

4386

}

4387

4388

/*
 * Print @address into @m. With CONFIG_KALLSYMS, and when the lookup
 * succeeds, the form is "symbol+offset/size", followed by " [module]"
 * if a module name was returned. Otherwise the raw pointer is printed.
 */
static void show_symbol(struct seq_file *m, unsigned long address)
{
#ifdef CONFIG_KALLSYMS
	char modname[MODULE_NAME_LEN], name[KSYM_NAME_LEN];
	unsigned long offset, size;

	if (!lookup_symbol_attrs(address, &size, &offset, modname, name)) {
		seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
		if (modname[0] != '\0')
			seq_printf(m, " [%s]", modname);
		return;
	}
#endif
	seq_printf(m, "%p", (void *)address);
}

4403

4404

static int leaks_show(struct seq_file *m, void *p)

4404

static int leaks_show(struct seq_file *m, void *p)

4405

{

4405

{

4406

struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);

4406

struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);

4407

struct slab *slabp;

4407

struct slab *slabp;

4408

struct kmem_list3 *l3;

4408

struct kmem_list3 *l3;

4409

const char *name;

4409

const char *name;

4410

unsigned long *n = m->private;

4410

unsigned long *n = m->private;

4411

int node;

4411

int node;

4412

int i;

4412

int i;

4413

4414

if (!(cachep->flags & SLAB_STORE_USER))

4414

if (!(cachep->flags & SLAB_STORE_USER))

4415

return 0;

4415

return 0;

4416

if (!(cachep->flags & SLAB_RED_ZONE))

4416

if (!(cachep->flags & SLAB_RED_ZONE))

4417

return 0;

4417

return 0;

4418

4419

/* OK, we can do it */

4419

/* OK, we can do it */

4420

4421

n[1] = 0;

4421

n[1] = 0;

4422

4423

for_each_online_node(node) {

4423

for_each_online_node(node) {

4424

l3 = cachep->nodelists[node];

4424

l3 = cachep->nodelists[node];

4425

if (!l3)

4425

if (!l3)

4426

continue;

4426

continue;

4427

4428

check_irq_on();

4428

check_irq_on();

4429

spin_lock_irq(&l3->list_lock);

4429

spin_lock_irq(&l3->list_lock);

4430

4431

list_for_each_entry(slabp, &l3->slabs_full, list)

4431

list_for_each_entry(slabp, &l3->slabs_full, list)

4432

handle_slab(n, cachep, slabp);

4432

handle_slab(n, cachep, slabp);

4433

list_for_each_entry(slabp, &l3->slabs_partial, list)

4433

list_for_each_entry(slabp, &l3->slabs_partial, list)

4434

handle_slab(n, cachep, slabp);

4434

handle_slab(n, cachep, slabp);

4435

spin_unlock_irq(&l3->list_lock);

4435

spin_unlock_irq(&l3->list_lock);

4436

}

4436

}

4437

name = cachep->name;

4437

name = cachep->name;

4438

if (n[0] == n[1]) {

4438

if (n[0] == n[1]) {

4439

/* Increase the buffer size */

4439

/* Increase the buffer size */

4440

mutex_unlock(&cache_chain_mutex);

4440

mutex_unlock(&cache_chain_mutex);

4441

m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL);

4441

m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL);

4442

if (!m->private) {

4442

if (!m->private) {

4443

/* Too bad, we are really out */

4443

/* Too bad, we are really out */

4444

m->private = n;

4444

m->private = n;

4445

mutex_lock(&cache_chain_mutex);

4445

mutex_lock(&cache_chain_mutex);

4446

return -ENOMEM;

4446

return -ENOMEM;

4447

}

4447

}

4448

*(unsigned long *)m->private = n[0] * 2;

4448

*(unsigned long *)m->private = n[0] * 2;

4449

kfree(n);

4449

kfree(n);

4450

mutex_lock(&cache_chain_mutex);

4450

mutex_lock(&cache_chain_mutex);

4451

/* Now make sure this entry will be retried */

4451

/* Now make sure this entry will be retried */

4452

m->count = m->size;

4452

m->count = m->size;

4453

return 0;

4453

return 0;

4454

}

4454

}

4455

for (i = 0; i < n[1]; i++) {

4455

for (i = 0; i < n[1]; i++) {

4456

seq_printf(m, "%s: %lu ", name, n[2*i+3]);

4456

seq_printf(m, "%s: %lu ", name, n[2*i+3]);

4457

show_symbol(m, n[2*i+2]);

4457

show_symbol(m, n[2*i+2]);

4458

seq_putc(m, '\n');

4458

seq_putc(m, '\n');

4459

}

4459

}

4460

4461

return 0;

4461

return 0;

4462

}

4462

}

4463

4464

static const struct seq_operations slabstats_op = {

4464

static const struct seq_operations slabstats_op = {

4465

.start = leaks_start,

4465

.start = leaks_start,

4466

.next = s_next,

4466

.next = s_next,

4467

.stop = s_stop,

4467

.stop = s_stop,

4468

.show = leaks_show,

4468

.show = leaks_show,

4469

};

4469

};

4470

4471

static int slabstats_open(struct inode *inode, struct file *file)

4471

static int slabstats_open(struct inode *inode, struct file *file)

4472

{

4472

{

4473

unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL);

4473

unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL);

4474

int ret = -ENOMEM;

4474

int ret = -ENOMEM;

4475

if (n) {

4475

if (n) {

4476

ret = seq_open(file, &slabstats_op);

4476

ret = seq_open(file, &slabstats_op);

4477

if (!ret) {

4477

if (!ret) {

4478

struct seq_file *m = file->private_data;

4478

struct seq_file *m = file->private_data;

4479

*n = PAGE_SIZE / (2 * sizeof(unsigned long));

4479

*n = PAGE_SIZE / (2 * sizeof(unsigned long));

4480

m->private = n;

4480

m->private = n;

4481

n = NULL;

4481

n = NULL;

4482

}

4482

}

4483

kfree(n);

4483

kfree(n);

4484

}

4484

}

4485

return ret;

4485

return ret;

4486

}

4486

}

4487

4488

static const struct file_operations proc_slabstats_operations = {

4488

static const struct file_operations proc_slabstats_operations = {

4489

.open = slabstats_open,

4489

.open = slabstats_open,

4490

.read = seq_read,

4490

.read = seq_read,

4491

.llseek = seq_lseek,

4491

.llseek = seq_lseek,

4492

.release = seq_release_private,

4492

.release = seq_release_private,

4493

};

4493

};

4494

#endif

4494

#endif

4495

4496

/*
 * Register the slab allocator's proc entries: "slabinfo" always and,
 * with CONFIG_DEBUG_SLAB_LEAK, "slab_allocators" as well. The results of
 * proc_create() are not checked; always returns 0.
 */
static int __init slab_proc_init(void)
{
	proc_create("slabinfo", S_IWUSR | S_IRUGO, NULL,
		    &proc_slabinfo_operations);
#ifdef CONFIG_DEBUG_SLAB_LEAK
	proc_create("slab_allocators", 0, NULL,
		    &proc_slabstats_operations);
#endif
	return 0;
}

4504

module_init(slab_proc_init);

4504

module_init(slab_proc_init);

4505

#endif

4505

#endif

4506

4507

/**

4507

/**

4508

* ksize - get the actual amount of memory allocated for a given object

4508

* ksize - get the actual amount of memory allocated for a given object

4509

* @objp: Pointer to the object

4509

* @objp: Pointer to the object

4510

*

4510

*

4511

* kmalloc may internally round up allocations and return more memory

4511

* kmalloc may internally round up allocations and return more memory

4512

* than requested. ksize() can be used to determine the actual amount of

4512

* than requested. ksize() can be used to determine the actual amount of

4513

* memory allocated. The caller may use this additional memory, even though

4513

* memory allocated. The caller may use this additional memory, even though

4514

* a smaller amount of memory was initially specified with the kmalloc call.

4514

* a smaller amount of memory was initially specified with the kmalloc call.

4515

* The caller must guarantee that objp points to a valid object previously

4515

* The caller must guarantee that objp points to a valid object previously

4516

* allocated with either kmalloc() or kmem_cache_alloc(). The object

4516

* allocated with either kmalloc() or kmem_cache_alloc(). The object

4517

* must not be freed during the duration of the call.

4517

* must not be freed during the duration of the call.

4518

*/

4518

*/

4519

size_t ksize(const void *objp)

4519

size_t ksize(const void *objp)

4520

{

4520

{

4521

BUG_ON(!objp);

4521

BUG_ON(!objp);

4522

if (unlikely(objp == ZERO_SIZE_PTR))

4522

if (unlikely(objp == ZERO_SIZE_PTR))

4523

return 0;

4523

return 0;

4524

4525

return obj_size(virt_to_cache(objp));

4525

return obj_size(virt_to_cache(objp));

4526

}

4526

}

4527

EXPORT_SYMBOL(ksize);

4527

EXPORT_SYMBOL(ksize);

4528

GITLAB

SLAB: Fix lockdep annotation breakage