Eric Lee / smarc-ti-linux-kernel

1

#ifndef _LINUX_MM_H

1

#ifndef _LINUX_MM_H

2

#define _LINUX_MM_H

2

#define _LINUX_MM_H

3

4

#include <linux/errno.h>

4

#include <linux/errno.h>

5

6

#ifdef __KERNEL__

6

#ifdef __KERNEL__

7

8

#include <linux/mmdebug.h>

8

#include <linux/mmdebug.h>

9

#include <linux/gfp.h>

9

#include <linux/gfp.h>

10

#include <linux/bug.h>

10

#include <linux/bug.h>

11

#include <linux/list.h>

11

#include <linux/list.h>

12

#include <linux/mmzone.h>

12

#include <linux/mmzone.h>

13

#include <linux/rbtree.h>

13

#include <linux/rbtree.h>

14

#include <linux/atomic.h>

14

#include <linux/atomic.h>

15

#include <linux/debug_locks.h>

15

#include <linux/debug_locks.h>

16

#include <linux/mm_types.h>

16

#include <linux/mm_types.h>

17

#include <linux/range.h>

17

#include <linux/range.h>

18

#include <linux/pfn.h>

18

#include <linux/pfn.h>

19

#include <linux/bit_spinlock.h>

19

#include <linux/bit_spinlock.h>

20

#include <linux/shrinker.h>

20

#include <linux/shrinker.h>

21

22

struct mempolicy;

22

struct mempolicy;

23

struct anon_vma;

23

struct anon_vma;

24

struct anon_vma_chain;

24

struct anon_vma_chain;

25

struct file_ra_state;

25

struct file_ra_state;

26

struct user_struct;

26

struct user_struct;

27

struct writeback_control;

27

struct writeback_control;

28

29

#ifndef CONFIG_NEED_MULTIPLE_NODES	/* Don't use mapnrs, do it properly */
extern unsigned long max_mapnr;

/* Store @limit in the global max_mapnr. */
static inline void set_max_mapnr(unsigned long limit)
{
	max_mapnr = limit;
}
#else
/* max_mapnr is not declared in this configuration, so this is a no-op. */
static inline void set_max_mapnr(unsigned long limit) { }
#endif

39

40

extern unsigned long totalram_pages;

40

extern unsigned long totalram_pages;

41

extern void * high_memory;

41

extern void * high_memory;

42

extern int page_cluster;

42

extern int page_cluster;

43

44

#ifdef CONFIG_SYSCTL

44

#ifdef CONFIG_SYSCTL

45

extern int sysctl_legacy_va_layout;

45

extern int sysctl_legacy_va_layout;

46

#else

46

#else

47

#define sysctl_legacy_va_layout 0

47

#define sysctl_legacy_va_layout 0

48

#endif

48

#endif

49

50

#include <asm/page.h>

50

#include <asm/page.h>

51

#include <asm/pgtable.h>

51

#include <asm/pgtable.h>

52

#include <asm/processor.h>

52

#include <asm/processor.h>

53

54

#ifndef __pa_symbol

54

#ifndef __pa_symbol

55

#define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0))

55

#define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0))

56

#endif

56

#endif

57

58

extern unsigned long sysctl_user_reserve_kbytes;

58

extern unsigned long sysctl_user_reserve_kbytes;

59

extern unsigned long sysctl_admin_reserve_kbytes;

59

extern unsigned long sysctl_admin_reserve_kbytes;

60

61

extern int sysctl_overcommit_memory;

61

extern int sysctl_overcommit_memory;

62

extern int sysctl_overcommit_ratio;

62

extern int sysctl_overcommit_ratio;

63

extern unsigned long sysctl_overcommit_kbytes;

63

extern unsigned long sysctl_overcommit_kbytes;

64

65

extern int overcommit_ratio_handler(struct ctl_table *, int, void __user *,

65

extern int overcommit_ratio_handler(struct ctl_table *, int, void __user *,

66

size_t *, loff_t *);

66

size_t *, loff_t *);

67

extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *,

67

extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *,

68

size_t *, loff_t *);

68

size_t *, loff_t *);

69

70

#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))

70

#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))

71

72

/* to align the pointer to the (next) page boundary */

72

/* to align the pointer to the (next) page boundary */

73

#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)

73

#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)

74

75

/*
 * Test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE.
 *
 * The argument is parenthesized before the cast so that a compound
 * expression such as "ptr + 1" is converted as a whole, rather than the
 * cast binding to its first operand only.
 */
#define PAGE_ALIGNED(addr)	IS_ALIGNED((unsigned long)(addr), PAGE_SIZE)

77

78

/*

78

/*

79

* Linux kernel virtual memory manager primitives.

79

* Linux kernel virtual memory manager primitives.

80

* The idea being to have a "virtual" mm in the same way

80

* The idea being to have a "virtual" mm in the same way

81

* we have a virtual fs - giving a cleaner interface to the

81

* we have a virtual fs - giving a cleaner interface to the

82

* mm details, and allowing different kinds of memory mappings

82

* mm details, and allowing different kinds of memory mappings

83

* (from shared memory to executable loading to arbitrary

83

* (from shared memory to executable loading to arbitrary

84

* mmap() functions).

84

* mmap() functions).

85

*/

85

*/

86

87

extern struct kmem_cache *vm_area_cachep;

87

extern struct kmem_cache *vm_area_cachep;

88

89

#ifndef CONFIG_MMU

89

#ifndef CONFIG_MMU

90

extern struct rb_root nommu_region_tree;

90

extern struct rb_root nommu_region_tree;

91

extern struct rw_semaphore nommu_region_sem;

91

extern struct rw_semaphore nommu_region_sem;

92

93

extern unsigned int kobjsize(const void *objp);

93

extern unsigned int kobjsize(const void *objp);

94

#endif

94

#endif

95

96

/*

96

/*

97

* vm_flags in vm_area_struct, see mm_types.h.

97

* vm_flags in vm_area_struct, see mm_types.h.

98

*/

98

*/

99

#define VM_NONE 0x00000000

99

#define VM_NONE 0x00000000

100

101

#define VM_READ 0x00000001 /* currently active flags */

101

#define VM_READ 0x00000001 /* currently active flags */

102

#define VM_WRITE 0x00000002

102

#define VM_WRITE 0x00000002

103

#define VM_EXEC 0x00000004

103

#define VM_EXEC 0x00000004

104

#define VM_SHARED 0x00000008

104

#define VM_SHARED 0x00000008

105

106

/* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */

106

/* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */

107

#define VM_MAYREAD 0x00000010 /* limits for mprotect() etc */

107

#define VM_MAYREAD 0x00000010 /* limits for mprotect() etc */

108

#define VM_MAYWRITE 0x00000020

108

#define VM_MAYWRITE 0x00000020

109

#define VM_MAYEXEC 0x00000040

109

#define VM_MAYEXEC 0x00000040

110

#define VM_MAYSHARE 0x00000080

110

#define VM_MAYSHARE 0x00000080

111

112

#define VM_GROWSDOWN 0x00000100 /* general info on the segment */

112

#define VM_GROWSDOWN 0x00000100 /* general info on the segment */

113

#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */

113

#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */

114

#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */

114

#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */

115

116

#define VM_LOCKED 0x00002000

116

#define VM_LOCKED 0x00002000

117

#define VM_IO 0x00004000 /* Memory mapped I/O or similar */

117

#define VM_IO 0x00004000 /* Memory mapped I/O or similar */

118

119

/* Used by sys_madvise() */

119

/* Used by sys_madvise() */

120

#define VM_SEQ_READ 0x00008000 /* App will access data sequentially */

120

#define VM_SEQ_READ 0x00008000 /* App will access data sequentially */

121

#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */

121

#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */

122

123

#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */

123

#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */

124

#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */

124

#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */

125

#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */

125

#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */

126

#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */

126

#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */

127

#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */

127

#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */

128

#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */

128

#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */

129

#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */

129

#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */

130

#define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */

130

#define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */

131

132

#ifdef CONFIG_MEM_SOFT_DIRTY

132

#ifdef CONFIG_MEM_SOFT_DIRTY

133

# define VM_SOFTDIRTY 0x08000000 /* Not soft dirty clean area */

133

# define VM_SOFTDIRTY 0x08000000 /* Not soft dirty clean area */

134

#else

134

#else

135

# define VM_SOFTDIRTY 0

135

# define VM_SOFTDIRTY 0

136

#endif

136

#endif

137

138

#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */

138

#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */

139

#define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */

139

#define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */

140

#define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */

140

#define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */

141

#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */

141

#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */

142

143

#if defined(CONFIG_X86)

143

#if defined(CONFIG_X86)

144

# define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */

144

# define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */

145

#elif defined(CONFIG_PPC)

145

#elif defined(CONFIG_PPC)

146

# define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */

146

# define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */

147

#elif defined(CONFIG_PARISC)

147

#elif defined(CONFIG_PARISC)

148

# define VM_GROWSUP VM_ARCH_1

148

# define VM_GROWSUP VM_ARCH_1

149

#elif defined(CONFIG_METAG)

149

#elif defined(CONFIG_METAG)

150

# define VM_GROWSUP VM_ARCH_1

150

# define VM_GROWSUP VM_ARCH_1

151

#elif defined(CONFIG_IA64)

151

#elif defined(CONFIG_IA64)

152

# define VM_GROWSUP VM_ARCH_1

152

# define VM_GROWSUP VM_ARCH_1

153

#elif !defined(CONFIG_MMU)

153

#elif !defined(CONFIG_MMU)

154

# define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */

154

# define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */

155

#endif

155

#endif

156

157

#ifndef VM_GROWSUP

157

#ifndef VM_GROWSUP

158

# define VM_GROWSUP VM_NONE

158

# define VM_GROWSUP VM_NONE

159

#endif

159

#endif

160

161

/* Bits set in the VMA until the stack is in its final location */

161

/* Bits set in the VMA until the stack is in its final location */

162

#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ)

162

#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ)

163

164

#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */

164

#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */

165

#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS

165

#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS

166

#endif

166

#endif

167

168

#ifdef CONFIG_STACK_GROWSUP

168

#ifdef CONFIG_STACK_GROWSUP

169

#define VM_STACK_FLAGS (VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)

169

#define VM_STACK_FLAGS (VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)

170

#else

170

#else

171

#define VM_STACK_FLAGS (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)

171

#define VM_STACK_FLAGS (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)

172

#endif

172

#endif

173

174

/*

174

/*

175

* Special vmas that are non-mergeable, non-mlock()able.

175

* Special vmas that are non-mergeable, non-mlock()able.

176

* Note: mm/huge_memory.c VM_NO_THP depends on this definition.

176

* Note: mm/huge_memory.c VM_NO_THP depends on this definition.

177

*/

177

*/

178

#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)

178

#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)

179

180

/* This mask defines which mm->def_flags a process can inherit from its parent */

180

/* This mask defines which mm->def_flags a process can inherit from its parent */

181

#define VM_INIT_DEF_MASK VM_NOHUGEPAGE

181

#define VM_INIT_DEF_MASK VM_NOHUGEPAGE

182

183

/*

183

/*

184

* mapping from the currently active vm_flags protection bits (the

184

* mapping from the currently active vm_flags protection bits (the

185

* low four bits) to a page protection mask..

185

* low four bits) to a page protection mask..

186

*/

186

*/

187

extern pgprot_t protection_map[16];

187

extern pgprot_t protection_map[16];

188

189

#define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */

189

#define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */

190

#define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */

190

#define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */

191

#define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */

191

#define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */

192

#define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */

192

#define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */

193

#define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */

193

#define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */

194

#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */

194

#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */

195

#define FAULT_FLAG_TRIED 0x40 /* second try */

195

#define FAULT_FLAG_TRIED 0x40 /* second try */

196

#define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */

196

#define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */

197

198

/*

198

/*

199

* vm_fault is filled by the the pagefault handler and passed to the vma's

199

* vm_fault is filled by the the pagefault handler and passed to the vma's

200

* ->fault function. The vma's ->fault is responsible for returning a bitmask

200

* ->fault function. The vma's ->fault is responsible for returning a bitmask

201

* of VM_FAULT_xxx flags that give details about how the fault was handled.

201

* of VM_FAULT_xxx flags that give details about how the fault was handled.

202

*

202

*

203

* pgoff should be used in favour of virtual_address, if possible. If pgoff

203

* pgoff should be used in favour of virtual_address, if possible. If pgoff

204

* is used, one may implement ->remap_pages to get nonlinear mapping support.

204

* is used, one may implement ->remap_pages to get nonlinear mapping support.

205

*/

205

*/

206

struct vm_fault {

206

struct vm_fault {

207

unsigned int flags; /* FAULT_FLAG_xxx flags */

207

unsigned int flags; /* FAULT_FLAG_xxx flags */

208

pgoff_t pgoff; /* Logical page offset based on vma */

208

pgoff_t pgoff; /* Logical page offset based on vma */

209

void __user *virtual_address; /* Faulting virtual address */

209

void __user *virtual_address; /* Faulting virtual address */

210

211

struct page *page; /* ->fault handlers should return a

211

struct page *page; /* ->fault handlers should return a

212

* page here, unless VM_FAULT_NOPAGE

212

* page here, unless VM_FAULT_NOPAGE

213

* is set (which is also implied by

213

* is set (which is also implied by

214

* VM_FAULT_ERROR).

214

* VM_FAULT_ERROR).

215

*/

215

*/

216

/* for ->map_pages() only */

216

/* for ->map_pages() only */

217

pgoff_t max_pgoff; /* map pages for offset from pgoff till

217

pgoff_t max_pgoff; /* map pages for offset from pgoff till

218

* max_pgoff inclusive */

218

* max_pgoff inclusive */

219

pte_t *pte; /* pte entry associated with ->pgoff */

219

pte_t *pte; /* pte entry associated with ->pgoff */

220

};

220

};

221

222

/*

222

/*

223

* These are the virtual MM functions - opening of an area, closing and

223

* These are the virtual MM functions - opening of an area, closing and

224

* unmapping it (needed to keep files on disk up-to-date etc), pointer

224

* unmapping it (needed to keep files on disk up-to-date etc), pointer

225

* to the functions called when a no-page or a wp-page exception occurs.

225

* to the functions called when a no-page or a wp-page exception occurs.

226

*/

226

*/

227

struct vm_operations_struct {

227

struct vm_operations_struct {

228

void (*open)(struct vm_area_struct * area);

228

void (*open)(struct vm_area_struct * area);

229

void (*close)(struct vm_area_struct * area);

229

void (*close)(struct vm_area_struct * area);

230

int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);

230

int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);

231

void (*map_pages)(struct vm_area_struct *vma, struct vm_fault *vmf);

231

void (*map_pages)(struct vm_area_struct *vma, struct vm_fault *vmf);

232

233

/* notification that a previously read-only page is about to become

233

/* notification that a previously read-only page is about to become

234

* writable, if an error is returned it will cause a SIGBUS */

234

* writable, if an error is returned it will cause a SIGBUS */

235

int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);

235

int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);

236

237

/* called by access_process_vm when get_user_pages() fails, typically

237

/* called by access_process_vm when get_user_pages() fails, typically

238

* for use by special VMAs that can switch between memory and hardware

238

* for use by special VMAs that can switch between memory and hardware

239

*/

239

*/

240

int (*access)(struct vm_area_struct *vma, unsigned long addr,

240

int (*access)(struct vm_area_struct *vma, unsigned long addr,

241

void *buf, int len, int write);

241

void *buf, int len, int write);

242

#ifdef CONFIG_NUMA

242

#ifdef CONFIG_NUMA

243

/*

243

/*

244

* set_policy() op must add a reference to any non-NULL @new mempolicy

244

* set_policy() op must add a reference to any non-NULL @new mempolicy

245

* to hold the policy upon return. Caller should pass NULL @new to

245

* to hold the policy upon return. Caller should pass NULL @new to

246

* remove a policy and fall back to surrounding context--i.e. do not

246

* remove a policy and fall back to surrounding context--i.e. do not

247

* install a MPOL_DEFAULT policy, nor the task or system default

247

* install a MPOL_DEFAULT policy, nor the task or system default

248

* mempolicy.

248

* mempolicy.

249

*/

249

*/

250

int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);

250

int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);

251

252

/*

252

/*

253

* get_policy() op must add reference [mpol_get()] to any policy at

253

* get_policy() op must add reference [mpol_get()] to any policy at

254

* (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure

254

* (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure

255

* in mm/mempolicy.c will do this automatically.

255

* in mm/mempolicy.c will do this automatically.

256

* get_policy() must NOT add a ref if the policy at (vma,addr) is not

256

* get_policy() must NOT add a ref if the policy at (vma,addr) is not

257

* marked as MPOL_SHARED. vma policies are protected by the mmap_sem.

257

* marked as MPOL_SHARED. vma policies are protected by the mmap_sem.

258

* If no [shared/vma] mempolicy exists at the addr, get_policy() op

258

* If no [shared/vma] mempolicy exists at the addr, get_policy() op

259

* must return NULL--i.e., do not "fallback" to task or system default

259

* must return NULL--i.e., do not "fallback" to task or system default

260

* policy.

260

* policy.

261

*/

261

*/

262

struct mempolicy *(*get_policy)(struct vm_area_struct *vma,

262

struct mempolicy *(*get_policy)(struct vm_area_struct *vma,

263

unsigned long addr);

263

unsigned long addr);

264

int (*migrate)(struct vm_area_struct *vma, const nodemask_t *from,

264

int (*migrate)(struct vm_area_struct *vma, const nodemask_t *from,

265

const nodemask_t *to, unsigned long flags);

265

const nodemask_t *to, unsigned long flags);

266

#endif

266

#endif

267

/* called by sys_remap_file_pages() to populate non-linear mapping */

267

/* called by sys_remap_file_pages() to populate non-linear mapping */

268

int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr,

268

int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr,

269

unsigned long size, pgoff_t pgoff);

269

unsigned long size, pgoff_t pgoff);

270

};

270

};

271

272

struct mmu_gather;

272

struct mmu_gather;

273

struct inode;

273

struct inode;

274

275

#define page_private(page) ((page)->private)

275

#define page_private(page) ((page)->private)

276

#define set_page_private(page, v) ((page)->private = (v))

276

#define set_page_private(page, v) ((page)->private = (v))

277

278

/* It's valid only if the page is free path or free_list */

278

/* It's valid only if the page is free path or free_list */

279

static inline void set_freepage_migratetype(struct page *page, int migratetype)

279

static inline void set_freepage_migratetype(struct page *page, int migratetype)

280

{

280

{

281

page->index = migratetype;

281

page->index = migratetype;

282

}

282

}

283

284

/* It's valid only if the page is free path or free_list */

284

/* It's valid only if the page is free path or free_list */

285

static inline int get_freepage_migratetype(struct page *page)

285

static inline int get_freepage_migratetype(struct page *page)

286

{

286

{

287

return page->index;

287

return page->index;

288

}

288

}

289

290

/*

290

/*

291

* FIXME: take this include out, include page-flags.h in

291

* FIXME: take this include out, include page-flags.h in

292

* files which need it (119 of them)

292

* files which need it (119 of them)

293

*/

293

*/

294

#include <linux/page-flags.h>

294

#include <linux/page-flags.h>

295

#include <linux/huge_mm.h>

295

#include <linux/huge_mm.h>

296

297

/*

297

/*

298

* Methods to modify the page usage count.

298

* Methods to modify the page usage count.

299

*

299

*

300

* What counts for a page usage:

300

* What counts for a page usage:

301

* - cache mapping (page->mapping)

301

* - cache mapping (page->mapping)

302

* - private data (page->private)

302

* - private data (page->private)

303

* - page mapped in a task's page tables, each mapping

303

* - page mapped in a task's page tables, each mapping

304

* is counted separately

304

* is counted separately

305

*

305

*

306

* Also, many kernel routines increase the page count before a critical

306

* Also, many kernel routines increase the page count before a critical

307

* routine so they can be sure the page doesn't go away from under them.

307

* routine so they can be sure the page doesn't go away from under them.

308

*/

308

*/

309

310

/*

310

/*

311

* Drop a ref, return true if the refcount fell to zero (the page has no users)

311

* Drop a ref, return true if the refcount fell to zero (the page has no users)

312

*/

312

*/

313

static inline int put_page_testzero(struct page *page)

313

static inline int put_page_testzero(struct page *page)

314

{

314

{

315

VM_BUG_ON_PAGE(atomic_read(&page->_count) == 0, page);

315

VM_BUG_ON_PAGE(atomic_read(&page->_count) == 0, page);

316

return atomic_dec_and_test(&page->_count);

316

return atomic_dec_and_test(&page->_count);

317

}

317

}

318

319

/*

319

/*

320

* Try to grab a ref unless the page has a refcount of zero, return false if

320

* Try to grab a ref unless the page has a refcount of zero, return false if

321

* that is the case.

321

* that is the case.

322

* This can be called when MMU is off so it must not access

322

* This can be called when MMU is off so it must not access

323

* any of the virtual mappings.

323

* any of the virtual mappings.

324

*/

324

*/

325

static inline int get_page_unless_zero(struct page *page)

325

static inline int get_page_unless_zero(struct page *page)

326

{

326

{

327

return atomic_inc_not_zero(&page->_count);

327

return atomic_inc_not_zero(&page->_count);

328

}

328

}

329

330

/*

330

/*

331

* Try to drop a ref unless the page has a refcount of one, return false if

331

* Try to drop a ref unless the page has a refcount of one, return false if

332

* that is the case.

332

* that is the case.

333

* This is to make sure that the refcount won't become zero after this drop.

333

* This is to make sure that the refcount won't become zero after this drop.

334

* This can be called when MMU is off so it must not access

334

* This can be called when MMU is off so it must not access

335

* any of the virtual mappings.

335

* any of the virtual mappings.

336

*/

336

*/

337

static inline int put_page_unless_one(struct page *page)

337

static inline int put_page_unless_one(struct page *page)

338

{

338

{

339

return atomic_add_unless(&page->_count, -1, 1);

339

return atomic_add_unless(&page->_count, -1, 1);

340

}

340

}

341

342

extern int page_is_ram(unsigned long pfn);

342

extern int page_is_ram(unsigned long pfn);

343

344

/* Support for virtually mapped pages */

344

/* Support for virtually mapped pages */

345

struct page *vmalloc_to_page(const void *addr);

345

struct page *vmalloc_to_page(const void *addr);

346

unsigned long vmalloc_to_pfn(const void *addr);

346

unsigned long vmalloc_to_pfn(const void *addr);

347

348

/*
 * Determine if an address is within the vmalloc range
 *
 * With CONFIG_MMU this returns non-zero when @x lies in
 * [VMALLOC_START, VMALLOC_END).  Without CONFIG_MMU it always returns 0:
 * on nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there
 * is no special casing required.
 */
static inline int is_vmalloc_addr(const void *x)
{
#ifdef CONFIG_MMU
	unsigned long addr = (unsigned long)x;

	return addr >= VMALLOC_START && addr < VMALLOC_END;
#else
	return 0;
#endif
}

364

#ifdef CONFIG_MMU
/* Implemented out of line when an MMU is configured. */
extern int is_vmalloc_or_module_addr(const void *x);
#else
/* Without CONFIG_MMU this stub always returns 0. */
static inline int is_vmalloc_or_module_addr(const void *x)
{
	return 0;
}
#endif

372

373

/*
 * Take the PG_compound_lock bit spinlock in page->flags.
 *
 * Must not be used on slab pages (checked with VM_BUG_ON_PAGE()).
 * The body is compiled out unless CONFIG_TRANSPARENT_HUGEPAGE is set.
 */
static inline void compound_lock(struct page *page)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	VM_BUG_ON_PAGE(PageSlab(page), page);
	bit_spin_lock(PG_compound_lock, &page->flags);
#endif
}

380

381

/*
 * Release the PG_compound_lock bit spinlock in page->flags.
 *
 * Must not be used on slab pages (checked with VM_BUG_ON_PAGE()).
 * The body is compiled out unless CONFIG_TRANSPARENT_HUGEPAGE is set.
 */
static inline void compound_unlock(struct page *page)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	VM_BUG_ON_PAGE(PageSlab(page), page);
	bit_spin_unlock(PG_compound_lock, &page->flags);
#endif
}

388

389

static inline unsigned long compound_lock_irqsave(struct page *page)

389

static inline unsigned long compound_lock_irqsave(struct page *page)

390

{

390

{

391

unsigned long uninitialized_var(flags);

391

unsigned long uninitialized_var(flags);

392

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

392

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

393

local_irq_save(flags);

393

local_irq_save(flags);

394

compound_lock(page);

394

compound_lock(page);

395

#endif

395

#endif

396

return flags;

396

return flags;

397

}

397

}

398

399

/*
 * Release the compound lock on @page, then call local_irq_restore() with
 * @flags (the value returned by compound_lock_irqsave()).
 *
 * The body is compiled out unless CONFIG_TRANSPARENT_HUGEPAGE is set.
 */
static inline void compound_unlock_irqrestore(struct page *page,
					      unsigned long flags)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	compound_unlock(page);
	local_irq_restore(flags);
#endif
}

407

408

static inline struct page *compound_head(struct page *page)

408

static inline struct page *compound_head(struct page *page)

409

{

409

{

410

if (unlikely(PageTail(page))) {

410

if (unlikely(PageTail(page))) {

411

struct page *head = page->first_page;

411

struct page *head = page->first_page;

412

413

/*

413

/*

414

* page->first_page may be a dangling pointer to an old

414

* page->first_page may be a dangling pointer to an old

415

* compound page, so recheck that it is still a tail

415

* compound page, so recheck that it is still a tail

416

* page before returning.

416

* page before returning.

417

*/

417

*/

418

smp_rmb();

418

smp_rmb();

419

if (likely(PageTail(page)))

419

if (likely(PageTail(page)))

420

return head;

420

return head;

421

}

421

}

422

return page;

422

return page;

423

}

423

}

424

425

/*

425

/*

426

* The atomic page->_mapcount, starts from -1: so that transitions

426

* The atomic page->_mapcount, starts from -1: so that transitions

427

* both from it and to it can be tracked, using atomic_inc_and_test

427

* both from it and to it can be tracked, using atomic_inc_and_test

428

* and atomic_add_negative(-1).

428

* and atomic_add_negative(-1).

429

*/

429

*/

430

static inline void page_mapcount_reset(struct page *page)

430

static inline void page_mapcount_reset(struct page *page)

431

{

431

{

432

atomic_set(&(page)->_mapcount, -1);

432

atomic_set(&(page)->_mapcount, -1);

433

}

433

}

434

435

static inline int page_mapcount(struct page *page)

435

static inline int page_mapcount(struct page *page)

436

{

436

{

437

return atomic_read(&(page)->_mapcount) + 1;

437

return atomic_read(&(page)->_mapcount) + 1;

438

}

438

}

439

440

static inline int page_count(struct page *page)

440

static inline int page_count(struct page *page)

441

{

441

{

442

return atomic_read(&compound_head(page)->_count);

442

return atomic_read(&compound_head(page)->_count);

443

}

443

}

444

445

#ifdef CONFIG_HUGETLB_PAGE
/* Implemented out of line when CONFIG_HUGETLB_PAGE is set. */
extern int PageHeadHuge(struct page *page_head);
#else /* CONFIG_HUGETLB_PAGE */
/* Without CONFIG_HUGETLB_PAGE this stub always returns 0. */
static inline int PageHeadHuge(struct page *page_head)
{
	return 0;
}
#endif /* CONFIG_HUGETLB_PAGE */

453

454

/*
 * __compound_tail_refcounted - true when neither PageSlab(@page) nor
 * PageHeadHuge(@page) holds.  Performs no check that @page is a head
 * page; compound_tail_refcounted() is the checked variant.
 */
static inline bool __compound_tail_refcounted(struct page *page)
{
	return !PageSlab(page) && !PageHeadHuge(page);
}

458

459

/*
 * This takes a head page as parameter and tells if the
 * tail page reference counting can be skipped.
 *
 * For this to be safe, PageSlab and PageHeadHuge must remain true on
 * any given page where they return true here, until all tail pins
 * have been released.
 *
 * VM_BUG_ON_PAGE() flags a @page that is not PageHead().
 */
static inline bool compound_tail_refcounted(struct page *page)
{
	VM_BUG_ON_PAGE(!PageHead(page), page);
	return __compound_tail_refcounted(page);
}

472

473

static inline void get_huge_page_tail(struct page *page)

473

static inline void get_huge_page_tail(struct page *page)

474

{

474

{

475

/*

475

/*

476

* __split_huge_page_refcount() cannot run from under us.

476

* __split_huge_page_refcount() cannot run from under us.

477

*/

477

*/

478

VM_BUG_ON_PAGE(!PageTail(page), page);

478

VM_BUG_ON_PAGE(!PageTail(page), page);

479

VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);

479

VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);

480

VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page);

480

VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page);

481

if (compound_tail_refcounted(page->first_page))

481

if (compound_tail_refcounted(page->first_page))

482

atomic_inc(&page->_mapcount);

482

atomic_inc(&page->_mapcount);

483

}

483

}

484

485

extern bool __get_page_tail(struct page *page);

485

extern bool __get_page_tail(struct page *page);

486

487

static inline void get_page(struct page *page)

487

static inline void get_page(struct page *page)

488

{

488

{

489

if (unlikely(PageTail(page)))

489

if (unlikely(PageTail(page)))

490

if (likely(__get_page_tail(page)))

490

if (likely(__get_page_tail(page)))

491

return;

491

return;

492

/*

492

/*

493

* Getting a normal page or the head of a compound page

493

* Getting a normal page or the head of a compound page

494

* requires to already have an elevated page->_count.

494

* requires to already have an elevated page->_count.

495

*/

495

*/

496

VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page);

496

VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page);

497

atomic_inc(&page->_count);

497

atomic_inc(&page->_count);

498

}

498

}

499

500

/*
 * virt_to_head_page - map address @x to a page via virt_to_page() and
 * return compound_head() of that page.
 */
static inline struct page *virt_to_head_page(const void *x)
{
	struct page *page = virt_to_page(x);

	return compound_head(page);
}

505

506

/*

506

/*

507

* Setup the page count before being freed into the page allocator for

507

* Setup the page count before being freed into the page allocator for

508

* the first time (boot or memory hotplug)

508

* the first time (boot or memory hotplug)

509

*/

509

*/

510

static inline void init_page_count(struct page *page)

510

static inline void init_page_count(struct page *page)

511

{

511

{

512

atomic_set(&page->_count, 1);

512

atomic_set(&page->_count, 1);

513

}

513

}

514

515

/*

515

/*

516

* PageBuddy() indicate that the page is free and in the buddy system

516

* PageBuddy() indicate that the page is free and in the buddy system

517

* (see mm/page_alloc.c).

517

* (see mm/page_alloc.c).

518

*

518

*

519

* PAGE_BUDDY_MAPCOUNT_VALUE must be <= -2 but better not too close to

519

* PAGE_BUDDY_MAPCOUNT_VALUE must be <= -2 but better not too close to

520

* -2 so that an underflow of the page_mapcount() won't be mistaken

520

* -2 so that an underflow of the page_mapcount() won't be mistaken

521

* for a genuine PAGE_BUDDY_MAPCOUNT_VALUE. -128 can be created very

521

* for a genuine PAGE_BUDDY_MAPCOUNT_VALUE. -128 can be created very

522

* efficiently by most CPU architectures.

522

* efficiently by most CPU architectures.

523

*/

523

*/

524

#define PAGE_BUDDY_MAPCOUNT_VALUE (-128)

524

#define PAGE_BUDDY_MAPCOUNT_VALUE (-128)

525

526

static inline int PageBuddy(struct page *page)

526

static inline int PageBuddy(struct page *page)

527

{

527

{

528

return atomic_read(&page->_mapcount) == PAGE_BUDDY_MAPCOUNT_VALUE;

528

return atomic_read(&page->_mapcount) == PAGE_BUDDY_MAPCOUNT_VALUE;

529

}

529

}

530

531

static inline void __SetPageBuddy(struct page *page)

531

static inline void __SetPageBuddy(struct page *page)

532

{

532

{

533

VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);

533

VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);

534

atomic_set(&page->_mapcount, PAGE_BUDDY_MAPCOUNT_VALUE);

534

atomic_set(&page->_mapcount, PAGE_BUDDY_MAPCOUNT_VALUE);

535

}

535

}

536

537

static inline void __ClearPageBuddy(struct page *page)

537

static inline void __ClearPageBuddy(struct page *page)

538

{

538

{

539

VM_BUG_ON_PAGE(!PageBuddy(page), page);

539

VM_BUG_ON_PAGE(!PageBuddy(page), page);

540

atomic_set(&page->_mapcount, -1);

540

atomic_set(&page->_mapcount, -1);

541

}

541

}

542

543

void put_page(struct page *page);

543

void put_page(struct page *page);

544

void put_pages_list(struct list_head *pages);

544

void put_pages_list(struct list_head *pages);

545

546

void split_page(struct page *page, unsigned int order);

546

void split_page(struct page *page, unsigned int order);

547

int split_free_page(struct page *page);

547

int split_free_page(struct page *page);

548

549

/*

549

/*

550

* Compound pages have a destructor function. Provide a

550

* Compound pages have a destructor function. Provide a

551

* prototype for that function and accessor functions.

551

* prototype for that function and accessor functions.

552

* These are _only_ valid on the head of a PG_compound page.

552

* These are _only_ valid on the head of a PG_compound page.

553

*/

553

*/

554

typedef void compound_page_dtor(struct page *);

554

typedef void compound_page_dtor(struct page *);

555

556

static inline void set_compound_page_dtor(struct page *page,

556

static inline void set_compound_page_dtor(struct page *page,

557

compound_page_dtor *dtor)

557

compound_page_dtor *dtor)

558

{

558

{

559

page[1].lru.next = (void *)dtor;

559

page[1].lru.next = (void *)dtor;

560

}

560

}

561

562

static inline compound_page_dtor *get_compound_page_dtor(struct page *page)

562

static inline compound_page_dtor *get_compound_page_dtor(struct page *page)

563

{

563

{

564

return (compound_page_dtor *)page[1].lru.next;

564

return (compound_page_dtor *)page[1].lru.next;

565

}

565

}

566

567

static inline int compound_order(struct page *page)

567

static inline int compound_order(struct page *page)

568

{

568

{

569

if (!PageHead(page))

569

if (!PageHead(page))

570

return 0;

570

return 0;

571

return (unsigned long)page[1].lru.prev;

571

return (unsigned long)page[1].lru.prev;

572

}

572

}

573

574

static inline void set_compound_order(struct page *page, unsigned long order)

574

static inline void set_compound_order(struct page *page, unsigned long order)

575

{

575

{

576

page[1].lru.prev = (void *)order;

576

page[1].lru.prev = (void *)order;

577

}

577

}

578

579

#ifdef CONFIG_MMU

579

#ifdef CONFIG_MMU

580

/*

580

/*

581

* Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when

581

* Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when

582

* servicing faults for write access. In the normal case, do always want

582

* servicing faults for write access. In the normal case, do always want

583

* pte_mkwrite. But get_user_pages can cause write faults for mappings

583

* pte_mkwrite. But get_user_pages can cause write faults for mappings

584

* that do not have writing enabled, when used by access_process_vm.

584

* that do not have writing enabled, when used by access_process_vm.

585

*/

585

*/

586

static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)

586

static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)

587

{

587

{

588

if (likely(vma->vm_flags & VM_WRITE))

588

if (likely(vma->vm_flags & VM_WRITE))

589

pte = pte_mkwrite(pte);

589

pte = pte_mkwrite(pte);

590

return pte;

590

return pte;

591

}

591

}

592

593

void do_set_pte(struct vm_area_struct *vma, unsigned long address,

593

void do_set_pte(struct vm_area_struct *vma, unsigned long address,

594

struct page *page, pte_t *pte, bool write, bool anon);

594

struct page *page, pte_t *pte, bool write, bool anon);

595

#endif

595

#endif

596

597

/*

597

/*

598

* Multiple processes may "see" the same page. E.g. for untouched

598

* Multiple processes may "see" the same page. E.g. for untouched

599

* mappings of /dev/null, all processes see the same page full of

599

* mappings of /dev/null, all processes see the same page full of

600

* zeroes, and text pages of executables and shared libraries have

600

* zeroes, and text pages of executables and shared libraries have

601

* only one copy in memory, at most, normally.

601

* only one copy in memory, at most, normally.

602

*

602

*

603

* For the non-reserved pages, page_count(page) denotes a reference count.

603

* For the non-reserved pages, page_count(page) denotes a reference count.

604

* page_count() == 0 means the page is free. page->lru is then used for

604

* page_count() == 0 means the page is free. page->lru is then used for

605

* freelist management in the buddy allocator.

605

* freelist management in the buddy allocator.

606

* page_count() > 0 means the page has been allocated.

606

* page_count() > 0 means the page has been allocated.

607

*

607

*

608

* Pages are allocated by the slab allocator in order to provide memory

608

* Pages are allocated by the slab allocator in order to provide memory

609

* to kmalloc and kmem_cache_alloc. In this case, the management of the

609

* to kmalloc and kmem_cache_alloc. In this case, the management of the

610

* page, and the fields in 'struct page' are the responsibility of mm/slab.c

610

* page, and the fields in 'struct page' are the responsibility of mm/slab.c

611

* unless a particular usage is carefully commented. (the responsibility of

611

* unless a particular usage is carefully commented. (the responsibility of

612

* freeing the kmalloc memory is the caller's, of course).

612

* freeing the kmalloc memory is the caller's, of course).

613

*

613

*

614

* A page may be used by anyone else who does a __get_free_page().

614

* A page may be used by anyone else who does a __get_free_page().

615

* In this case, page_count still tracks the references, and should only

615

* In this case, page_count still tracks the references, and should only

616

* be used through the normal accessor functions. The top bits of page->flags

616

* be used through the normal accessor functions. The top bits of page->flags

617

* and page->virtual store page management information, but all other fields

617

* and page->virtual store page management information, but all other fields

618

* are unused and could be used privately, carefully. The management of this

618

* are unused and could be used privately, carefully. The management of this

619

* page is the responsibility of the one who allocated it, and those who have

619

* page is the responsibility of the one who allocated it, and those who have

620

* subsequently been given references to it.

620

* subsequently been given references to it.

621

*

621

*

622

* The other pages (we may call them "pagecache pages") are completely

622

* The other pages (we may call them "pagecache pages") are completely

623

* managed by the Linux memory manager: I/O, buffers, swapping etc.

623

* managed by the Linux memory manager: I/O, buffers, swapping etc.

624

* The following discussion applies only to them.

624

* The following discussion applies only to them.

625

*

625

*

626

* A pagecache page contains an opaque `private' member, which belongs to the

626

* A pagecache page contains an opaque `private' member, which belongs to the

627

* page's address_space. Usually, this is the address of a circular list of

627

* page's address_space. Usually, this is the address of a circular list of

628

* the page's disk buffers. PG_private must be set to tell the VM to call

628

* the page's disk buffers. PG_private must be set to tell the VM to call

629

* into the filesystem to release these pages.

629

* into the filesystem to release these pages.

630

*

630

*

631

* A page may belong to an inode's memory mapping. In this case, page->mapping

631

* A page may belong to an inode's memory mapping. In this case, page->mapping

632

* is the pointer to the inode, and page->index is the file offset of the page,

632

* is the pointer to the inode, and page->index is the file offset of the page,

633

* in units of PAGE_CACHE_SIZE.

633

* in units of PAGE_CACHE_SIZE.

634

*

634

*

635

* If pagecache pages are not associated with an inode, they are said to be

635

* If pagecache pages are not associated with an inode, they are said to be

636

* anonymous pages. These may become associated with the swapcache, and in that

636

* anonymous pages. These may become associated with the swapcache, and in that

637

* case PG_swapcache is set, and page->private is an offset into the swapcache.

637

* case PG_swapcache is set, and page->private is an offset into the swapcache.

638

*

638

*

639

* In either case (swapcache or inode backed), the pagecache itself holds one

639

* In either case (swapcache or inode backed), the pagecache itself holds one

640

* reference to the page. Setting PG_private should also increment the

640

* reference to the page. Setting PG_private should also increment the

641

* refcount. Then each user mapping also has a reference to the page.

641

* refcount. Then each user mapping also has a reference to the page.

642

*

642

*

643

* The pagecache pages are stored in a per-mapping radix tree, which is

643

* The pagecache pages are stored in a per-mapping radix tree, which is

644

* rooted at mapping->page_tree, and indexed by offset.

644

* rooted at mapping->page_tree, and indexed by offset.

645

* Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space

645

* Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space

646

* lists, we instead now tag pages as dirty/writeback in the radix tree.

646

* lists, we instead now tag pages as dirty/writeback in the radix tree.

647

*

647

*

648

* All pagecache pages may be subject to I/O:

648

* All pagecache pages may be subject to I/O:

649

* - inode pages may need to be read from disk,

649

* - inode pages may need to be read from disk,

650

* - inode pages which have been modified and are MAP_SHARED may need

650

* - inode pages which have been modified and are MAP_SHARED may need

651

* to be written back to the inode on disk,

651

* to be written back to the inode on disk,

652

* - anonymous pages (including MAP_PRIVATE file mappings) which have been

652

* - anonymous pages (including MAP_PRIVATE file mappings) which have been

653

* modified may need to be swapped out to swap space and (later) to be read

653

* modified may need to be swapped out to swap space and (later) to be read

654

* back into memory.

654

* back into memory.

655

*/

655

*/

656

657

/*

657

/*

658

* The zone field is never updated after free_area_init_core()

658

* The zone field is never updated after free_area_init_core()

659

* sets it, so none of the operations on it need to be atomic.

659

* sets it, so none of the operations on it need to be atomic.

660

*/

660

*/

661

662

/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */
#define SECTIONS_PGOFF		((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
#define NODES_PGOFF		(SECTIONS_PGOFF - NODES_WIDTH)
#define ZONES_PGOFF		(NODES_PGOFF - ZONES_WIDTH)
#define LAST_CPUPID_PGOFF	(ZONES_PGOFF - LAST_CPUPID_WIDTH)

/*
 * Define the bit shifts to access each section.  For non-existent
 * sections we define the shift as 0; that plus a 0 mask ensures
 * the compiler will optimise away reference to them.
 */
#define SECTIONS_PGSHIFT	(SECTIONS_PGOFF * (SECTIONS_WIDTH != 0))
#define NODES_PGSHIFT		(NODES_PGOFF * (NODES_WIDTH != 0))
#define ZONES_PGSHIFT		(ZONES_PGOFF * (ZONES_WIDTH != 0))
#define LAST_CPUPID_PGSHIFT	(LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0))

/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */
#ifdef NODE_NOT_IN_PAGE_FLAGS
#define ZONEID_SHIFT		(SECTIONS_SHIFT + ZONES_SHIFT)
#define ZONEID_PGOFF		((SECTIONS_PGOFF < ZONES_PGOFF)? \
						SECTIONS_PGOFF : ZONES_PGOFF)
#else
#define ZONEID_SHIFT		(NODES_SHIFT + ZONES_SHIFT)
#define ZONEID_PGOFF		((NODES_PGOFF < ZONES_PGOFF)? \
						NODES_PGOFF : ZONES_PGOFF)
#endif

#define ZONEID_PGSHIFT		(ZONEID_PGOFF * (ZONEID_SHIFT != 0))

/* Refuse to build when the packed fields would collide with the flag bits. */
#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS
#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS
#endif

/* Low-bit masks, one per field; apply after shifting the field down. */
#define ZONES_MASK		((1UL << ZONES_WIDTH) - 1)
#define NODES_MASK		((1UL << NODES_WIDTH) - 1)
#define SECTIONS_MASK		((1UL << SECTIONS_WIDTH) - 1)
#define LAST_CPUPID_MASK	((1UL << LAST_CPUPID_WIDTH) - 1)
#define ZONEID_MASK		((1UL << ZONEID_SHIFT) - 1)

700

701

static inline enum zone_type page_zonenum(const struct page *page)

701

static inline enum zone_type page_zonenum(const struct page *page)

702

{

702

{

703

return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;

703

return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;

704

}

704

}

705

706

#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)

706

#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)

707

#define SECTION_IN_PAGE_FLAGS

707

#define SECTION_IN_PAGE_FLAGS

708

#endif

708

#endif

709

710

/*

710

/*

711

* The identification function is mainly used by the buddy allocator for

711

* The identification function is mainly used by the buddy allocator for

712

* determining if two pages could be buddies. We are not really identifying

712

* determining if two pages could be buddies. We are not really identifying

713

* the zone since we could be using the section number id if we do not have

713

* the zone since we could be using the section number id if we do not have

714

* node id available in page flags.

714

* node id available in page flags.

715

* We only guarantee that it will return the same value for two combinable

715

* We only guarantee that it will return the same value for two combinable

716

* pages in a zone.

716

* pages in a zone.

717

*/

717

*/

718

static inline int page_zone_id(struct page *page)

718

static inline int page_zone_id(struct page *page)

719

{

719

{

720

return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;

720

return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;

721

}

721

}

722

723

/*
 * zone_to_nid - node id of @zone.
 *
 * Returns @zone->node when CONFIG_NUMA is set, and 0 otherwise
 * (in which case @zone is not dereferenced).
 */
static inline int zone_to_nid(struct zone *zone)
{
#ifdef CONFIG_NUMA
	return zone->node;
#else
	return 0;
#endif
}

731

732

#ifdef NODE_NOT_IN_PAGE_FLAGS

732

#ifdef NODE_NOT_IN_PAGE_FLAGS

733

extern int page_to_nid(const struct page *page);

733

extern int page_to_nid(const struct page *page);

734

#else

734

#else

735

static inline int page_to_nid(const struct page *page)

735

static inline int page_to_nid(const struct page *page)

736

{

736

{

737

return (page->flags >> NODES_PGSHIFT) & NODES_MASK;

737

return (page->flags >> NODES_PGSHIFT) & NODES_MASK;

738

}

738

}

739

#endif

739

#endif

740

741

#ifdef CONFIG_NUMA_BALANCING

741

#ifdef CONFIG_NUMA_BALANCING

742

static inline int cpu_pid_to_cpupid(int cpu, int pid)

742

static inline int cpu_pid_to_cpupid(int cpu, int pid)

743

{

743

{

744

return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK);

744

return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK);

745

}

745

}

746

747

static inline int cpupid_to_pid(int cpupid)

747

static inline int cpupid_to_pid(int cpupid)

748

{

748

{

749

return cpupid & LAST__PID_MASK;

749

return cpupid & LAST__PID_MASK;

750

}

750

}

751

752

static inline int cpupid_to_cpu(int cpupid)

752

static inline int cpupid_to_cpu(int cpupid)

753

{

753

{

754

return (cpupid >> LAST__PID_SHIFT) & LAST__CPU_MASK;

754

return (cpupid >> LAST__PID_SHIFT) & LAST__CPU_MASK;

755

}

755

}

756

757

/*
 * cpupid_to_nid - node of the cpu encoded in @cpupid, obtained by
 * passing cpupid_to_cpu(@cpupid) to cpu_to_node().
 */
static inline int cpupid_to_nid(int cpupid)
{
	return cpu_to_node(cpupid_to_cpu(cpupid));
}

761

762

static inline bool cpupid_pid_unset(int cpupid)

762

static inline bool cpupid_pid_unset(int cpupid)

763

{

763

{

764

return cpupid_to_pid(cpupid) == (-1 & LAST__PID_MASK);

764

return cpupid_to_pid(cpupid) == (-1 & LAST__PID_MASK);

765

}

765

}

766

767

static inline bool cpupid_cpu_unset(int cpupid)

767

static inline bool cpupid_cpu_unset(int cpupid)

768

{

768

{

769

return cpupid_to_cpu(cpupid) == (-1 & LAST__CPU_MASK);

769

return cpupid_to_cpu(cpupid) == (-1 & LAST__CPU_MASK);

770

}

770

}

771

772

static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid)

772

static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid)

773

{

773

{

774

return (task_pid & LAST__PID_MASK) == cpupid_to_pid(cpupid);

774

return (task_pid & LAST__PID_MASK) == cpupid_to_pid(cpupid);

775

}

775

}

776

777

#define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid)

777

#define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid)

778

#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS

778

#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS

779

static inline int page_cpupid_xchg_last(struct page *page, int cpupid)

779

static inline int page_cpupid_xchg_last(struct page *page, int cpupid)

780

{

780

{

781

return xchg(&page->_last_cpupid, cpupid & LAST_CPUPID_MASK);

781

return xchg(&page->_last_cpupid, cpupid & LAST_CPUPID_MASK);

782

}

782

}

783

784

static inline int page_cpupid_last(struct page *page)

784

static inline int page_cpupid_last(struct page *page)

785

{

785

{

786

return page->_last_cpupid;

786

return page->_last_cpupid;

787

}

787

}

788

static inline void page_cpupid_reset_last(struct page *page)

788

static inline void page_cpupid_reset_last(struct page *page)

789

{

789

{

790

page->_last_cpupid = -1 & LAST_CPUPID_MASK;

790

page->_last_cpupid = -1 & LAST_CPUPID_MASK;

791

}

791

}

792

#else

792

#else

793

static inline int page_cpupid_last(struct page *page)

793

static inline int page_cpupid_last(struct page *page)

794

{

794

{

795

return (page->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK;

795

return (page->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK;

796

}

796

}

797

798

extern int page_cpupid_xchg_last(struct page *page, int cpupid);

798

extern int page_cpupid_xchg_last(struct page *page, int cpupid);

799

800

static inline void page_cpupid_reset_last(struct page *page)

800

static inline void page_cpupid_reset_last(struct page *page)

801

{

801

{

802

int cpupid = (1 << LAST_CPUPID_SHIFT) - 1;

802

int cpupid = (1 << LAST_CPUPID_SHIFT) - 1;

803

804

page->flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT);

804

page->flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT);

805

page->flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT;

805

page->flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT;

806

}

806

}

807

#endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */

807

#endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */

808

#else /* !CONFIG_NUMA_BALANCING */

808

#else /* !CONFIG_NUMA_BALANCING */

809

/*
 * !CONFIG_NUMA_BALANCING stub: nothing is stored; @cpupid is ignored
 * and page_to_nid(@page) is returned instead.
 */
static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
{
	return page_to_nid(page); /* XXX */
}

813

814

/* !CONFIG_NUMA_BALANCING stub: returns page_to_nid(@page). */
static inline int page_cpupid_last(struct page *page)
{
	return page_to_nid(page); /* XXX */
}

818

819

/* !CONFIG_NUMA_BALANCING stub: always returns -1; @cpupid is ignored. */
static inline int cpupid_to_nid(int cpupid)
{
	return -1;
}

823

824

/* !CONFIG_NUMA_BALANCING stub: always returns -1; @cpupid is ignored. */
static inline int cpupid_to_pid(int cpupid)
{
	return -1;
}

828

829

/* !CONFIG_NUMA_BALANCING stub: always returns -1; @cpupid is ignored. */
static inline int cpupid_to_cpu(int cpupid)
{
	return -1;
}

833

834

/* !CONFIG_NUMA_BALANCING stub: always returns -1; both arguments are ignored. */
static inline int cpu_pid_to_cpupid(int nid, int pid)
{
	return -1;
}

838

839

/* !CONFIG_NUMA_BALANCING stub: every cpupid is reported as having no pid set. */
static inline bool cpupid_pid_unset(int cpupid)
{
	return true;
}

843

844

/* !CONFIG_NUMA_BALANCING stub: deliberately does nothing. */
static inline void page_cpupid_reset_last(struct page *page)
{
}

847

848

/* !CONFIG_NUMA_BALANCING stub: never matches; both arguments are ignored. */
static inline bool cpupid_match_pid(struct task_struct *task, int cpupid)
{
	return false;
}

852

#endif /* CONFIG_NUMA_BALANCING */

852

#endif /* CONFIG_NUMA_BALANCING */

853

854

static inline struct zone *page_zone(const struct page *page)

854

static inline struct zone *page_zone(const struct page *page)

855

{

855

{

856

return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];

856

return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];

857

}

857

}

858

859

#ifdef SECTION_IN_PAGE_FLAGS

859

#ifdef SECTION_IN_PAGE_FLAGS

860

static inline void set_page_section(struct page *page, unsigned long section)

860

static inline void set_page_section(struct page *page, unsigned long section)

861

{

861

{

862

page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT);

862

page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT);

863

page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;

863

page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;

864

}

864

}

865

866

static inline unsigned long page_to_section(const struct page *page)

866

static inline unsigned long page_to_section(const struct page *page)

867

{

867

{

868

return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;

868

return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;

869

}

869

}

870

#endif

870

#endif

871

872

static inline void set_page_zone(struct page *page, enum zone_type zone)

872

static inline void set_page_zone(struct page *page, enum zone_type zone)

873

{

873

{

874

page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);

874

page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);

875

page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT;

875

page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT;

876

}

876

}

877

878

static inline void set_page_node(struct page *page, unsigned long node)

878

static inline void set_page_node(struct page *page, unsigned long node)

879

{

879

{

880

page->flags &= ~(NODES_MASK << NODES_PGSHIFT);

880

page->flags &= ~(NODES_MASK << NODES_PGSHIFT);

881

page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;

881

page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;

882

}

882

}

883

884

static inline void set_page_links(struct page *page, enum zone_type zone,

884

static inline void set_page_links(struct page *page, enum zone_type zone,

885

unsigned long node, unsigned long pfn)

885

unsigned long node, unsigned long pfn)

886

{

886

{

887

set_page_zone(page, zone);

887

set_page_zone(page, zone);

888

set_page_node(page, node);

888

set_page_node(page, node);

889

#ifdef SECTION_IN_PAGE_FLAGS

889

#ifdef SECTION_IN_PAGE_FLAGS

890

set_page_section(page, pfn_to_section_nr(pfn));

890

set_page_section(page, pfn_to_section_nr(pfn));

891

#endif

891

#endif

892

}

892

}

893

894

/*

894

/*

895

* Some inline functions in vmstat.h depend on page_zone()

895

* Some inline functions in vmstat.h depend on page_zone()

896

*/

896

*/

897

#include <linux/vmstat.h>

897

#include <linux/vmstat.h>

898

899

/*
 * lowmem_page_address - virtual address for @page computed as
 * __va(PFN_PHYS(page_to_pfn(@page))).
 */
static __always_inline void *lowmem_page_address(const struct page *page)
{
	return __va(PFN_PHYS(page_to_pfn(page)));
}

903

904

/*
 * page_address(), set_page_address() and page_address_init() come in three
 * flavours, selected at compile time:
 *
 *  - WANT_PAGE_VIRTUAL: the address is kept in page->virtual and the
 *    accessors are inline; page_address_init() is an empty statement.
 *  - HASHED_PAGE_VIRTUAL (CONFIG_HIGHMEM without WANT_PAGE_VIRTUAL): only
 *    prototypes are given here; the definitions live outside this header.
 *  - neither: page_address() is lowmem_page_address(), and the other two
 *    expand to empty statements that do not evaluate their arguments.
 */
#if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
#define HASHED_PAGE_VIRTUAL
#endif

#if defined(WANT_PAGE_VIRTUAL)
/* Return the address recorded in page->virtual. */
static inline void *page_address(const struct page *page)
{
	return page->virtual;
}
/* Record @address in page->virtual. */
static inline void set_page_address(struct page *page, void *address)
{
	page->virtual = address;
}
#define page_address_init()  do { } while(0)
#endif

#if defined(HASHED_PAGE_VIRTUAL)
void *page_address(const struct page *page);
void set_page_address(struct page *page, void *virtual);
void page_address_init(void);
#endif

#if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL)
#define page_address(page) lowmem_page_address(page)
#define set_page_address(page, address)  do { } while(0)
#define page_address_init()  do { } while(0)
#endif

931

932

/*
 * On an anonymous page mapped into a user virtual memory area,
 * page->mapping points to its anon_vma, not to a struct address_space;
 * with the PAGE_MAPPING_ANON bit set to distinguish it.  See rmap.h.
 *
 * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled,
 * the PAGE_MAPPING_KSM bit may be set along with the PAGE_MAPPING_ANON bit;
 * and then page->mapping points, not to an anon_vma, but to a private
 * structure which KSM associates with that merged page.  See ksm.h.
 *
 * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is currently never used.
 *
 * Please note that, confusingly, "page_mapping" refers to the inode
 * address_space which maps the page from disk; whereas "page_mapped"
 * refers to user virtual address space into which the page is mapped.
 */
#define PAGE_MAPPING_ANON	1
#define PAGE_MAPPING_KSM	2
/* Both tag bits together; masked off to recover the plain pointer. */
#define PAGE_MAPPING_FLAGS	(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM)

extern struct address_space *page_mapping(struct page *page);

953

954

/* Neutral page->mapping pointer to address_space or anon_vma or other */

954

/* Neutral page->mapping pointer to address_space or anon_vma or other */

955

static inline void *page_rmapping(struct page *page)

955

static inline void *page_rmapping(struct page *page)

956

{

956

{

957

return (void *)((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS);

957

return (void *)((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS);

958

}

958

}

959

960

extern struct address_space *__page_file_mapping(struct page *);

960

extern struct address_space *__page_file_mapping(struct page *);

961

962

static inline

962

static inline

963

struct address_space *page_file_mapping(struct page *page)

963

struct address_space *page_file_mapping(struct page *page)

964

{

964

{

965

if (unlikely(PageSwapCache(page)))

965

if (unlikely(PageSwapCache(page)))

966

return __page_file_mapping(page);

966

return __page_file_mapping(page);

967

968

return page->mapping;

968

return page->mapping;

969

}

969

}

970

971

static inline int PageAnon(struct page *page)

971

static inline int PageAnon(struct page *page)

972

{

972

{

973

return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;

973

return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;

974

}

974

}

975

976

/*

976

/*

977

* Return the pagecache index of the passed page. Regular pagecache pages

977

* Return the pagecache index of the passed page. Regular pagecache pages

978

* use ->index whereas swapcache pages use ->private

978

* use ->index whereas swapcache pages use ->private

979

*/

979

*/

980

static inline pgoff_t page_index(struct page *page)

980

static inline pgoff_t page_index(struct page *page)

981

{

981

{

982

if (unlikely(PageSwapCache(page)))

982

if (unlikely(PageSwapCache(page)))

983

return page_private(page);

983

return page_private(page);

984

return page->index;

984

return page->index;

985

}

985

}

986

987

extern pgoff_t __page_file_index(struct page *page);

987

extern pgoff_t __page_file_index(struct page *page);

988

989

/*

989

/*

990

* Return the file index of the page. Regular pagecache pages use ->index

990

* Return the file index of the page. Regular pagecache pages use ->index

991

* whereas swapcache pages use swp_offset(->private)

991

* whereas swapcache pages use swp_offset(->private)

992

*/

992

*/

993

static inline pgoff_t page_file_index(struct page *page)

993

static inline pgoff_t page_file_index(struct page *page)

994

{

994

{

995

if (unlikely(PageSwapCache(page)))

995

if (unlikely(PageSwapCache(page)))

996

return __page_file_index(page);

996

return __page_file_index(page);

997

998

return page->index;

998

return page->index;

999

}

999

}

1000

1001

/*

1001

/*

1002

* Return true if this page is mapped into pagetables.

1002

* Return true if this page is mapped into pagetables.

1003

*/

1003

*/

1004

static inline int page_mapped(struct page *page)

1004

static inline int page_mapped(struct page *page)

1005

{

1005

{

1006

return atomic_read(&(page)->_mapcount) >= 0;

1006

return atomic_read(&(page)->_mapcount) >= 0;

1007

}

1007

}

1008

1009

/*
 * Different kinds of faults, as returned by handle_mm_fault().
 * Used to decide whether a process gets delivered SIGBUS or
 * just gets major/minor fault counters bumped up.
 */

#define VM_FAULT_MINOR	0 /* For backwards compat. Remove me quickly. */

#define VM_FAULT_OOM	0x0001
#define VM_FAULT_SIGBUS	0x0002
#define VM_FAULT_MAJOR	0x0004
#define VM_FAULT_WRITE	0x0008	/* Special case for get_user_pages */
#define VM_FAULT_HWPOISON 0x0010	/* Hit poisoned small page */
#define VM_FAULT_HWPOISON_LARGE 0x0020  /* Hit poisoned large page. Index encoded in upper bits */

#define VM_FAULT_NOPAGE	0x0100	/* ->fault installed the pte, not return page */
#define VM_FAULT_LOCKED	0x0200	/* ->fault locked the returned page */
#define VM_FAULT_RETRY	0x0400	/* ->fault blocked, must retry */
#define VM_FAULT_FALLBACK 0x0800	/* huge page fault failed, fall back to small */

#define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */

/* Union of the fault bits grouped together as errors. */
#define VM_FAULT_ERROR	(VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \
			 VM_FAULT_FALLBACK | VM_FAULT_HWPOISON_LARGE)

/*
 * Encode hstate index for a hwpoisoned large page.
 *
 * The index occupies the four bits covered by VM_FAULT_HWPOISON_LARGE_MASK
 * (bits 12-15); VM_FAULT_GET_HINDEX() extracts it again.
 */
#define VM_FAULT_SET_HINDEX(x) ((x) << 12)
#define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf)

1037

1038

/*
 * Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
 */
extern void pagefault_out_of_memory(void);

1042

1043

/*
 * Offset of address @p within its page: the bits of @p that are not
 * covered by PAGE_MASK, as an unsigned long.
 */
#define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)

1044

1045

/*
 * Flags passed to show_mem() and show_free_areas() to suppress output in
 * various contexts.
 */
#define SHOW_MEM_FILTER_NODES		(0x0001u)	/* disallowed nodes */

extern void show_free_areas(unsigned int flags);
extern bool skip_free_areas_node(unsigned int flags, int nid);

1053

1054

int shmem_zero_setup(struct vm_area_struct *);
#ifdef CONFIG_SHMEM
bool shmem_mapping(struct address_space *mapping);
#else
/*
 * Stub used when CONFIG_SHMEM is not defined: always reports false,
 * whatever @mapping is.
 */
static inline bool shmem_mapping(struct address_space *mapping)
{
	return false;
}
#endif

1063

1064

/* Memory-locking helpers; the definitions are outside this header. */
extern int can_do_mlock(void);
extern int user_shm_lock(size_t, struct user_struct *);
extern void user_shm_unlock(size_t, struct user_struct *);

1067

1068

/*

1068

/*

1069

* Parameter block passed down to zap_pte_range in exceptional cases.

1069

* Parameter block passed down to zap_pte_range in exceptional cases.

1070

*/

1070

*/

1071

struct zap_details {

1071

struct zap_details {

1072

struct vm_area_struct *nonlinear_vma; /* Check page->index if set */

1072

struct vm_area_struct *nonlinear_vma; /* Check page->index if set */

1073

struct address_space *check_mapping; /* Check page->mapping if set */

1073

struct address_space *check_mapping; /* Check page->mapping if set */

1074

pgoff_t first_index; /* Lowest page->index to unmap */

1074

pgoff_t first_index; /* Lowest page->index to unmap */

1075

pgoff_t last_index; /* Highest page->index to unmap */

1075

pgoff_t last_index; /* Highest page->index to unmap */

1076

};

1076

};

1077

1078

struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,

1078

struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,

1079

pte_t pte);

1079

pte_t pte);

1080

1081

int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,

1081

int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,

1082

unsigned long size);

1082

unsigned long size);

1083

void zap_page_range(struct vm_area_struct *vma, unsigned long address,

1083

void zap_page_range(struct vm_area_struct *vma, unsigned long address,

1084

unsigned long size, struct zap_details *);

1084

unsigned long size, struct zap_details *);

1085

void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,

1085

void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,

1086

unsigned long start, unsigned long end);

1086

unsigned long start, unsigned long end);

1087

1088

/**

1088

/**

1089

* mm_walk - callbacks for walk_page_range

1089

* mm_walk - callbacks for walk_page_range

1090

* @pgd_entry: if set, called for each non-empty PGD (top-level) entry

1090

* @pgd_entry: if set, called for each non-empty PGD (top-level) entry

1091

* @pud_entry: if set, called for each non-empty PUD (2nd-level) entry

1091

* @pud_entry: if set, called for each non-empty PUD (2nd-level) entry

1092

* @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry

1092

* @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry

1093

* this handler is required to be able to handle

1093

* this handler is required to be able to handle

1094

* pmd_trans_huge() pmds. They may simply choose to

1094

* pmd_trans_huge() pmds. They may simply choose to

1095

* split_huge_page() instead of handling it explicitly.

1095

* split_huge_page() instead of handling it explicitly.

1096

* @pte_entry: if set, called for each non-empty PTE (4th-level) entry

1096

* @pte_entry: if set, called for each non-empty PTE (4th-level) entry

1097

* @pte_hole: if set, called for each hole at all levels

1097

* @pte_hole: if set, called for each hole at all levels

1098

* @hugetlb_entry: if set, called for each hugetlb entry

1098

* @hugetlb_entry: if set, called for each hugetlb entry

1099

* *Caution*: The caller must hold mmap_sem() if @hugetlb_entry

1099

* *Caution*: The caller must hold mmap_sem() if @hugetlb_entry

1100

* is used.

1100

* is used.

1101

*

1101

*

1102

* (see walk_page_range for more details)

1102

* (see walk_page_range for more details)

1103

*/

1103

*/

1104

struct mm_walk {

1104

struct mm_walk {

1105

int (*pgd_entry)(pgd_t *pgd, unsigned long addr,

1105

int (*pgd_entry)(pgd_t *pgd, unsigned long addr,

1106

unsigned long next, struct mm_walk *walk);

1106

unsigned long next, struct mm_walk *walk);

1107

int (*pud_entry)(pud_t *pud, unsigned long addr,

1107

int (*pud_entry)(pud_t *pud, unsigned long addr,

1108

unsigned long next, struct mm_walk *walk);

1108

unsigned long next, struct mm_walk *walk);

1109

int (*pmd_entry)(pmd_t *pmd, unsigned long addr,

1109

int (*pmd_entry)(pmd_t *pmd, unsigned long addr,

1110

unsigned long next, struct mm_walk *walk);

1110

unsigned long next, struct mm_walk *walk);

1111

int (*pte_entry)(pte_t *pte, unsigned long addr,

1111

int (*pte_entry)(pte_t *pte, unsigned long addr,

1112

unsigned long next, struct mm_walk *walk);

1112

unsigned long next, struct mm_walk *walk);

1113

int (*pte_hole)(unsigned long addr, unsigned long next,

1113

int (*pte_hole)(unsigned long addr, unsigned long next,

1114

struct mm_walk *walk);

1114

struct mm_walk *walk);

1115

int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,

1115

int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,

1116

unsigned long addr, unsigned long next,

1116

unsigned long addr, unsigned long next,

1117

struct mm_walk *walk);

1117

struct mm_walk *walk);

1118

struct mm_struct *mm;

1118

struct mm_struct *mm;

1119

void *private;

1119

void *private;

1120

};

1120

};

1121

1122

int walk_page_range(unsigned long addr, unsigned long end,

1122

int walk_page_range(unsigned long addr, unsigned long end,

1123

struct mm_walk *walk);

1123

struct mm_walk *walk);

1124

void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,

1124

void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,

1125

unsigned long end, unsigned long floor, unsigned long ceiling);

1125

unsigned long end, unsigned long floor, unsigned long ceiling);

1126

int copy_page_range(struct mm_struct *dst, struct mm_struct *src,

1126

int copy_page_range(struct mm_struct *dst, struct mm_struct *src,

1127

struct vm_area_struct *vma);

1127

struct vm_area_struct *vma);

1128

void unmap_mapping_range(struct address_space *mapping,

1128

void unmap_mapping_range(struct address_space *mapping,

1129

loff_t const holebegin, loff_t const holelen, int even_cows);

1129

loff_t const holebegin, loff_t const holelen, int even_cows);

1130

int follow_pfn(struct vm_area_struct *vma, unsigned long address,

1130

int follow_pfn(struct vm_area_struct *vma, unsigned long address,

1131

unsigned long *pfn);

1131

unsigned long *pfn);

1132

int follow_phys(struct vm_area_struct *vma, unsigned long address,

1132

int follow_phys(struct vm_area_struct *vma, unsigned long address,

1133

unsigned int flags, unsigned long *prot, resource_size_t *phys);

1133

unsigned int flags, unsigned long *prot, resource_size_t *phys);

1134

int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,

1134

int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,

1135

void *buf, int len, int write);

1135

void *buf, int len, int write);

1136

1137

/*
 * Convenience wrapper: unmap_mapping_range() on the given hole with
 * even_cows passed as 0.
 */
static inline void unmap_shared_mapping_range(struct address_space *mapping,
		loff_t const holebegin, loff_t const holelen)
{
	unmap_mapping_range(mapping, holebegin, holelen, 0);
}

1142

1143

/* Pagecache truncation and invalidation; defined outside this header. */
extern void truncate_pagecache(struct inode *inode, loff_t new);
extern void truncate_setsize(struct inode *inode, loff_t newsize);
void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
int truncate_inode_page(struct address_space *mapping, struct page *page);
int generic_error_remove_page(struct address_space *mapping, struct page *page);
int invalidate_inode_page(struct page *page);

1149

1150

#ifdef CONFIG_MMU

1150

#ifdef CONFIG_MMU

1151

extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,

1151

extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,

1152

unsigned long address, unsigned int flags);

1152

unsigned long address, unsigned int flags);

1153

extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,

1153

extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,

1154

unsigned long address, unsigned int fault_flags);

1154

unsigned long address, unsigned int fault_flags);

1155

#else

1155

#else

1156

static inline int handle_mm_fault(struct mm_struct *mm,

1156

static inline int handle_mm_fault(struct mm_struct *mm,

1157

struct vm_area_struct *vma, unsigned long address,

1157

struct vm_area_struct *vma, unsigned long address,

1158

unsigned int flags)

1158

unsigned int flags)

1159

{

1159

{

1160

/* should never happen if there's no MMU */

1160

/* should never happen if there's no MMU */

1161

BUG();

1161

BUG();

1162

return VM_FAULT_SIGBUS;

1162

return VM_FAULT_SIGBUS;

1163

}

1163

}

1164

static inline int fixup_user_fault(struct task_struct *tsk,

1164

static inline int fixup_user_fault(struct task_struct *tsk,

1165

struct mm_struct *mm, unsigned long address,

1165

struct mm_struct *mm, unsigned long address,

1166

unsigned int fault_flags)

1166

unsigned int fault_flags)

1167

{

1167

{

1168

/* should never happen if there's no MMU */

1168

/* should never happen if there's no MMU */

1169

BUG();

1169

BUG();

1170

return -EFAULT;

1170

return -EFAULT;

1171

}

1171

}

1172

#endif

1172

#endif

1173

1174

/*
 * Access to another address space and page-pinning entry points; all are
 * defined outside this header.
 */
extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
		void *buf, int len, int write);

long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		      unsigned long start, unsigned long nr_pages,
		      unsigned int foll_flags, struct page **pages,
		      struct vm_area_struct **vmas, int *nonblocking);
long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		    unsigned long start, unsigned long nr_pages,
		    int write, int force, struct page **pages,
		    struct vm_area_struct **vmas);
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages);
struct kvec;
int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
			struct page **pages);
int get_kernel_page(unsigned long start, int write, struct page **pages);
struct page *get_dump_page(unsigned long addr);

1193

1194

/*
 * Page release, invalidation and dirty-state entry points; all are
 * defined outside this header.
 */
extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
extern void do_invalidatepage(struct page *page, unsigned int offset,
			      unsigned int length);

int __set_page_dirty_nobuffers(struct page *page);
int __set_page_dirty_no_writeback(struct page *page);
int redirty_page_for_writepage(struct writeback_control *wbc,
				struct page *page);
void account_page_dirtied(struct page *page, struct address_space *mapping);
void account_page_writeback(struct page *page);
int set_page_dirty(struct page *page);
int set_page_dirty_lock(struct page *page);
int clear_page_dirty_for_io(struct page *page);

1207

1208

/* Is the vma a continuation of the stack vma above it? */

1208

/* Is the vma a continuation of the stack vma above it? */

1209

static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)

1209

static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)

1210

{

1210

{

1211

return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);

1211

return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);

1212

}

1212

}

1213

1214

static inline int stack_guard_page_start(struct vm_area_struct *vma,

1214

static inline int stack_guard_page_start(struct vm_area_struct *vma,

1215

unsigned long addr)

1215

unsigned long addr)

1216

{

1216

{

1217

return (vma->vm_flags & VM_GROWSDOWN) &&

1217

return (vma->vm_flags & VM_GROWSDOWN) &&

1218

(vma->vm_start == addr) &&

1218

(vma->vm_start == addr) &&

1219

!vma_growsdown(vma->vm_prev, addr);

1219

!vma_growsdown(vma->vm_prev, addr);

1220

}

1220

}

1221

1222

/* Is the vma a continuation of the stack vma below it? */

1222

/* Is the vma a continuation of the stack vma below it? */

1223

static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)

1223

static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)

1224

{

1224

{

1225

return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);

1225

return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);

1226

}

1226

}

1227

1228

static inline int stack_guard_page_end(struct vm_area_struct *vma,

1228

static inline int stack_guard_page_end(struct vm_area_struct *vma,

1229

unsigned long addr)

1229

unsigned long addr)

1230

{

1230

{

1231

return (vma->vm_flags & VM_GROWSUP) &&

1231

return (vma->vm_flags & VM_GROWSUP) &&

1232

(vma->vm_end == addr) &&

1232

(vma->vm_end == addr) &&

1233

!vma_growsup(vma->vm_next, addr);

1233

!vma_growsup(vma->vm_next, addr);

1234

}

1234

}

1235

1236

extern pid_t

1236

extern pid_t

1237

vm_is_stack(struct task_struct *task, struct vm_area_struct *vma, int in_group);

1237

vm_is_stack(struct task_struct *task, struct vm_area_struct *vma, int in_group);

1238

1239

extern unsigned long move_page_tables(struct vm_area_struct *vma,

1239

extern unsigned long move_page_tables(struct vm_area_struct *vma,

1240

unsigned long old_addr, struct vm_area_struct *new_vma,

1240

unsigned long old_addr, struct vm_area_struct *new_vma,

1241

unsigned long new_addr, unsigned long len,

1241

unsigned long new_addr, unsigned long len,

1242

bool need_rmap_locks);

1242

bool need_rmap_locks);

1243

extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,

1243

extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,

1244

unsigned long end, pgprot_t newprot,

1244

unsigned long end, pgprot_t newprot,

1245

int dirty_accountable, int prot_numa);

1245

int dirty_accountable, int prot_numa);

1246

extern int mprotect_fixup(struct vm_area_struct *vma,

1246

extern int mprotect_fixup(struct vm_area_struct *vma,

1247

struct vm_area_struct **pprev, unsigned long start,

1247

struct vm_area_struct **pprev, unsigned long start,

1248

unsigned long end, unsigned long newflags);

1248

unsigned long end, unsigned long newflags);

1249

1250

/*
 * __get_user_pages_fast() doesn't attempt to fault and will return short
 * (fewer pages than were asked for) instead.
 */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
			  struct page **pages);

1255

/*

1255

/*

1256

* per-process(per-mm_struct) statistics.

1256

* per-process(per-mm_struct) statistics.

1257

*/

1257

*/

1258

static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)

1258

static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)

1259

{

1259

{

1260

long val = atomic_long_read(&mm->rss_stat.count[member]);

1260

long val = atomic_long_read(&mm->rss_stat.count[member]);

1261

1262

#ifdef SPLIT_RSS_COUNTING

1262

#ifdef SPLIT_RSS_COUNTING

1263

/*

1263

/*

1264

* counter is updated in asynchronous manner and may go to minus.

1264

* counter is updated in asynchronous manner and may go to minus.

1265

* But it's never be expected number for users.

1265

* But it's never be expected number for users.

1266

*/

1266

*/

1267

if (val < 0)

1267

if (val < 0)

1268

val = 0;

1268

val = 0;

1269

#endif

1269

#endif

1270

return (unsigned long)val;

1270

return (unsigned long)val;

1271

}

1271

}

1272

1273

static inline void add_mm_counter(struct mm_struct *mm, int member, long value)

1273

static inline void add_mm_counter(struct mm_struct *mm, int member, long value)

1274

{

1274

{

1275

atomic_long_add(value, &mm->rss_stat.count[member]);

1275

atomic_long_add(value, &mm->rss_stat.count[member]);

1276

}

1276

}

1277

1278

static inline void inc_mm_counter(struct mm_struct *mm, int member)

1278

static inline void inc_mm_counter(struct mm_struct *mm, int member)

1279

{

1279

{

1280

atomic_long_inc(&mm->rss_stat.count[member]);

1280

atomic_long_inc(&mm->rss_stat.count[member]);

1281

}

1281

}

1282

1283

static inline void dec_mm_counter(struct mm_struct *mm, int member)

1283

static inline void dec_mm_counter(struct mm_struct *mm, int member)

1284

{

1284

{

1285

atomic_long_dec(&mm->rss_stat.count[member]);

1285

atomic_long_dec(&mm->rss_stat.count[member]);

1286

}

1286

}

1287

1288

static inline unsigned long get_mm_rss(struct mm_struct *mm)

1288

static inline unsigned long get_mm_rss(struct mm_struct *mm)

1289

{

1289

{

1290

return get_mm_counter(mm, MM_FILEPAGES) +

1290

return get_mm_counter(mm, MM_FILEPAGES) +

1291

get_mm_counter(mm, MM_ANONPAGES);

1291

get_mm_counter(mm, MM_ANONPAGES);

1292

}

1292

}

1293

1294

static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm)

1294

static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm)

1295

{

1295

{

1296

return max(mm->hiwater_rss, get_mm_rss(mm));

1296

return max(mm->hiwater_rss, get_mm_rss(mm));

1297

}

1297

}

1298

1299

static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm)

1299

static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm)

1300

{

1300

{

1301

return max(mm->hiwater_vm, mm->total_vm);

1301

return max(mm->hiwater_vm, mm->total_vm);

1302

}

1302

}

1303

1304

static inline void update_hiwater_rss(struct mm_struct *mm)

1304

static inline void update_hiwater_rss(struct mm_struct *mm)

1305

{

1305

{

1306

unsigned long _rss = get_mm_rss(mm);

1306

unsigned long _rss = get_mm_rss(mm);

1307

1308

if ((mm)->hiwater_rss < _rss)

1308

if ((mm)->hiwater_rss < _rss)

1309

(mm)->hiwater_rss = _rss;

1309

(mm)->hiwater_rss = _rss;

1310

}

1310

}

1311

1312

static inline void update_hiwater_vm(struct mm_struct *mm)

1312

static inline void update_hiwater_vm(struct mm_struct *mm)

1313

{

1313

{

1314

if (mm->hiwater_vm < mm->total_vm)

1314

if (mm->hiwater_vm < mm->total_vm)

1315

mm->hiwater_vm = mm->total_vm;

1315

mm->hiwater_vm = mm->total_vm;

1316

}

1316

}

1317

1318

/*
 * Update *@maxrss to get_mm_hiwater_rss(@mm) when that value is larger
 * than what *@maxrss currently holds.
 */
static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
					 struct mm_struct *mm)
{
	unsigned long peak = get_mm_hiwater_rss(mm);

	if (peak > *maxrss)
		*maxrss = peak;
}

1326

1327

/*
 * sync_mm_rss() is provided out of line when SPLIT_RSS_COUNTING is
 * defined; otherwise it is an empty inline stub.
 */
#ifdef SPLIT_RSS_COUNTING
void sync_mm_rss(struct mm_struct *mm);
#else
static inline void sync_mm_rss(struct mm_struct *mm) { }
#endif

1334

1335

int vma_wants_writenotify(struct vm_area_struct *vma);

1335

int vma_wants_writenotify(struct vm_area_struct *vma);

1336

1337

extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,

1337

extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,

1338

spinlock_t **ptl);

1338

spinlock_t **ptl);

1339

static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,

1339

static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,

1340

spinlock_t **ptl)

1340

spinlock_t **ptl)

1341

{

1341

{

1342

pte_t *ptep;

1342

pte_t *ptep;

1343

__cond_lock(*ptl, ptep = __get_locked_pte(mm, addr, ptl));

1343

__cond_lock(*ptl, ptep = __get_locked_pte(mm, addr, ptl));

1344

return ptep;

1344

return ptep;

1345

}

1345

}

1346

1347

#ifdef __PAGETABLE_PUD_FOLDED

1347

#ifdef __PAGETABLE_PUD_FOLDED

1348

static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd,

1348

static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd,

1349

unsigned long address)

1349

unsigned long address)

1350

{

1350

{

1351

return 0;

1351

return 0;

1352

}

1352

}

1353

#else

1353

#else

1354

int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);

1354

int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);

1355

#endif

1355

#endif

1356

1357

#ifdef __PAGETABLE_PMD_FOLDED

1357

#ifdef __PAGETABLE_PMD_FOLDED

1358

static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,

1358

static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,

1359

unsigned long address)

1359

unsigned long address)

1360

{

1360

{

1361

return 0;

1361

return 0;

1362

}

1362

}

1363

#else

1363

#else

1364

int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);

1364

int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);

1365

#endif

1365

#endif

1366

1367

int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,

1367

int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,

1368

pmd_t *pmd, unsigned long address);

1368

pmd_t *pmd, unsigned long address);

1369

int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);

1369

int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);

1370

1371

/*
 * The following ifdef is needed to get the 4level-fixup.h header to work.
 * Remove it when 4level-fixup.h has been removed.
 */

1375

#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)

1375

#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)

1376

/*
 * Return the pud entry for @address under @pgd.
 *
 * When pgd_none(*@pgd) is true, __pud_alloc() is called first; a non-zero
 * return from it makes this function return NULL.
 */
static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
{
	if (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))
		return NULL;

	return pud_offset(pgd, address);
}

1381

1382

/*
 * Return the pmd entry for @address under @pud.
 *
 * When pud_none(*@pud) is true, __pmd_alloc() is called first; a non-zero
 * return from it makes this function return NULL.
 */
static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
	if (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))
		return NULL;

	return pmd_offset(pud, address);
}

1387

#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */

1387

#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */

1388

1389

#if USE_SPLIT_PTE_PTLOCKS

1389

#if USE_SPLIT_PTE_PTLOCKS

1390

#if ALLOC_SPLIT_PTLOCKS

1390

#if ALLOC_SPLIT_PTLOCKS

1391

void __init ptlock_cache_init(void);

1391

void __init ptlock_cache_init(void);

1392

extern bool ptlock_alloc(struct page *page);

1392

extern bool ptlock_alloc(struct page *page);

1393

extern void ptlock_free(struct page *page);

1393

extern void ptlock_free(struct page *page);

1394

1395

static inline spinlock_t *ptlock_ptr(struct page *page)

1395

static inline spinlock_t *ptlock_ptr(struct page *page)

1396

{

1396

{

1397

return page->ptl;

1397

return page->ptl;

1398

}

1398

}

1399

#else /* ALLOC_SPLIT_PTLOCKS */

1399

#else /* ALLOC_SPLIT_PTLOCKS */

1400

/* Nothing to set up when ALLOC_SPLIT_PTLOCKS is not in effect. */
static inline void ptlock_cache_init(void) { }

1403

1404

static inline bool ptlock_alloc(struct page *page)

1404

static inline bool ptlock_alloc(struct page *page)

1405

{

1405

{

1406

return true;

1406

return true;

1407

}

1407

}

1408

1409

/* !ALLOC_SPLIT_PTLOCKS stub: nothing to free. */
static inline void ptlock_free(struct page *page) { }

1412

1413

static inline spinlock_t *ptlock_ptr(struct page *page)

1413

static inline spinlock_t *ptlock_ptr(struct page *page)

1414

{

1414

{

1415

return &page->ptl;

1415

return &page->ptl;

1416

}

1416

}

1417

#endif /* ALLOC_SPLIT_PTLOCKS */

1417

#endif /* ALLOC_SPLIT_PTLOCKS */

1418

1419

/*
 * Split-ptlock variant: return ptlock_ptr() of the page obtained from
 * pmd_page(*@pmd). @mm is not used here.
 */
static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
{
	struct page *pt_page = pmd_page(*pmd);

	return ptlock_ptr(pt_page);
}

1423

1424

static inline bool ptlock_init(struct page *page)

1424

static inline bool ptlock_init(struct page *page)

1425

{

1425

{

1426

/*

1426

/*

1427

* prep_new_page() initialize page->private (and therefore page->ptl)

1427

* prep_new_page() initialize page->private (and therefore page->ptl)

1428

* with 0. Make sure nobody took it in use in between.

1428

* with 0. Make sure nobody took it in use in between.

1429

*

1429

*

1430

* It can happen if arch try to use slab for page table allocation:

1430

* It can happen if arch try to use slab for page table allocation:

1431

* slab code uses page->slab_cache and page->first_page (for tail

1431

* slab code uses page->slab_cache and page->first_page (for tail

1432

* pages), which share storage with page->ptl.

1432

* pages), which share storage with page->ptl.

1433

*/

1433

*/

1434

VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page);

1434

VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page);

1435

if (!ptlock_alloc(page))

1435

if (!ptlock_alloc(page))

1436

return false;

1436

return false;

1437

spin_lock_init(ptlock_ptr(page));

1437

spin_lock_init(ptlock_ptr(page));

1438

return true;

1438

return true;

1439

}

1439

}

1440

1441

/* Reset page->mapping so free_pages_check won't complain. */

1441

/* Reset page->mapping so free_pages_check won't complain. */

1442

static inline void pte_lock_deinit(struct page *page)

1442

static inline void pte_lock_deinit(struct page *page)

1443

{

1443

{

1444

page->mapping = NULL;

1444

page->mapping = NULL;

1445

ptlock_free(page);

1445

ptlock_free(page);

1446

}

1446

}

1447

1448

#else /* !USE_SPLIT_PTE_PTLOCKS */

1448

#else /* !USE_SPLIT_PTE_PTLOCKS */

1449

/*

1449

/*

1450

* We use mm->page_table_lock to guard all pagetable pages of the mm.

1450

* We use mm->page_table_lock to guard all pagetable pages of the mm.

1451

*/

1451

*/

1452

static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)

1452

static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)

1453

{

1453

{

1454

return &mm->page_table_lock;

1454

return &mm->page_table_lock;

1455

}

1455

}

1456

static inline void ptlock_cache_init(void) {}

1456

static inline void ptlock_cache_init(void) {}

1457

static inline bool ptlock_init(struct page *page) { return true; }

1457

static inline bool ptlock_init(struct page *page) { return true; }

1458

static inline void pte_lock_deinit(struct page *page) {}

1458

static inline void pte_lock_deinit(struct page *page) {}

1459

#endif /* USE_SPLIT_PTE_PTLOCKS */

1459

#endif /* USE_SPLIT_PTE_PTLOCKS */

1460

1461

/*
 * Run the page table related initialisers: ptlock_cache_init() followed
 * by pgtable_cache_init().
 */
static inline void pgtable_init(void)
{
	ptlock_cache_init();

	pgtable_cache_init();
}

1466

1467

static inline bool pgtable_page_ctor(struct page *page)

1467

static inline bool pgtable_page_ctor(struct page *page)

1468

{

1468

{

1469

inc_zone_page_state(page, NR_PAGETABLE);

1469

inc_zone_page_state(page, NR_PAGETABLE);

1470

return ptlock_init(page);

1470

return ptlock_init(page);

1471

}

1471

}

1472

1473

static inline void pgtable_page_dtor(struct page *page)

1473

static inline void pgtable_page_dtor(struct page *page)

1474

{

1474

{

1475

pte_lock_deinit(page);

1475

pte_lock_deinit(page);

1476

dec_zone_page_state(page, NR_PAGETABLE);

1476

dec_zone_page_state(page, NR_PAGETABLE);

1477

}

1477

}

1478

1479

/*
 * Map the pte for @address under @pmd and take its lock.
 *
 * The lock pointer from pte_lockptr() is stored in *@ptlp and locked with
 * spin_lock(); the expression evaluates to the pte_offset_map() result.
 * Note that @pmd is expanded twice.
 */
#define pte_offset_map_lock(mm, pmd, address, ptlp)		\
({								\
	spinlock_t *__ptl = pte_lockptr(mm, pmd);		\
	pte_t *__pte = pte_offset_map(pmd, address);		\
								\
	*(ptlp) = __ptl;					\
	spin_lock(__ptl);					\
	__pte;							\
})

1487

1488

/* Release @ptl with spin_unlock(), then unmap @pte with pte_unmap(). */
#define pte_unmap_unlock(pte, ptl)				\
do {								\
	spin_unlock(ptl);					\
	pte_unmap(pte);						\
} while (0)

1492

1493

/*
 * Evaluates to pte_offset_map(@pmd, @address), or to NULL when
 * pmd_none(*@pmd) is true and __pte_alloc() then returns non-zero.
 * @pmd and @address are expanded more than once.
 */
#define pte_alloc_map(mm, vma, pmd, address)				\
	((unlikely(pmd_none(*(pmd))) &&					\
	  __pte_alloc(mm, vma, pmd, address)) ?				\
		NULL : pte_offset_map(pmd, address))

1497

1498

/*
 * Evaluates to pte_offset_map_lock(@mm, @pmd, @address, @ptlp), or to
 * NULL when pmd_none(*@pmd) is true and __pte_alloc() (called with a NULL
 * vma) then returns non-zero. @pmd and @address are expanded more than once.
 */
#define pte_alloc_map_lock(mm, pmd, address, ptlp)			\
	((unlikely(pmd_none(*(pmd))) &&					\
	  __pte_alloc(mm, NULL, pmd, address)) ?			\
		NULL : pte_offset_map_lock(mm, pmd, address, ptlp))

1502

1503

/*
 * Evaluates to pte_offset_kernel(@pmd, @address), or to NULL when
 * pmd_none(*@pmd) is true and __pte_alloc_kernel() then returns non-zero.
 * @pmd and @address are expanded more than once.
 */
#define pte_alloc_kernel(pmd, address)					\
	((unlikely(pmd_none(*(pmd))) &&					\
	  __pte_alloc_kernel(pmd, address)) ?				\
		NULL : pte_offset_kernel(pmd, address))

1506

1507

#if USE_SPLIT_PMD_PTLOCKS

1507

#if USE_SPLIT_PMD_PTLOCKS

1508

1509

/*
 * Return the struct page for the pmd table that contains @pmd.
 *
 * The pointer is masked down to a multiple of PTRS_PER_PMD * sizeof(pmd_t)
 * and the result is handed to virt_to_page().
 * NOTE(review): the mask arithmetic presumes that product is a power of
 * two - confirm for any new architecture.
 *
 * Declared inline like every other helper in this header, so translation
 * units that include it without using it do not get an unused static
 * function.
 */
static inline struct page *pmd_to_page(pmd_t *pmd)
{
	unsigned long mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);

	return virt_to_page((void *)((unsigned long) pmd & mask));
}

1514

1515

/*
 * Split-pmd-lock variant: return ptlock_ptr() of the page found by
 * pmd_to_page(@pmd). @mm is not used here.
 */
static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
{
	struct page *pmd_page_ptr = pmd_to_page(pmd);

	return ptlock_ptr(pmd_page_ptr);
}

1519

1520

/*
 * Constructor for a pmd table page: with CONFIG_TRANSPARENT_HUGEPAGE the
 * page->pmd_huge_pte field is reset to NULL, then the lock is set up.
 *
 * Returns the result of ptlock_init().
 */
static inline bool pgtable_pmd_page_ctor(struct page *page)
{
#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
	page->pmd_huge_pte = NULL;
#endif

	return ptlock_init(page);
}

1527

1528

/*
 * Destructor for a pmd table page: with CONFIG_TRANSPARENT_HUGEPAGE,
 * assert via VM_BUG_ON_PAGE() that page->pmd_huge_pte is no longer set,
 * then release the lock through ptlock_free().
 */
static inline void pgtable_pmd_page_dtor(struct page *page)
{
#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
	VM_BUG_ON_PAGE(page->pmd_huge_pte, page);
#endif

	ptlock_free(page);
}

1535

1536

/* Split-pmd-lock variant: the field lives in the page of the pmd table; @mm is unused. */
#define pmd_huge_pte(mm, pmd)	(pmd_to_page(pmd)->pmd_huge_pte)

1537

1538

#else

1538

#else

1539

1540

/*
 * Without split pmd locks the lock for any pmd is mm->page_table_lock.
 * @pmd is not used here.
 */
static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
{
	spinlock_t *lock = &mm->page_table_lock;

	return lock;
}

1544

1545

/* !USE_SPLIT_PMD_PTLOCKS stubs: no per-page state to set up or tear down. */
static inline bool pgtable_pmd_page_ctor(struct page *page)
{
	return true;
}

static inline void pgtable_pmd_page_dtor(struct page *page)
{
}

1547

1548

/* Without split pmd locks the field lives in the mm itself; @pmd is unused. */
#define pmd_huge_pte(mm, pmd)	((mm)->pmd_huge_pte)

1549

1550

#endif

1550

#endif

1551

1552

/*
 * Look up the lock for @pmd with pmd_lockptr(), take it with spin_lock()
 * and return it so the caller can unlock it later.
 */
static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
{
	spinlock_t *lock = pmd_lockptr(mm, pmd);

	spin_lock(lock);

	return lock;
}

1558

1559

extern void free_area_init(unsigned long * zones_size);

1559

extern void free_area_init(unsigned long * zones_size);

1560

extern void free_area_init_node(int nid, unsigned long * zones_size,

1560

extern void free_area_init_node(int nid, unsigned long * zones_size,

1561

unsigned long zone_start_pfn, unsigned long *zholes_size);

1561

unsigned long zone_start_pfn, unsigned long *zholes_size);

1562

extern void free_initmem(void);

1562

extern void free_initmem(void);

1563

1564

/*

1564

/*

1565

* Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK)

1565

* Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK)

1566

* into the buddy system. The freed pages will be poisoned with pattern

1566

* into the buddy system. The freed pages will be poisoned with pattern

1567

* "poison" if it's within range [0, UCHAR_MAX].

1567

* "poison" if it's within range [0, UCHAR_MAX].

1568

* Return pages freed into the buddy system.

1568

* Return pages freed into the buddy system.

1569

*/

1569

*/

1570

extern unsigned long free_reserved_area(void *start, void *end,

1570

extern unsigned long free_reserved_area(void *start, void *end,

1571

int poison, char *s);

1571

int poison, char *s);

1572

1573

#ifdef CONFIG_HIGHMEM

1573

#ifdef CONFIG_HIGHMEM

1574

/*

1574

/*

1575

* Free a highmem page into the buddy system, adjusting totalhigh_pages

1575

* Free a highmem page into the buddy system, adjusting totalhigh_pages

1576

* and totalram_pages.

1576

* and totalram_pages.

1577

*/

1577

*/

1578

extern void free_highmem_page(struct page *page);

1578

extern void free_highmem_page(struct page *page);

1579

#endif

1579

#endif

1580

1581

extern void adjust_managed_page_count(struct page *page, long count);

1581

extern void adjust_managed_page_count(struct page *page, long count);

1582

extern void mem_init_print_info(const char *str);

1582

extern void mem_init_print_info(const char *str);

1583

1584

/*
 * Free the reserved page into the buddy system, so it gets managed.
 *
 * Steps, in order: clear the Reserved flag, reset the page count with
 * init_page_count(), then hand the page to __free_page().
 */
static inline void __free_reserved_page(struct page *page)
{
	ClearPageReserved(page);

	init_page_count(page);

	__free_page(page);
}

1591

1592

/*
 * Free @page via __free_reserved_page() and then call
 * adjust_managed_page_count() with a count of +1 for it.
 */
static inline void free_reserved_page(struct page *page)
{
	__free_reserved_page(page);

	adjust_managed_page_count(page, 1);
}

1597

1598

/*
 * Set the Reserved flag on @page and then call
 * adjust_managed_page_count() with a count of -1 for it.
 */
static inline void mark_page_reserved(struct page *page)
{
	SetPageReserved(page);

	adjust_managed_page_count(page, -1);
}

1603

1604

/*
 * Default method to free all the __init memory into the buddy system.
 *
 * The region between the __init_begin and __init_end symbols is passed to
 * free_reserved_area(). The freed pages will be poisoned with pattern
 * "poison" if it's within range [0, UCHAR_MAX].
 *
 * Return pages freed into the buddy system.
 */
static inline unsigned long free_initmem_default(int poison)
{
	extern char __init_begin[], __init_end[];
	void *init_start = &__init_begin;
	void *init_stop = &__init_end;

	return free_reserved_area(init_start, init_stop,
				  poison, "unused kernel");
}

1617

1618

static inline unsigned long get_num_physpages(void)

1618

static inline unsigned long get_num_physpages(void)

1619

{

1619

{

1620

int nid;

1620

int nid;

1621

unsigned long phys_pages = 0;

1621

unsigned long phys_pages = 0;

1622

1623

for_each_online_node(nid)

1623

for_each_online_node(nid)

1624

phys_pages += node_present_pages(nid);

1624

phys_pages += node_present_pages(nid);

1625

1626

return phys_pages;

1626

return phys_pages;

1627

}

1627

}

1628

1629

#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP

1629

#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP

1630

/*

1630

/*

1631

* With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its

1631

* With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its

1632

* zones, allocate the backing mem_map and account for memory holes in a more

1632

* zones, allocate the backing mem_map and account for memory holes in a more

1633

* architecture independent manner. This is a substitute for creating the

1633

* architecture independent manner. This is a substitute for creating the

1634

* zone_sizes[] and zholes_size[] arrays and passing them to

1634

* zone_sizes[] and zholes_size[] arrays and passing them to

1635

* free_area_init_node()

1635

* free_area_init_node()

1636

*

1636

*

1637

* An architecture is expected to register range of page frames backed by

1637

* An architecture is expected to register range of page frames backed by

1638

* physical memory with memblock_add[_node]() before calling

1638

* physical memory with memblock_add[_node]() before calling

1639

* free_area_init_nodes() passing in the PFN each zone ends at. At a basic

1639

* free_area_init_nodes() passing in the PFN each zone ends at. At a basic

1640

* usage, an architecture is expected to do something like

1640

* usage, an architecture is expected to do something like

1641

*

1641

*

1642

* unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn,

1642

* unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn,

1643

* max_highmem_pfn};

1643

* max_highmem_pfn};

1644

* for_each_valid_physical_page_range()

1644

* for_each_valid_physical_page_range()

1645

* memblock_add_node(base, size, nid)

1645

* memblock_add_node(base, size, nid)

1646

* free_area_init_nodes(max_zone_pfns);

1646

* free_area_init_nodes(max_zone_pfns);

1647

*

1647

*

1648

* free_bootmem_with_active_regions() calls free_bootmem_node() for each

1648

* free_bootmem_with_active_regions() calls free_bootmem_node() for each

1649

* registered physical page range. Similarly

1649

* registered physical page range. Similarly

1650

* sparse_memory_present_with_active_regions() calls memory_present() for

1650

* sparse_memory_present_with_active_regions() calls memory_present() for

1651

* each range when SPARSEMEM is enabled.

1651

* each range when SPARSEMEM is enabled.

1652

*

1652

*

1653

* See mm/page_alloc.c for more information on each function exposed by

1653

* See mm/page_alloc.c for more information on each function exposed by

1654

* CONFIG_HAVE_MEMBLOCK_NODE_MAP.

1654

* CONFIG_HAVE_MEMBLOCK_NODE_MAP.

1655

*/

1655

*/

1656

extern void free_area_init_nodes(unsigned long *max_zone_pfn);

1656

extern void free_area_init_nodes(unsigned long *max_zone_pfn);

1657

unsigned long node_map_pfn_alignment(void);

1657

unsigned long node_map_pfn_alignment(void);

1658

unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,

1658

unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,

1659

unsigned long end_pfn);

1659

unsigned long end_pfn);

1660

extern unsigned long absent_pages_in_range(unsigned long start_pfn,

1660

extern unsigned long absent_pages_in_range(unsigned long start_pfn,

1661

unsigned long end_pfn);

1661

unsigned long end_pfn);

1662

extern void get_pfn_range_for_nid(unsigned int nid,

1662

extern void get_pfn_range_for_nid(unsigned int nid,

1663

unsigned long *start_pfn, unsigned long *end_pfn);

1663

unsigned long *start_pfn, unsigned long *end_pfn);

1664

extern unsigned long find_min_pfn_with_active_regions(void);

1664

extern unsigned long find_min_pfn_with_active_regions(void);

1665

extern void free_bootmem_with_active_regions(int nid,

1665

extern void free_bootmem_with_active_regions(int nid,

1666

unsigned long max_low_pfn);

1666

unsigned long max_low_pfn);

1667

extern void sparse_memory_present_with_active_regions(int nid);

1667

extern void sparse_memory_present_with_active_regions(int nid);

1668

1669

#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */

1669

#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */

1670

1671

/*
 * When neither CONFIG_HAVE_MEMBLOCK_NODE_MAP nor
 * CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID is defined, __early_pfn_to_nid() is
 * an inline stub that reports node 0 for every pfn. Otherwise both
 * lookup functions are provided out of line.
 */
#if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
static inline int __early_pfn_to_nid(unsigned long pfn)
{
	return 0;
}
#else
/* please see mm/page_alloc.c */
extern int __meminit early_pfn_to_nid(unsigned long pfn);
/* there is a per-arch backend function. */
extern int __meminit __early_pfn_to_nid(unsigned long pfn);
#endif

1683

1684

extern void set_dma_reserve(unsigned long new_dma_reserve);

1684

extern void set_dma_reserve(unsigned long new_dma_reserve);

1685

extern void memmap_init_zone(unsigned long, int, unsigned long,

1685

extern void memmap_init_zone(unsigned long, int, unsigned long,

1686

unsigned long, enum memmap_context);

1686

unsigned long, enum memmap_context);

1687

extern void setup_per_zone_wmarks(void);

1687

extern void setup_per_zone_wmarks(void);

1688

extern int __meminit init_per_zone_wmark_min(void);

1688

extern int __meminit init_per_zone_wmark_min(void);

1689

extern void mem_init(void);

1689

extern void mem_init(void);

1690

extern void __init mmap_init(void);

1690

extern void __init mmap_init(void);

1691

extern void show_mem(unsigned int flags);

1691

extern void show_mem(unsigned int flags);

1692

extern void si_meminfo(struct sysinfo * val);

1692

extern void si_meminfo(struct sysinfo * val);

1693

extern void si_meminfo_node(struct sysinfo *val, int nid);

1693

extern void si_meminfo_node(struct sysinfo *val, int nid);

1694

1695

extern __printf(3, 4)

1695

extern __printf(3, 4)

1696

void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...);

1696

void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...);

1697

1698

extern void setup_per_cpu_pageset(void);

1698

extern void setup_per_cpu_pageset(void);

1699

1700

extern void zone_pcp_update(struct zone *zone);

1700

extern void zone_pcp_update(struct zone *zone);

1701

extern void zone_pcp_reset(struct zone *zone);

1701

extern void zone_pcp_reset(struct zone *zone);

1702

1703

/* page_alloc.c */

1703

/* page_alloc.c */

1704

extern int min_free_kbytes;

1704

extern int min_free_kbytes;

1705

1706

/* nommu.c */

1706

/* nommu.c */

1707

extern atomic_long_t mmap_pages_allocated;

1707

extern atomic_long_t mmap_pages_allocated;

1708

extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);

1708

extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);

1709

1710

/* interval_tree.c */

1710

/* interval_tree.c */

1711

void vma_interval_tree_insert(struct vm_area_struct *node,

1711

void vma_interval_tree_insert(struct vm_area_struct *node,

1712

struct rb_root *root);

1712

struct rb_root *root);

1713

void vma_interval_tree_insert_after(struct vm_area_struct *node,

1713

void vma_interval_tree_insert_after(struct vm_area_struct *node,

1714

struct vm_area_struct *prev,

1714

struct vm_area_struct *prev,

1715

struct rb_root *root);

1715

struct rb_root *root);

1716

void vma_interval_tree_remove(struct vm_area_struct *node,

1716

void vma_interval_tree_remove(struct vm_area_struct *node,

1717

struct rb_root *root);

1717

struct rb_root *root);

1718

struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root,

1718

struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root,

1719

unsigned long start, unsigned long last);

1719

unsigned long start, unsigned long last);

1720

struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,

1720

struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,

1721

unsigned long start, unsigned long last);

1721

unsigned long start, unsigned long last);

1722

1723

/*
 * Loop header: @vma starts at vma_interval_tree_iter_first() for
 * [@start, @last] in @root and advances with
 * vma_interval_tree_iter_next() until it becomes NULL.
 */
#define vma_interval_tree_foreach(vma, root, start, last)		\
	for (vma = vma_interval_tree_iter_first(root, start, last);	\
	     vma;							\
	     vma = vma_interval_tree_iter_next(vma, start, last))

1726

1727

static inline void vma_nonlinear_insert(struct vm_area_struct *vma,

1727

static inline void vma_nonlinear_insert(struct vm_area_struct *vma,

1728

struct list_head *list)

1728

struct list_head *list)

1729

{

1729

{

1730

list_add_tail(&vma->shared.nonlinear, list);

1730

list_add_tail(&vma->shared.nonlinear, list);

1731

}

1731

}

1732

1733

void anon_vma_interval_tree_insert(struct anon_vma_chain *node,

1733

void anon_vma_interval_tree_insert(struct anon_vma_chain *node,

1734

struct rb_root *root);

1734

struct rb_root *root);

1735

void anon_vma_interval_tree_remove(struct anon_vma_chain *node,

1735

void anon_vma_interval_tree_remove(struct anon_vma_chain *node,

1736

struct rb_root *root);

1736

struct rb_root *root);

1737

struct anon_vma_chain *anon_vma_interval_tree_iter_first(

1737

struct anon_vma_chain *anon_vma_interval_tree_iter_first(

1738

struct rb_root *root, unsigned long start, unsigned long last);

1738

struct rb_root *root, unsigned long start, unsigned long last);

1739

struct anon_vma_chain *anon_vma_interval_tree_iter_next(

1739

struct anon_vma_chain *anon_vma_interval_tree_iter_next(

1740

struct anon_vma_chain *node, unsigned long start, unsigned long last);

1740

struct anon_vma_chain *node, unsigned long start, unsigned long last);

1741

#ifdef CONFIG_DEBUG_VM_RB

1741

#ifdef CONFIG_DEBUG_VM_RB

1742

void anon_vma_interval_tree_verify(struct anon_vma_chain *node);

1742

void anon_vma_interval_tree_verify(struct anon_vma_chain *node);

1743

#endif

1743

#endif

1744

1745

/*
 * Loop header: @avc starts at anon_vma_interval_tree_iter_first() for
 * [@start, @last] in @root and advances with
 * anon_vma_interval_tree_iter_next() until it becomes NULL.
 */
#define anon_vma_interval_tree_foreach(avc, root, start, last)		 \
	for (avc = anon_vma_interval_tree_iter_first(root, start, last); \
	     avc;							 \
	     avc = anon_vma_interval_tree_iter_next(avc, start, last))

1748

1749

/* mmap.c */

1749

/* mmap.c */

1750

extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);

1750

extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);

1751

extern int vma_adjust(struct vm_area_struct *vma, unsigned long start,

1751

extern int vma_adjust(struct vm_area_struct *vma, unsigned long start,

1752

unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert);

1752

unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert);

1753

extern struct vm_area_struct *vma_merge(struct mm_struct *,

1753

extern struct vm_area_struct *vma_merge(struct mm_struct *,

1754

struct vm_area_struct *prev, unsigned long addr, unsigned long end,

1754

struct vm_area_struct *prev, unsigned long addr, unsigned long end,

1755

unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,

1755

unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,

1756

struct mempolicy *);

1756

struct mempolicy *);

1757

extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);

1757

extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);

1758

extern int split_vma(struct mm_struct *,

1758

extern int split_vma(struct mm_struct *,

1759

struct vm_area_struct *, unsigned long addr, int new_below);

1759

struct vm_area_struct *, unsigned long addr, int new_below);

1760

extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);

1760

extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);

1761

extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,

1761

extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,

1762

struct rb_node **, struct rb_node *);

1762

struct rb_node **, struct rb_node *);

1763

extern void unlink_file_vma(struct vm_area_struct *);

1763

extern void unlink_file_vma(struct vm_area_struct *);

1764

extern struct vm_area_struct *copy_vma(struct vm_area_struct **,

1764

extern struct vm_area_struct *copy_vma(struct vm_area_struct **,

1765

unsigned long addr, unsigned long len, pgoff_t pgoff,

1765

unsigned long addr, unsigned long len, pgoff_t pgoff,

1766

bool *need_rmap_locks);

1766

bool *need_rmap_locks);

1767

extern void exit_mmap(struct mm_struct *);

1767

extern void exit_mmap(struct mm_struct *);

1768

1769

extern int mm_take_all_locks(struct mm_struct *mm);

1769

extern int mm_take_all_locks(struct mm_struct *mm);

1770

extern void mm_drop_all_locks(struct mm_struct *mm);

1770

extern void mm_drop_all_locks(struct mm_struct *mm);

1771

1772

extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);

1772

extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);

1773

extern struct file *get_mm_exe_file(struct mm_struct *mm);

1773

extern struct file *get_mm_exe_file(struct mm_struct *mm);

1774

1775

extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);

1775

extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);

1776

extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,

1776

extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,

1777

unsigned long addr, unsigned long len,

1777

unsigned long addr, unsigned long len,

1778

unsigned long flags, struct page **pages);

1778

unsigned long flags, struct page **pages);

1779

extern int install_special_mapping(struct mm_struct *mm,

1779

extern int install_special_mapping(struct mm_struct *mm,

1780

unsigned long addr, unsigned long len,

1780

unsigned long addr, unsigned long len,

1781

unsigned long flags, struct page **pages);

1781

unsigned long flags, struct page **pages);

1782

1783

extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);

1783

extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);

1784

1785

extern unsigned long mmap_region(struct file *file, unsigned long addr,

1785

extern unsigned long mmap_region(struct file *file, unsigned long addr,

1786

unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);

1786

unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);

1787

extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,

1787

extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,

1788

unsigned long len, unsigned long prot, unsigned long flags,

1788

unsigned long len, unsigned long prot, unsigned long flags,

1789

unsigned long pgoff, unsigned long *populate);

1789

unsigned long pgoff, unsigned long *populate);

1790

extern int do_munmap(struct mm_struct *, unsigned long, size_t);

1790

extern int do_munmap(struct mm_struct *, unsigned long, size_t);

1791

1792

#ifdef CONFIG_MMU

1792

#ifdef CONFIG_MMU

1793

extern int __mm_populate(unsigned long addr, unsigned long len,

1793

extern int __mm_populate(unsigned long addr, unsigned long len,

1794

int ignore_errors);

1794

int ignore_errors);

1795

static inline void mm_populate(unsigned long addr, unsigned long len)

1795

static inline void mm_populate(unsigned long addr, unsigned long len)

1796

{

1796

{

1797

/* Ignore errors */

1797

/* Ignore errors */

1798

(void) __mm_populate(addr, len, 1);

1798

(void) __mm_populate(addr, len, 1);

1799

}

1799

}

1800

#else

1800

#else

1801

static inline void mm_populate(unsigned long addr, unsigned long len) {}

1801

static inline void mm_populate(unsigned long addr, unsigned long len) {}

1802

#endif

1802

#endif

1803

1804

/* These take the mm semaphore themselves */

1804

/* These take the mm semaphore themselves */

1805

extern unsigned long vm_brk(unsigned long, unsigned long);

1805

extern unsigned long vm_brk(unsigned long, unsigned long);

1806

extern int vm_munmap(unsigned long, size_t);

1806

extern int vm_munmap(unsigned long, size_t);

1807

extern unsigned long vm_mmap(struct file *, unsigned long,

1807

extern unsigned long vm_mmap(struct file *, unsigned long,

1808

unsigned long, unsigned long,

1808

unsigned long, unsigned long,

1809

unsigned long, unsigned long);

1809

unsigned long, unsigned long);

1810

1811

struct vm_unmapped_area_info {

1811

struct vm_unmapped_area_info {

1812

#define VM_UNMAPPED_AREA_TOPDOWN 1

1812

#define VM_UNMAPPED_AREA_TOPDOWN 1

1813

unsigned long flags;

1813

unsigned long flags;

1814

unsigned long length;

1814

unsigned long length;

1815

unsigned long low_limit;

1815

unsigned long low_limit;

1816

unsigned long high_limit;

1816

unsigned long high_limit;

1817

unsigned long align_mask;

1817

unsigned long align_mask;

1818

unsigned long align_offset;

1818

unsigned long align_offset;

1819

};

1819

};

1820

1821

extern unsigned long unmapped_area(struct vm_unmapped_area_info *info);

1821

extern unsigned long unmapped_area(struct vm_unmapped_area_info *info);

1822

extern unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info);

1822

extern unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info);

1823

1824

/*

1824

/*

1825

* Search for an unmapped address range.

1825

* Search for an unmapped address range.

1826

*

1826

*

1827

* We are looking for a range that:

1827

* We are looking for a range that:

1828

* - does not intersect with any VMA;

1828

* - does not intersect with any VMA;

1829

* - is contained within the [low_limit, high_limit) interval;

1829

* - is contained within the [low_limit, high_limit) interval;

1830

* - is at least the desired size.

1830

* - is at least the desired size.

1831

* - satisfies (begin_addr & align_mask) == (align_offset & align_mask)

1831

* - satisfies (begin_addr & align_mask) == (align_offset & align_mask)

1832

*/

1832

*/

1833

static inline unsigned long

1833

static inline unsigned long

1834

vm_unmapped_area(struct vm_unmapped_area_info *info)

1834

vm_unmapped_area(struct vm_unmapped_area_info *info)

1835

{

1835

{

1836

if (!(info->flags & VM_UNMAPPED_AREA_TOPDOWN))

1836

if (!(info->flags & VM_UNMAPPED_AREA_TOPDOWN))

1837

return unmapped_area(info);

1837

return unmapped_area(info);

1838

else

1838

else

1839

return unmapped_area_topdown(info);

1839

return unmapped_area_topdown(info);

1840

}

1840

}

1841

1842

/* truncate.c */

1842

/* truncate.c */

1843

extern void truncate_inode_pages(struct address_space *, loff_t);

1843

extern void truncate_inode_pages(struct address_space *, loff_t);

1844

extern void truncate_inode_pages_range(struct address_space *,

1844

extern void truncate_inode_pages_range(struct address_space *,

1845

loff_t lstart, loff_t lend);

1845

loff_t lstart, loff_t lend);

1846

extern void truncate_inode_pages_final(struct address_space *);

1846

extern void truncate_inode_pages_final(struct address_space *);

1847

1848

/* generic vm_area_ops exported for stackable file systems */

1848

/* generic vm_area_ops exported for stackable file systems */

1849

extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);

1849

extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);

1850

extern void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf);

1850

extern void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf);

1851

extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);

1851

extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);

1852

1853

/* mm/page-writeback.c */

1853

/* mm/page-writeback.c */

1854

int write_one_page(struct page *page, int wait);

1854

int write_one_page(struct page *page, int wait);

1855

void task_dirty_inc(struct task_struct *tsk);

1855

void task_dirty_inc(struct task_struct *tsk);

1856

1857

/* readahead.c */

1857

/* readahead.c */

1858

#define VM_MAX_READAHEAD 128 /* kbytes */

1858

#define VM_MAX_READAHEAD 128 /* kbytes */

1859

#define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */

1859

#define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */

1860

1861

int force_page_cache_readahead(struct address_space *mapping, struct file *filp,

1861

int force_page_cache_readahead(struct address_space *mapping, struct file *filp,

1862

pgoff_t offset, unsigned long nr_to_read);

1862

pgoff_t offset, unsigned long nr_to_read);

1863

1864

void page_cache_sync_readahead(struct address_space *mapping,

1864

void page_cache_sync_readahead(struct address_space *mapping,

1865

struct file_ra_state *ra,

1865

struct file_ra_state *ra,

1866

struct file *filp,

1866

struct file *filp,

1867

pgoff_t offset,

1867

pgoff_t offset,

1868

unsigned long size);

1868

unsigned long size);

1869

1870

void page_cache_async_readahead(struct address_space *mapping,

1870

void page_cache_async_readahead(struct address_space *mapping,

1871

struct file_ra_state *ra,

1871

struct file_ra_state *ra,

1872

struct file *filp,

1872

struct file *filp,

1873

struct page *pg,

1873

struct page *pg,

1874

pgoff_t offset,

1874

pgoff_t offset,

1875

unsigned long size);

1875

unsigned long size);

1876

1877

unsigned long max_sane_readahead(unsigned long nr);

1877

unsigned long max_sane_readahead(unsigned long nr);

1878

unsigned long ra_submit(struct file_ra_state *ra,

1879

struct address_space *mapping,

1880

struct file *filp);

1881

1878

1882

/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */

1879

/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */

1883

extern int expand_stack(struct vm_area_struct *vma, unsigned long address);

1880

extern int expand_stack(struct vm_area_struct *vma, unsigned long address);

1884

1881

1885

/* CONFIG_STACK_GROWSUP still needs to grow downwards at some places */

1882

/* CONFIG_STACK_GROWSUP still needs to grow downwards at some places */

1886

extern int expand_downwards(struct vm_area_struct *vma,

1883

extern int expand_downwards(struct vm_area_struct *vma,

1887

unsigned long address);

1884

unsigned long address);

1888

#if VM_GROWSUP

1885

#if VM_GROWSUP

1889

extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);

1886

extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);

1890

#else

1887

#else

1891

/*
 * Stub used when VM_GROWSUP is zero. It evaluates to 0 (and does not
 * evaluate its arguments) so it can stand in for the int-returning
 * expand_upwards() in both statement and expression context.
 */
#define expand_upwards(vma, address) (0)

1888

/*
 * Stub used when VM_GROWSUP is zero. It evaluates to 0 (and does not
 * evaluate its arguments) so it can stand in for the int-returning
 * expand_upwards() in both statement and expression context.
 */
#define expand_upwards(vma, address) (0)

1892

#endif

1889

#endif

1893

1890

1894

/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */

1891

/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */

1895

extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);

1892

extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);

1896

extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,

1893

extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,

1897

struct vm_area_struct **pprev);

1894

struct vm_area_struct **pprev);

1898

1895

1899

/* Look up the first VMA which intersects the interval start_addr..end_addr-1,

1896

/* Look up the first VMA which intersects the interval start_addr..end_addr-1,

1900

NULL if none. Assume start_addr < end_addr. */

1897

NULL if none. Assume start_addr < end_addr. */

1901

static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)

1898

static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)

1902

{

1899

{

1903

struct vm_area_struct * vma = find_vma(mm,start_addr);

1900

struct vm_area_struct * vma = find_vma(mm,start_addr);

1904

1901

1905

if (vma && end_addr <= vma->vm_start)

1902

if (vma && end_addr <= vma->vm_start)

1906

vma = NULL;

1903

vma = NULL;

1907

return vma;

1904

return vma;

1908

}

1905

}

1909

1906

1910

static inline unsigned long vma_pages(struct vm_area_struct *vma)

1907

static inline unsigned long vma_pages(struct vm_area_struct *vma)

1911

{

1908

{

1912

return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;

1909

return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;

1913

}

1910

}

1914

1911

1915

/* Look up the first VMA which exactly matches the interval vm_start ... vm_end */

1912

/* Look up the first VMA which exactly matches the interval vm_start ... vm_end */

1916

static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,

1913

static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,

1917

unsigned long vm_start, unsigned long vm_end)

1914

unsigned long vm_start, unsigned long vm_end)

1918

{

1915

{

1919

struct vm_area_struct *vma = find_vma(mm, vm_start);

1916

struct vm_area_struct *vma = find_vma(mm, vm_start);

1920

1917

1921

if (vma && (vma->vm_start != vm_start || vma->vm_end != vm_end))

1918

if (vma && (vma->vm_start != vm_start || vma->vm_end != vm_end))

1922

vma = NULL;

1919

vma = NULL;

1923

1920

1924

return vma;

1921

return vma;

1925

}

1922

}

1926

1923

1927

#ifdef CONFIG_MMU

1924

#ifdef CONFIG_MMU

1928

pgprot_t vm_get_page_prot(unsigned long vm_flags);

1925

pgprot_t vm_get_page_prot(unsigned long vm_flags);

1929

#else

1926

#else

1930

static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)

1927

static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)

1931

{

1928

{

1932

return __pgprot(0);

1929

return __pgprot(0);

1933

}

1930

}

1934

#endif

1931

#endif

1935

1932

1936

#ifdef CONFIG_NUMA_BALANCING

1933

#ifdef CONFIG_NUMA_BALANCING

1937

unsigned long change_prot_numa(struct vm_area_struct *vma,

1934

unsigned long change_prot_numa(struct vm_area_struct *vma,

1938

unsigned long start, unsigned long end);

1935

unsigned long start, unsigned long end);

1939

#endif

1936

#endif

1940

1937

1941

struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);

1938

struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);

1942

int remap_pfn_range(struct vm_area_struct *, unsigned long addr,

1939

int remap_pfn_range(struct vm_area_struct *, unsigned long addr,

1943

unsigned long pfn, unsigned long size, pgprot_t);

1940

unsigned long pfn, unsigned long size, pgprot_t);

1944

int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);

1941

int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);

1945

int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,

1942

int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,

1946

unsigned long pfn);

1943

unsigned long pfn);

1947

int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,

1944

int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,

1948

unsigned long pfn);

1945

unsigned long pfn);

1949

int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);

1946

int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);

1950

1947

1951

1948

1952

struct page *follow_page_mask(struct vm_area_struct *vma,

1949

struct page *follow_page_mask(struct vm_area_struct *vma,

1953

unsigned long address, unsigned int foll_flags,

1950

unsigned long address, unsigned int foll_flags,

1954

unsigned int *page_mask);

1951

unsigned int *page_mask);

1955

1952

1956

static inline struct page *follow_page(struct vm_area_struct *vma,

1953

static inline struct page *follow_page(struct vm_area_struct *vma,

1957

unsigned long address, unsigned int foll_flags)

1954

unsigned long address, unsigned int foll_flags)

1958

{

1955

{

1959

unsigned int unused_page_mask;

1956

unsigned int unused_page_mask;

1960

return follow_page_mask(vma, address, foll_flags, &unused_page_mask);

1957

return follow_page_mask(vma, address, foll_flags, &unused_page_mask);

1961

}

1958

}

1962

1959

1963

#define FOLL_WRITE 0x01 /* check pte is writable */

1960

#define FOLL_WRITE 0x01 /* check pte is writable */

1964

#define FOLL_TOUCH 0x02 /* mark page accessed */

1961

#define FOLL_TOUCH 0x02 /* mark page accessed */

1965

#define FOLL_GET 0x04 /* do get_page on page */

1962

#define FOLL_GET 0x04 /* do get_page on page */

1966

#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */

1963

#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */

1967

#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */

1964

#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */

1968

#define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO

1965

#define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO

1969

* and return without waiting upon it */

1966

* and return without waiting upon it */

1970

#define FOLL_MLOCK 0x40 /* mark page as mlocked */

1967

#define FOLL_MLOCK 0x40 /* mark page as mlocked */

1971

#define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */

1968

#define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */

1972

#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */

1969

#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */

1973

#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */

1970

#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */

1974

#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */

1971

#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */

1975

1972

1976

typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,

1973

typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,

1977

void *data);

1974

void *data);

1978

extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,

1975

extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,

1979

unsigned long size, pte_fn_t fn, void *data);

1976

unsigned long size, pte_fn_t fn, void *data);

1980

1977

1981

#ifdef CONFIG_PROC_FS

1978

#ifdef CONFIG_PROC_FS

1982

void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);

1979

void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);

1983

#else

1980

#else

1984

static inline void vm_stat_account(struct mm_struct *mm,

1981

static inline void vm_stat_account(struct mm_struct *mm,

1985

unsigned long flags, struct file *file, long pages)

1982

unsigned long flags, struct file *file, long pages)

1986

{

1983

{

1987

mm->total_vm += pages;

1984

mm->total_vm += pages;

1988

}

1985

}

1989

#endif /* CONFIG_PROC_FS */

1986

#endif /* CONFIG_PROC_FS */

1990

1987

1991

#ifdef CONFIG_DEBUG_PAGEALLOC

1988

#ifdef CONFIG_DEBUG_PAGEALLOC

1992

extern void kernel_map_pages(struct page *page, int numpages, int enable);

1989

extern void kernel_map_pages(struct page *page, int numpages, int enable);

1993

#ifdef CONFIG_HIBERNATION

1990

#ifdef CONFIG_HIBERNATION

1994

extern bool kernel_page_present(struct page *page);

1991

extern bool kernel_page_present(struct page *page);

1995

#endif /* CONFIG_HIBERNATION */

1992

#endif /* CONFIG_HIBERNATION */

1996

#else

1993

#else

1997

static inline void

1994

static inline void

1998

kernel_map_pages(struct page *page, int numpages, int enable) {}

1995

kernel_map_pages(struct page *page, int numpages, int enable) {}

1999

#ifdef CONFIG_HIBERNATION

1996

#ifdef CONFIG_HIBERNATION

2000

static inline bool kernel_page_present(struct page *page) { return true; }

1997

static inline bool kernel_page_present(struct page *page) { return true; }

2001

#endif /* CONFIG_HIBERNATION */

1998

#endif /* CONFIG_HIBERNATION */

2002

#endif

1999

#endif

2003

2000

2004

extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);

2001

extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);

2005

#ifdef __HAVE_ARCH_GATE_AREA

2002

#ifdef __HAVE_ARCH_GATE_AREA

2006

int in_gate_area_no_mm(unsigned long addr);

2003

int in_gate_area_no_mm(unsigned long addr);

2007

int in_gate_area(struct mm_struct *mm, unsigned long addr);

2004

int in_gate_area(struct mm_struct *mm, unsigned long addr);

2008

#else

2005

#else

2009

int in_gate_area_no_mm(unsigned long addr);

2006

int in_gate_area_no_mm(unsigned long addr);

2010

#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);})

2007

#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);})

2011

#endif /* __HAVE_ARCH_GATE_AREA */

2008

#endif /* __HAVE_ARCH_GATE_AREA */

2012

2009

2013

#ifdef CONFIG_SYSCTL

2010

#ifdef CONFIG_SYSCTL

2014

extern int sysctl_drop_caches;

2011

extern int sysctl_drop_caches;

2015

int drop_caches_sysctl_handler(struct ctl_table *, int,

2012

int drop_caches_sysctl_handler(struct ctl_table *, int,

2016

void __user *, size_t *, loff_t *);

2013

void __user *, size_t *, loff_t *);

2017

#endif

2014

#endif

2018

2015

2019

unsigned long shrink_slab(struct shrink_control *shrink,

2016

unsigned long shrink_slab(struct shrink_control *shrink,

2020

unsigned long nr_pages_scanned,

2017

unsigned long nr_pages_scanned,

2021

unsigned long lru_pages);

2018

unsigned long lru_pages);

2022

2019

2023

#ifndef CONFIG_MMU

2020

#ifndef CONFIG_MMU

2024

#define randomize_va_space 0

2021

#define randomize_va_space 0

2025

#else

2022

#else

2026

extern int randomize_va_space;

2023

extern int randomize_va_space;

2027

#endif

2024

#endif

2028

2025

2029

const char * arch_vma_name(struct vm_area_struct *vma);

2026

const char * arch_vma_name(struct vm_area_struct *vma);

2030

void print_vma_addr(char *prefix, unsigned long rip);

2027

void print_vma_addr(char *prefix, unsigned long rip);

2031

2028

2032

void sparse_mem_maps_populate_node(struct page **map_map,

2029

void sparse_mem_maps_populate_node(struct page **map_map,

2033

unsigned long pnum_begin,

2030

unsigned long pnum_begin,

2034

unsigned long pnum_end,

2031

unsigned long pnum_end,

2035

unsigned long map_count,

2032

unsigned long map_count,

2036

int nodeid);

2033

int nodeid);

2037

2034

2038

struct page *sparse_mem_map_populate(unsigned long pnum, int nid);

2035

struct page *sparse_mem_map_populate(unsigned long pnum, int nid);

2039

pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);

2036

pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);

2040

pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node);

2037

pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node);

2041

pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);

2038

pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);

2042

pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);

2039

pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);

2043

void *vmemmap_alloc_block(unsigned long size, int node);

2040

void *vmemmap_alloc_block(unsigned long size, int node);

2044

void *vmemmap_alloc_block_buf(unsigned long size, int node);

2041

void *vmemmap_alloc_block_buf(unsigned long size, int node);

2045

void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);

2042

void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);

2046

int vmemmap_populate_basepages(unsigned long start, unsigned long end,

2043

int vmemmap_populate_basepages(unsigned long start, unsigned long end,

2047

int node);

2044

int node);

2048

int vmemmap_populate(unsigned long start, unsigned long end, int node);

2045

int vmemmap_populate(unsigned long start, unsigned long end, int node);

2049

void vmemmap_populate_print_last(void);

2046

void vmemmap_populate_print_last(void);

2050

#ifdef CONFIG_MEMORY_HOTPLUG

2047

#ifdef CONFIG_MEMORY_HOTPLUG

2051

void vmemmap_free(unsigned long start, unsigned long end);

2048

void vmemmap_free(unsigned long start, unsigned long end);

2052

#endif

2049

#endif

2053

void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,

2050

void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,

2054

unsigned long size);

2051

unsigned long size);

2055

2052

2056

enum mf_flags {

2053

enum mf_flags {

2057

MF_COUNT_INCREASED = 1 << 0,

2054

MF_COUNT_INCREASED = 1 << 0,

2058

MF_ACTION_REQUIRED = 1 << 1,

2055

MF_ACTION_REQUIRED = 1 << 1,

2059

MF_MUST_KILL = 1 << 2,

2056

MF_MUST_KILL = 1 << 2,

2060

MF_SOFT_OFFLINE = 1 << 3,

2057

MF_SOFT_OFFLINE = 1 << 3,

2061

};

2058

};

2062

extern int memory_failure(unsigned long pfn, int trapno, int flags);

2059

extern int memory_failure(unsigned long pfn, int trapno, int flags);

2063

extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);

2060

extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);

2064

extern int unpoison_memory(unsigned long pfn);

2061

extern int unpoison_memory(unsigned long pfn);

2065

extern int sysctl_memory_failure_early_kill;

2062

extern int sysctl_memory_failure_early_kill;

2066

extern int sysctl_memory_failure_recovery;

2063

extern int sysctl_memory_failure_recovery;

2067

extern void shake_page(struct page *p, int access);

2064

extern void shake_page(struct page *p, int access);

2068

extern atomic_long_t num_poisoned_pages;

2065

extern atomic_long_t num_poisoned_pages;

2069

extern int soft_offline_page(struct page *page, int flags);

2066

extern int soft_offline_page(struct page *page, int flags);

2070

2067

2071

#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)

2068

#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)

2072

extern void clear_huge_page(struct page *page,

2069

extern void clear_huge_page(struct page *page,

2073

unsigned long addr,

2070

unsigned long addr,

2074

unsigned int pages_per_huge_page);

2071

unsigned int pages_per_huge_page);

2075

extern void copy_user_huge_page(struct page *dst, struct page *src,

2072

extern void copy_user_huge_page(struct page *dst, struct page *src,

2076

unsigned long addr, struct vm_area_struct *vma,

2073

unsigned long addr, struct vm_area_struct *vma,

2077

unsigned int pages_per_huge_page);

2074

unsigned int pages_per_huge_page);

2078

#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */

2075

#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */

2079

2076

2080

#ifdef CONFIG_DEBUG_PAGEALLOC

2077

#ifdef CONFIG_DEBUG_PAGEALLOC

2081

extern unsigned int _debug_guardpage_minorder;

2078

extern unsigned int _debug_guardpage_minorder;

2082

2079

2083

static inline unsigned int debug_guardpage_minorder(void)

2080

static inline unsigned int debug_guardpage_minorder(void)

2084

{

2081

{

2085

return _debug_guardpage_minorder;

2082

return _debug_guardpage_minorder;

2086

}

2083

}

2087

2084

2088

static inline bool page_is_guard(struct page *page)

2085

static inline bool page_is_guard(struct page *page)

2089

{

2086

{

2090

return test_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);

2087

return test_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);

2091

}

2088

}

2092

#else

2089

#else

2093

static inline unsigned int debug_guardpage_minorder(void) { return 0; }

2090

static inline unsigned int debug_guardpage_minorder(void) { return 0; }

2094

static inline bool page_is_guard(struct page *page) { return false; }

2091

static inline bool page_is_guard(struct page *page) { return false; }

2095

#endif /* CONFIG_DEBUG_PAGEALLOC */

2092

#endif /* CONFIG_DEBUG_PAGEALLOC */

2096

2093

2097

#if MAX_NUMNODES > 1

2094

#if MAX_NUMNODES > 1

2098

void __init setup_nr_node_ids(void);

2095

void __init setup_nr_node_ids(void);

2099

#else

2096

#else

2100

static inline void setup_nr_node_ids(void) {}

2097

static inline void setup_nr_node_ids(void) {}

2101

#endif

2098

#endif

2102

2099

2103

#endif /* __KERNEL__ */

2100

#endif /* __KERNEL__ */

2104

#endif /* _LINUX_MM_H */

2101

#endif /* _LINUX_MM_H */

2105

2102

GITLAB

mm/readahead.c: inline ra_submit

 #ifndef _LINUX_MM_H
 #define _LINUX_MM_H
 #include <linux/errno.h>
 #ifdef __KERNEL__
 #include <linux/mmdebug.h>
 #include <linux/gfp.h>
 #include <linux/bug.h>
 #include <linux/list.h>
 #include <linux/mmzone.h>
 #include <linux/rbtree.h>
 #include <linux/atomic.h>
 #include <linux/debug_locks.h>
 #include <linux/mm_types.h>
 #include <linux/range.h>
 #include <linux/pfn.h>
 #include <linux/bit_spinlock.h>
 #include <linux/shrinker.h>
 struct mempolicy;
 struct anon_vma;
 struct anon_vma_chain;
 struct file_ra_state;
 struct user_struct;
 struct writeback_control;
 #ifndef CONFIG_NEED_MULTIPLE_NODES	/* Don't use mapnrs, do it properly */
 extern unsigned long max_mapnr;
 /* Record @limit in the global max_mapnr. */
 static inline void set_max_mapnr(unsigned long limit)
 {
 	max_mapnr = limit;
 }
 #else
 /* max_mapnr is not declared in this configuration, so this is a no-op. */
 static inline void set_max_mapnr(unsigned long limit) { }
 #endif
 extern unsigned long totalram_pages;
 extern void * high_memory;
 extern int page_cluster;
 #ifdef CONFIG_SYSCTL
 extern int sysctl_legacy_va_layout;
 #else
 #define sysctl_legacy_va_layout 0
 #endif
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #ifndef __pa_symbol
 #define __pa_symbol(x)  __pa(RELOC_HIDE((unsigned long)(x), 0))
 #endif
 extern unsigned long sysctl_user_reserve_kbytes;
 extern unsigned long sysctl_admin_reserve_kbytes;
 extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
 extern unsigned long sysctl_overcommit_kbytes;
 extern int overcommit_ratio_handler(struct ctl_table *, int, void __user *,
 				    size_t *, loff_t *);
 extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *,
 				    size_t *, loff_t *);
 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
 /* to align the pointer to the (next) page boundary */
 #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
 /*
  * Test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE.
  * The argument is parenthesised so that an expression such as "ptr + 1" is
  * evaluated in full before it is cast to unsigned long.
  */
 #define PAGE_ALIGNED(addr)	IS_ALIGNED((unsigned long)(addr), PAGE_SIZE)
 /*
  * Linux kernel virtual memory manager primitives.
  * The idea being to have a "virtual" mm in the same way
  * we have a virtual fs - giving a cleaner interface to the
  * mm details, and allowing different kinds of memory mappings
  * (from shared memory to executable loading to arbitrary
  * mmap() functions).
  */
 extern struct kmem_cache *vm_area_cachep;
 #ifndef CONFIG_MMU
 extern struct rb_root nommu_region_tree;
 extern struct rw_semaphore nommu_region_sem;
 extern unsigned int kobjsize(const void *objp);
 #endif
 /*
  * vm_flags in vm_area_struct, see mm_types.h.
  */
 #define VM_NONE		0x00000000
 #define VM_READ		0x00000001	/* currently active flags */
 #define VM_WRITE	0x00000002
 #define VM_EXEC		0x00000004
 #define VM_SHARED	0x00000008
 /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
 #define VM_MAYREAD	0x00000010	/* limits for mprotect() etc */
 #define VM_MAYWRITE	0x00000020
 #define VM_MAYEXEC	0x00000040
 #define VM_MAYSHARE	0x00000080
 #define VM_GROWSDOWN	0x00000100	/* general info on the segment */
 #define VM_PFNMAP	0x00000400	/* Page-ranges managed without "struct page", just pure PFN */
 #define VM_DENYWRITE	0x00000800	/* ETXTBSY on write attempts.. */
 #define VM_LOCKED	0x00002000
 #define VM_IO           0x00004000	/* Memory mapped I/O or similar */
 					/* Used by sys_madvise() */
 #define VM_SEQ_READ	0x00008000	/* App will access data sequentially */
 #define VM_RAND_READ	0x00010000	/* App will not benefit from clustered reads */
 #define VM_DONTCOPY	0x00020000      /* Do not copy this vma on fork */
 #define VM_DONTEXPAND	0x00040000	/* Cannot expand with mremap() */
 #define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
 #define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
 #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
 #define VM_ARCH_1	0x01000000	/* Architecture-specific flag */
 #define VM_DONTDUMP	0x04000000	/* Do not include in the core dump */
 #ifdef CONFIG_MEM_SOFT_DIRTY
 # define VM_SOFTDIRTY	0x08000000	/* Not soft dirty clean area */
 #else
 # define VM_SOFTDIRTY	0
 #endif
 #define VM_MIXEDMAP	0x10000000	/* Can contain "struct page" and pure PFN pages */
 #define VM_HUGEPAGE	0x20000000	/* MADV_HUGEPAGE marked this vma */
 #define VM_NOHUGEPAGE	0x40000000	/* MADV_NOHUGEPAGE marked this vma */
 #define VM_MERGEABLE	0x80000000	/* KSM may merge identical pages */
 #if defined(CONFIG_X86)
 # define VM_PAT		VM_ARCH_1	/* PAT reserves whole VMA at once (x86) */
 #elif defined(CONFIG_PPC)
 # define VM_SAO		VM_ARCH_1	/* Strong Access Ordering (powerpc) */
 #elif defined(CONFIG_PARISC)
 # define VM_GROWSUP	VM_ARCH_1
 #elif defined(CONFIG_METAG)
 # define VM_GROWSUP	VM_ARCH_1
 #elif defined(CONFIG_IA64)
 # define VM_GROWSUP	VM_ARCH_1
 #elif !defined(CONFIG_MMU)
 # define VM_MAPPED_COPY	VM_ARCH_1	/* T if mapped copy of data (nommu mmap) */
 #endif
 #ifndef VM_GROWSUP
 # define VM_GROWSUP	VM_NONE
 #endif
 /* Bits set in the VMA until the stack is in its final location */
 #define VM_STACK_INCOMPLETE_SETUP	(VM_RAND_READ | VM_SEQ_READ)
 #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
 #endif
 #ifdef CONFIG_STACK_GROWSUP
 #define VM_STACK_FLAGS	(VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
 #else
 #define VM_STACK_FLAGS	(VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
 #endif
 /*
  * Special vmas that are non-mergable, non-mlock()able.
  * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
  */
 #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
 /* This mask defines which mm->def_flags a process can inherit from its parent */
 #define VM_INIT_DEF_MASK	VM_NOHUGEPAGE
 /*
  * mapping from the currently active vm_flags protection bits (the
  * low four bits) to a page protection mask..
  */
 extern pgprot_t protection_map[16];
 #define FAULT_FLAG_WRITE	0x01	/* Fault was a write access */
 #define FAULT_FLAG_NONLINEAR	0x02	/* Fault was via a nonlinear mapping */
 #define FAULT_FLAG_MKWRITE	0x04	/* Fault was mkwrite of existing pte */
 #define FAULT_FLAG_ALLOW_RETRY	0x08	/* Retry fault if blocking */
 #define FAULT_FLAG_RETRY_NOWAIT	0x10	/* Don't drop mmap_sem and wait when retrying */
 #define FAULT_FLAG_KILLABLE	0x20	/* The fault task is in SIGKILL killable region */
 #define FAULT_FLAG_TRIED	0x40	/* second try */
 #define FAULT_FLAG_USER		0x80	/* The fault originated in userspace */
 /*
  * vm_fault is filled by the pagefault handler and passed to the vma's
  * ->fault function. The vma's ->fault is responsible for returning a bitmask
  * of VM_FAULT_xxx flags that give details about how the fault was handled.
  *
  * pgoff should be used in favour of virtual_address, if possible. If pgoff
  * is used, one may implement ->remap_pages to get nonlinear mapping support.
  */
 /* Per-fault argument block handed to the vm_operations_struct handlers. */
 struct vm_fault {
 	unsigned int flags;		/* FAULT_FLAG_xxx flags */
 	pgoff_t pgoff;			/* Logical page offset based on vma */
 	void __user *virtual_address;	/* Faulting virtual address */
 	struct page *page;		/* ->fault handlers should return a
 					 * page here, unless VM_FAULT_NOPAGE
 					 * is set (which is also implied by
 					 * VM_FAULT_ERROR).
 					 */
 	/* for ->map_pages() only */
 	pgoff_t max_pgoff;		/* map pages for offset from pgoff till
 					 * max_pgoff inclusive */
 	pte_t *pte;			/* pte entry associated with ->pgoff */
 };
 /*
  * These are the virtual MM functions - opening of an area, closing and
  * unmapping it (needed to keep files on disk up-to-date etc), pointer
  * to the functions called when a no-page or a wp-page exception occurs.
  */
 /* Table of per-vma callbacks; each member is a function pointer. */
 struct vm_operations_struct {
 	/* called when an area is opened */
 	void (*open)(struct vm_area_struct * area);
 	/* called when an area is closed */
 	void (*close)(struct vm_area_struct * area);
 	/* fault handler; returns a bitmask of VM_FAULT_xxx flags */
 	int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
 	/* uses vmf->max_pgoff and vmf->pte, which are for this op only */
 	void (*map_pages)(struct vm_area_struct *vma, struct vm_fault *vmf);
 	/* notification that a previously read-only page is about to become
 	 * writable, if an error is returned it will cause a SIGBUS */
 	int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);
 	/* called by access_process_vm when get_user_pages() fails, typically
 	 * for use by special VMAs that can switch between memory and hardware
 	 */
 	int (*access)(struct vm_area_struct *vma, unsigned long addr,
 		      void *buf, int len, int write);
 #ifdef CONFIG_NUMA
 	/*
 	 * set_policy() op must add a reference to any non-NULL @new mempolicy
 	 * to hold the policy upon return.  Caller should pass NULL @new to
 	 * remove a policy and fall back to surrounding context--i.e. do not
 	 * install a MPOL_DEFAULT policy, nor the task or system default
 	 * mempolicy.
 	 */
 	int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
 	/*
 	 * get_policy() op must add reference [mpol_get()] to any policy at
 	 * (vma,addr) marked as MPOL_SHARED.  The shared policy infrastructure
 	 * in mm/mempolicy.c will do this automatically.
 	 * get_policy() must NOT add a ref if the policy at (vma,addr) is not
 	 * marked as MPOL_SHARED. vma policies are protected by the mmap_sem.
 	 * If no [shared/vma] mempolicy exists at the addr, get_policy() op
 	 * must return NULL--i.e., do not "fallback" to task or system default
 	 * policy.
 	 */
 	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
 					unsigned long addr);
 	/* NOTE(review): presumably moves the vma's pages between node sets -- confirm */
 	int (*migrate)(struct vm_area_struct *vma, const nodemask_t *from,
 		const nodemask_t *to, unsigned long flags);
 #endif
 	/* called by sys_remap_file_pages() to populate non-linear mapping */
 	int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr,
 			   unsigned long size, pgoff_t pgoff);
 };
 struct mmu_gather;
 struct inode;
 #define page_private(page)		((page)->private)
 #define set_page_private(page, v)	((page)->private = (v))
 /*
  * Stash @migratetype in page->index.
  * Only valid while the page is in the free path or on a free_list.
  */
 static inline void set_freepage_migratetype(struct page *page, int migratetype)
 {
 	page->index = migratetype;
 }
 /*
  * Read back the migratetype stored in page->index.
  * Only valid while the page is in the free path or on a free_list.
  */
 static inline int get_freepage_migratetype(struct page *page)
 {
 	int migratetype = page->index;
 	return migratetype;
 }
 /*
  * FIXME: take this include out, include page-flags.h in
  * files which need it (119 of them)
  */
 #include <linux/page-flags.h>
 #include <linux/huge_mm.h>
 /*
  * Methods to modify the page usage count.
  *
  * What counts for a page usage:
  * - cache mapping   (page->mapping)
  * - private data    (page->private)
  * - page mapped in a task's page tables, each mapping
  *   is counted separately
  *
  * Also, many kernel routines increase the page count before a critical
  * routine so they can be sure the page doesn't go away from under them.
  */
 /*
  * Release one reference on @page.  Returns non-zero when this drop took
  * the refcount to zero, i.e. the page has no users left.
  */
 static inline int put_page_testzero(struct page *page)
 {
 	int hit_zero;
 	/* The count must not already be zero when a reference is dropped. */
 	VM_BUG_ON_PAGE(atomic_read(&page->_count) == 0, page);
 	hit_zero = atomic_dec_and_test(&page->_count);
 	return hit_zero;
 }
 /*
  * Take a reference on @page unless its refcount is zero; the result is
  * false in that case.
  * May be called with the MMU off, so it must not touch any virtual
  * mappings.
  */
 static inline int get_page_unless_zero(struct page *page)
 {
 	int got_ref = atomic_inc_not_zero(&page->_count);
 	return got_ref;
 }
 /*
  * Drop a reference on @page unless its refcount is one; the result is
  * false in that case.  This guarantees the refcount cannot reach zero
  * through this drop.
  * May be called with the MMU off, so it must not touch any virtual
  * mappings.
  */
 static inline int put_page_unless_one(struct page *page)
 {
 	int dropped = atomic_add_unless(&page->_count, -1, 1);
 	return dropped;
 }
 extern int page_is_ram(unsigned long pfn);
 /* Support for virtually mapped pages */
 struct page *vmalloc_to_page(const void *addr);
 unsigned long vmalloc_to_pfn(const void *addr);
 /*
  * Tell whether @x lies in [VMALLOC_START, VMALLOC_END).
  *
  * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so no
  * special casing is needed and the answer is always 0.
  */
 static inline int is_vmalloc_addr(const void *x)
 {
 #ifdef CONFIG_MMU
 	const unsigned long where = (unsigned long)x;
 	if (where < VMALLOC_START)
 		return 0;
 	return where < VMALLOC_END;
 #else
 	return 0;
 #endif
 }
 #ifdef CONFIG_MMU
 /* Implemented out of line when an MMU is configured. */
 extern int is_vmalloc_or_module_addr(const void *x);
 #else
 /* Without CONFIG_MMU no address qualifies: always returns 0. */
 static inline int is_vmalloc_or_module_addr(const void *x)
 {
 	return 0;
 }
 #endif
 /*
  * Take the PG_compound_lock bit spinlock in page->flags.
  * Compiles to nothing without CONFIG_TRANSPARENT_HUGEPAGE.
  */
 static inline void compound_lock(struct page *page)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	/* Must not be used on slab pages. */
 	VM_BUG_ON_PAGE(PageSlab(page), page);
 	bit_spin_lock(PG_compound_lock, &page->flags);
 #endif
 }
 /*
  * Release the PG_compound_lock bit spinlock in page->flags.
  * Compiles to nothing without CONFIG_TRANSPARENT_HUGEPAGE.
  */
 static inline void compound_unlock(struct page *page)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	/* Must not be used on slab pages. */
 	VM_BUG_ON_PAGE(PageSlab(page), page);
 	bit_spin_unlock(PG_compound_lock, &page->flags);
 #endif
 }
 /*
  * Save and disable local interrupts, then take the compound lock.
  * Returns the saved interrupt flags for compound_unlock_irqrestore().
  * Without CONFIG_TRANSPARENT_HUGEPAGE nothing is done and the returned
  * value is never assigned, so it carries no meaning.
  */
 static inline unsigned long compound_lock_irqsave(struct page *page)
 {
 	/* Deliberately left unset in the !THP build; see above. */
 	unsigned long uninitialized_var(flags);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	local_irq_save(flags);
 	compound_lock(page);
 #endif
 	return flags;
 }
 /*
  * Release the compound lock, then restore the interrupt state in @flags
  * (the value returned by compound_lock_irqsave()).
  * Compiles to nothing without CONFIG_TRANSPARENT_HUGEPAGE.
  */
 static inline void compound_unlock_irqrestore(struct page *page,
 					      unsigned long flags)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	/* Reverse order of compound_lock_irqsave(): unlock, then irqs. */
 	compound_unlock(page);
 	local_irq_restore(flags);
 #endif
 }
 /*
  * Return the head page for @page: page->first_page when @page is a tail
  * page, otherwise @page itself.
  */
 static inline struct page *compound_head(struct page *page)
 {
 	if (unlikely(PageTail(page))) {
 		/* Sample first_page before PageTail() is re-tested below. */
 		struct page *head = page->first_page;
 		/*
 		 * page->first_page may be a dangling pointer to an old
 		 * compound page, so recheck that it is still a tail
 		 * page before returning.
 		 */
 		smp_rmb();
 		if (likely(PageTail(page)))
 			return head;
 	}
 	/* Not a tail page, or no longer one: @page stands for itself. */
 	return page;
 }
 /*
  * The atomic page->_mapcount, starts from -1: so that transitions
  * both from it and to it can be tracked, using atomic_inc_and_test
  * and atomic_add_negative(-1).
  */
 /* Put page->_mapcount back to its initial value of -1. */
 static inline void page_mapcount_reset(struct page *page)
 {
 	atomic_set(&page->_mapcount, -1);
 }
 /* Return the value of page->_mapcount plus one (the counter starts at -1). */
 static inline int page_mapcount(struct page *page)
 {
 	int raw = atomic_read(&page->_mapcount);
 	return raw + 1;
 }
 static inline int page_count(struct page *page)
 {
 	return atomic_read(&compound_head(page)->_count);
 }
 #ifdef CONFIG_HUGETLB_PAGE
 /* Implemented out of line when hugetlb pages are configured. */
 extern int PageHeadHuge(struct page *page_head);
 #else /* CONFIG_HUGETLB_PAGE */
 /* Without CONFIG_HUGETLB_PAGE this always returns 0. */
 static inline int PageHeadHuge(struct page *page_head)
 {
 	return 0;
 }
 #endif /* CONFIG_HUGETLB_PAGE */
 /* True unless @page is a slab page or a hugetlb head page. */
 static inline bool __compound_tail_refcounted(struct page *page)
 {
 	return !(PageSlab(page) || PageHeadHuge(page));
 }
 /*
  * Given a head page, report whether tail pages are reference counted;
  * a false result means tail page reference counting can be skipped.
  *
  * For this to be safe, PageSlab and PageHeadHuge must stay true on any
  * page for which they are true here until all tail pins are released.
  */
 static inline bool compound_tail_refcounted(struct page *page)
 {
 	bool counted;
 	/* Only meaningful on a head page. */
 	VM_BUG_ON_PAGE(!PageHead(page), page);
 	counted = __compound_tail_refcounted(page);
 	return counted;
 }
 /*
  * Account a reference on tail page @page by incrementing its _mapcount,
  * unless the head page reports that tail refcounting can be skipped.
  */
 static inline void get_huge_page_tail(struct page *page)
 {
 	struct page *head;
 	/*
 	 * __split_huge_page_refcount() cannot run from under us.
 	 */
 	VM_BUG_ON_PAGE(!PageTail(page), page);
 	VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
 	VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page);
 	head = page->first_page;
 	if (!compound_tail_refcounted(head))
 		return;
 	atomic_inc(&page->_mapcount);
 }
 extern bool __get_page_tail(struct page *page);
 /* Take a reference on @page. */
 static inline void get_page(struct page *page)
 {
 	/* Tail pages go through __get_page_tail(); done if that succeeds. */
 	if (unlikely(PageTail(page)) && likely(__get_page_tail(page)))
 		return;
 	/*
 	 * Getting a normal page or the head of a compound page
 	 * requires to already have an elevated page->_count.
 	 */
 	VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page);
 	atomic_inc(&page->_count);
 }
 /* Translate @x with virt_to_page() and return that page's compound head. */
 static inline struct page *virt_to_head_page(const void *x)
 {
 	return compound_head(virt_to_page(x));
 }
 /*
  * Set the page count to 1 before the page is freed into the page
  * allocator for the first time (boot or memory hotplug).
  */
 static inline void init_page_count(struct page *page)
 {
 	atomic_set(&page->_count, 1);
 }
 /*
  * PageBuddy() indicates that the page is free and in the buddy system
  * (see mm/page_alloc.c).
  *
  * PAGE_BUDDY_MAPCOUNT_VALUE must be <= -2 but better not too close to
  * -2 so that an underflow of the page_mapcount() won't be mistaken
  * for a genuine PAGE_BUDDY_MAPCOUNT_VALUE. -128 can be created very
  * efficiently by most CPU architectures.
  */
 #define PAGE_BUDDY_MAPCOUNT_VALUE (-128)
 /* Non-zero when page->_mapcount holds PAGE_BUDDY_MAPCOUNT_VALUE. */
 static inline int PageBuddy(struct page *page)
 {
 	int mapcount = atomic_read(&page->_mapcount);
 	return mapcount == PAGE_BUDDY_MAPCOUNT_VALUE;
 }
 /* Mark @page as buddy by storing PAGE_BUDDY_MAPCOUNT_VALUE in _mapcount. */
 static inline void __SetPageBuddy(struct page *page)
 {
 	/* _mapcount must still hold its reset value of -1. */
 	VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
 	atomic_set(&page->_mapcount, PAGE_BUDDY_MAPCOUNT_VALUE);
 }
 /* Clear the buddy marking by putting _mapcount back to -1. */
 static inline void __ClearPageBuddy(struct page *page)
 {
 	/* Clearing a page that is not marked buddy is a bug. */
 	VM_BUG_ON_PAGE(!PageBuddy(page), page);
 	atomic_set(&page->_mapcount, -1);
 }
 void put_page(struct page *page);
 void put_pages_list(struct list_head *pages);
 void split_page(struct page *page, unsigned int order);
 int split_free_page(struct page *page);
 /*
  * Compound pages have a destructor function.  Provide a
  * prototype for that function and accessor functions.
  * These are _only_ valid on the head of a PG_compound page.
  */
 typedef void compound_page_dtor(struct page *);
 /* Store @dtor in the lru.next slot of the page following @page. */
 static inline void set_compound_page_dtor(struct page *page,
 						compound_page_dtor *dtor)
 {
 	struct page *first_tail = page + 1;
 	first_tail->lru.next = (void *)dtor;
 }
 static inline compound_page_dtor *get_compound_page_dtor(struct page *page)
 {
 	return (compound_page_dtor *)page[1].lru.next;
 }
 /*
  * Return the order kept in page[1].lru.prev when @page is a head page,
  * and 0 for any other page.
  */
 static inline int compound_order(struct page *page)
 {
 	if (PageHead(page))
 		return (unsigned long)page[1].lru.prev;
 	return 0;
 }
 /* Store @order in the lru.prev slot of the page following @page. */
 static inline void set_compound_order(struct page *page, unsigned long order)
 {
 	struct page *first_tail = page + 1;
 	first_tail->lru.prev = (void *)order;
 }
 #ifdef CONFIG_MMU
 /*
  * Do pte_mkwrite, but only if the vma says VM_WRITE.  We do this when
  * servicing faults for write access.  In the normal case, we do always want
  * pte_mkwrite.  But get_user_pages can cause write faults for mappings
  * that do not have writing enabled, when used by access_process_vm.
  */
 static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 {
 	/* Uncommon case: the vma lacks VM_WRITE, hand @pte back untouched. */
 	if (unlikely(!(vma->vm_flags & VM_WRITE)))
 		return pte;
 	return pte_mkwrite(pte);
 }
 void do_set_pte(struct vm_area_struct *vma, unsigned long address,
 		struct page *page, pte_t *pte, bool write, bool anon);
 #endif
 /*
  * Multiple processes may "see" the same page. E.g. for untouched
  * mappings of /dev/null, all processes see the same page full of
  * zeroes, and text pages of executables and shared libraries have
  * only one copy in memory, at most, normally.
  *
  * For the non-reserved pages, page_count(page) denotes a reference count.
  *   page_count() == 0 means the page is free. page->lru is then used for
  *   freelist management in the buddy allocator.
  *   page_count() > 0  means the page has been allocated.
  *
  * Pages are allocated by the slab allocator in order to provide memory
  * to kmalloc and kmem_cache_alloc. In this case, the management of the
  * page, and the fields in 'struct page' are the responsibility of mm/slab.c
  * unless a particular usage is carefully commented. (the responsibility of
  * freeing the kmalloc memory is the caller's, of course).
  *
  * A page may be used by anyone else who does a __get_free_page().
  * In this case, page_count still tracks the references, and should only
  * be used through the normal accessor functions. The top bits of page->flags
  * and page->virtual store page management information, but all other fields
  * are unused and could be used privately, carefully. The management of this
  * page is the responsibility of the one who allocated it, and those who have
  * subsequently been given references to it.
  *
  * The other pages (we may call them "pagecache pages") are completely
  * managed by the Linux memory manager: I/O, buffers, swapping etc.
  * The following discussion applies only to them.
  *
  * A pagecache page contains an opaque `private' member, which belongs to the
  * page's address_space. Usually, this is the address of a circular list of
  * the page's disk buffers. PG_private must be set to tell the VM to call
  * into the filesystem to release these pages.
  *
  * A page may belong to an inode's memory mapping. In this case, page->mapping
  * is the pointer to the inode, and page->index is the file offset of the page,
  * in units of PAGE_CACHE_SIZE.
  *
  * If pagecache pages are not associated with an inode, they are said to be
  * anonymous pages. These may become associated with the swapcache, and in that
  * case PG_swapcache is set, and page->private is an offset into the swapcache.
  *
  * In either case (swapcache or inode backed), the pagecache itself holds one
  * reference to the page. Setting PG_private should also increment the
  * refcount. Each user mapping also has a reference to the page.
  *
  * The pagecache pages are stored in a per-mapping radix tree, which is
  * rooted at mapping->page_tree, and indexed by offset.
  * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space
  * lists, we instead now tag pages as dirty/writeback in the radix tree.
  *
  * All pagecache pages may be subject to I/O:
  * - inode pages may need to be read from disk,
  * - inode pages which have been modified and are MAP_SHARED may need
  *   to be written back to the inode on disk,
  * - anonymous pages (including MAP_PRIVATE file mappings) which have been
  *   modified may need to be swapped out to swap space and (later) to be read
  *   back into memory.
  */
 /*
  * The zone field is never updated after free_area_init_core()
  * sets it, so none of the operations on it need to be atomic.
  */
 /* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */
 #define SECTIONS_PGOFF		((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
 #define NODES_PGOFF		(SECTIONS_PGOFF - NODES_WIDTH)
 #define ZONES_PGOFF		(NODES_PGOFF - ZONES_WIDTH)
 #define LAST_CPUPID_PGOFF	(ZONES_PGOFF - LAST_CPUPID_WIDTH)
 /*
  * Define the bit shifts to access each section.  For non-existent
  * sections we define the shift as 0; that plus a 0 mask ensures
  * the compiler will optimise away reference to them.
  */
 #define SECTIONS_PGSHIFT	(SECTIONS_PGOFF * (SECTIONS_WIDTH != 0))
 #define NODES_PGSHIFT		(NODES_PGOFF * (NODES_WIDTH != 0))
 #define ZONES_PGSHIFT		(ZONES_PGOFF * (ZONES_WIDTH != 0))
 #define LAST_CPUPID_PGSHIFT	(LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0))
 /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */
 #ifdef NODE_NOT_IN_PAGE_FLAGS
 #define ZONEID_SHIFT		(SECTIONS_SHIFT + ZONES_SHIFT)
 #define ZONEID_PGOFF		((SECTIONS_PGOFF < ZONES_PGOFF)? \
 						SECTIONS_PGOFF : ZONES_PGOFF)
 #else
 #define ZONEID_SHIFT		(NODES_SHIFT + ZONES_SHIFT)
 #define ZONEID_PGOFF		((NODES_PGOFF < ZONES_PGOFF)? \
 						NODES_PGOFF : ZONES_PGOFF)
 #endif
 #define ZONEID_PGSHIFT		(ZONEID_PGOFF * (ZONEID_SHIFT != 0))
 #if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS
 #error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS
 #endif
 #define ZONES_MASK		((1UL << ZONES_WIDTH) - 1)
 #define NODES_MASK		((1UL << NODES_WIDTH) - 1)
 #define SECTIONS_MASK		((1UL << SECTIONS_WIDTH) - 1)
 #define LAST_CPUPID_MASK	((1UL << LAST_CPUPID_WIDTH) - 1)
 #define ZONEID_MASK		((1UL << ZONEID_SHIFT) - 1)
 /* Extract the zone field from page->flags. */
 static inline enum zone_type page_zonenum(const struct page *page)
 {
 	unsigned long zone_bits = page->flags >> ZONES_PGSHIFT;
 	return zone_bits & ZONES_MASK;
 }
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 #define SECTION_IN_PAGE_FLAGS
 #endif
 /*
  * Identifier the buddy allocator mainly uses to decide whether two pages
  * could be buddies.  It is not really a zone identifier: when the node id
  * is not available in page flags the section number may be used instead.
  * The only guarantee is that two combinable pages in a zone yield the
  * same value.
  */
 static inline int page_zone_id(struct page *page)
 {
 	unsigned long id_bits = page->flags >> ZONEID_PGSHIFT;
 	return id_bits & ZONEID_MASK;
 }
 /* Return the node id of @zone: zone->node with CONFIG_NUMA, otherwise 0. */
 static inline int zone_to_nid(struct zone *zone)
 {
 #ifdef CONFIG_NUMA
 	return zone->node;
 #else
 	return 0;
 #endif
 }
 #ifdef NODE_NOT_IN_PAGE_FLAGS
 extern int page_to_nid(const struct page *page);
 #else
 /* Extract the node field from page->flags. */
 static inline int page_to_nid(const struct page *page)
 {
 	unsigned long node_bits = page->flags >> NODES_PGSHIFT;
 	return node_bits & NODES_MASK;
 }
 #endif
 #ifdef CONFIG_NUMA_BALANCING
 /* Pack the masked @cpu (upper field) and masked @pid (lower field). */
 static inline int cpu_pid_to_cpupid(int cpu, int pid)
 {
 	return (pid & LAST__PID_MASK) |
 	       ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT);
 }
 /* Extract the pid field (low bits) of @cpupid. */
 static inline int cpupid_to_pid(int cpupid)
 {
 	return LAST__PID_MASK & cpupid;
 }
 /* Extract the cpu field, stored above the pid field, of @cpupid. */
 static inline int cpupid_to_cpu(int cpupid)
 {
 	int above_pid = cpupid >> LAST__PID_SHIFT;
 	return above_pid & LAST__CPU_MASK;
 }
 /* Map the cpu field of @cpupid to its node via cpu_to_node(). */
 static inline int cpupid_to_nid(int cpupid)
 {
 	int cpu = cpupid_to_cpu(cpupid);
 	return cpu_to_node(cpu);
 }
 /* True when the pid field of @cpupid is all ones, i.e. -1 masked. */
 static inline bool cpupid_pid_unset(int cpupid)
 {
 	return (-1 & LAST__PID_MASK) == cpupid_to_pid(cpupid);
 }
 /* True when the cpu field of @cpupid is all ones, i.e. -1 masked. */
 static inline bool cpupid_cpu_unset(int cpupid)
 {
 	return (-1 & LAST__CPU_MASK) == cpupid_to_cpu(cpupid);
 }
 /* Compare the masked @task_pid with the pid field of @cpupid. */
 static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid)
 {
 	return cpupid_to_pid(cpupid) == (task_pid & LAST__PID_MASK);
 }
 /*
  * Match @task's pid against the pid field of @cpupid.  @task is
  * parenthesised so that any pointer-valued expression may be passed.
  */
 #define cpupid_match_pid(task, cpupid) __cpupid_match_pid((task)->pid, cpupid)
 #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
 /*
  * Exchange page->_last_cpupid with @cpupid masked by LAST_CPUPID_MASK
  * and return what xchg() hands back.
  */
 static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
 {
 	return xchg(&page->_last_cpupid, cpupid & LAST_CPUPID_MASK);
 }
 /* Read the cpupid kept in the dedicated page->_last_cpupid field. */
 static inline int page_cpupid_last(struct page *page)
 {
 	return page->_last_cpupid;
 }
 /* Set page->_last_cpupid to all ones within LAST_CPUPID_MASK. */
 static inline void page_cpupid_reset_last(struct page *page)
 {
 	page->_last_cpupid = -1 & LAST_CPUPID_MASK;
 }
 #else
 /* Extract the last-cpupid field from page->flags. */
 static inline int page_cpupid_last(struct page *page)
 {
 	unsigned long cpupid_bits = page->flags >> LAST_CPUPID_PGSHIFT;
 	return cpupid_bits & LAST_CPUPID_MASK;
 }
 extern int page_cpupid_xchg_last(struct page *page, int cpupid);
 /*
  * Set the last-cpupid field in page->flags to all ones.
  * Done as two plain read-modify-write steps: clear the field, then OR
  * the new value in.
  */
 static inline void page_cpupid_reset_last(struct page *page)
 {
 	/* All ones across LAST_CPUPID_SHIFT bits. */
 	int cpupid = (1 << LAST_CPUPID_SHIFT) - 1;
 	page->flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT);
 	page->flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT;
 }
 #endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */
 #else /* !CONFIG_NUMA_BALANCING */
 /*
  * Stubs used when CONFIG_NUMA_BALANCING is not set.
  */
 /* Nothing is exchanged; the page's node id is returned instead. */
 static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
 {
 	return page_to_nid(page); /* XXX */
 }
 /* No cpupid is tracked; the page's node id is returned instead. */
 static inline int page_cpupid_last(struct page *page)
 {
 	return page_to_nid(page); /* XXX */
 }
 /* Always -1 in this configuration. */
 static inline int cpupid_to_nid(int cpupid)
 {
 	return -1;
 }
 /* Always -1 in this configuration. */
 static inline int cpupid_to_pid(int cpupid)
 {
 	return -1;
 }
 /* Always -1 in this configuration. */
 static inline int cpupid_to_cpu(int cpupid)
 {
 	return -1;
 }
 /* Always -1; note the first parameter is named nid here. */
 static inline int cpu_pid_to_cpupid(int nid, int pid)
 {
 	return -1;
 }
 /* The pid is always reported as unset. */
 static inline bool cpupid_pid_unset(int cpupid)
 {
 	return 1;
 }
 /* Nothing to reset. */
 static inline void page_cpupid_reset_last(struct page *page)
 {
 }
 /* Never matches. */
 static inline bool cpupid_match_pid(struct task_struct *task, int cpupid)
 {
 	return false;
 }
 #endif /* CONFIG_NUMA_BALANCING */
 /* Look up @page's zone from its node id and zone number. */
 static inline struct zone *page_zone(const struct page *page)
 {
 	return NODE_DATA(page_to_nid(page))->node_zones + page_zonenum(page);
 }
 #ifdef SECTION_IN_PAGE_FLAGS
 /*
  * Store @section (masked by SECTIONS_MASK) in the section field of
  * page->flags: clear the field, then OR the new value in.
  */
 static inline void set_page_section(struct page *page, unsigned long section)
 {
 	page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT);
 	page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
 }
 /* Extract the section field from page->flags. */
 static inline unsigned long page_to_section(const struct page *page)
 {
 	unsigned long section_bits = page->flags >> SECTIONS_PGSHIFT;
 	return section_bits & SECTIONS_MASK;
 }
 #endif
 /*
  * Store @zone (masked by ZONES_MASK) in the zone field of page->flags:
  * clear the field, then OR the new value in.  Plain, non-atomic updates.
  */
 static inline void set_page_zone(struct page *page, enum zone_type zone)
 {
 	page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
 	page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT;
 }
 /*
  * Store @node (masked by NODES_MASK) in the node field of page->flags:
  * clear the field, then OR the new value in.  Plain, non-atomic updates.
  */
 static inline void set_page_node(struct page *page, unsigned long node)
 {
 	page->flags &= ~(NODES_MASK << NODES_PGSHIFT);
 	page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;
 }
 /*
  * Record @zone and @node in page->flags and, when SECTION_IN_PAGE_FLAGS
  * is defined, also the section number derived from @pfn.
  */
 static inline void set_page_links(struct page *page, enum zone_type zone,
 	unsigned long node, unsigned long pfn)
 {
 	set_page_zone(page, zone);
 	set_page_node(page, node);
 #ifdef SECTION_IN_PAGE_FLAGS
 	/* @pfn is only used in this configuration. */
 	set_page_section(page, pfn_to_section_nr(pfn));
 #endif
 }
 /*
  * Some inline functions in vmstat.h depend on page_zone()
  */
 #include <linux/vmstat.h>
 /* Page -> pfn -> physical address -> virtual address via __va(). */
 static __always_inline void *lowmem_page_address(const struct page *page)
 {
 	void *vaddr = __va(PFN_PHYS(page_to_pfn(page)));
 	return vaddr;
 }
 #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
 #define HASHED_PAGE_VIRTUAL
 #endif
 #if defined(WANT_PAGE_VIRTUAL)
 /* With WANT_PAGE_VIRTUAL the address is cached in page->virtual. */
 static inline void *page_address(const struct page *page)
 {
 	return page->virtual;
 }
 /* Record @address in page->virtual. */
 static inline void set_page_address(struct page *page, void *address)
 {
 	page->virtual = address;
 }
 /* No initialisation is needed in this configuration. */
 #define page_address_init()  do { } while(0)
 #endif
 #if defined(HASHED_PAGE_VIRTUAL)
 void *page_address(const struct page *page);
 void set_page_address(struct page *page, void *virtual);
 void page_address_init(void);
 #endif
 #if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL)
 #define page_address(page) lowmem_page_address(page)
 #define set_page_address(page, address)  do { } while(0)
 #define page_address_init()  do { } while(0)
 #endif
 /*
  * On an anonymous page mapped into a user virtual memory area,
  * page->mapping points to its anon_vma, not to a struct address_space;
  * with the PAGE_MAPPING_ANON bit set to distinguish it.  See rmap.h.
  *
  * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled,
  * the PAGE_MAPPING_KSM bit may be set along with the PAGE_MAPPING_ANON bit;
  * and then page->mapping points, not to an anon_vma, but to a private
  * structure which KSM associates with that merged page.  See ksm.h.
  *
  * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is currently never used.
  *
  * Please note that, confusingly, "page_mapping" refers to the inode
  * address_space which maps the page from disk; whereas "page_mapped"
  * refers to user virtual address space into which the page is mapped.
  */
 #define PAGE_MAPPING_ANON	1
 #define PAGE_MAPPING_KSM	2
 #define PAGE_MAPPING_FLAGS	(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM)
 extern struct address_space *page_mapping(struct page *page);
 /*
  * Return page->mapping with the PAGE_MAPPING_FLAGS bits cleared, as a
  * neutral pointer (address_space, anon_vma or other).
  */
 static inline void *page_rmapping(struct page *page)
 {
 	unsigned long raw = (unsigned long)page->mapping;
 	raw &= ~PAGE_MAPPING_FLAGS;
 	return (void *)raw;
 }
 extern struct address_space *__page_file_mapping(struct page *);
 /*
  * Return page->mapping, except for swapcache pages, which are resolved
  * through __page_file_mapping().
  */
 static inline
 struct address_space *page_file_mapping(struct page *page)
 {
 	if (likely(!PageSwapCache(page)))
 		return page->mapping;
 	return __page_file_mapping(page);
 }
 /* Non-zero when the PAGE_MAPPING_ANON bit is set in page->mapping. */
 static inline int PageAnon(struct page *page)
 {
 	unsigned long raw = (unsigned long)page->mapping;
 	return (raw & PAGE_MAPPING_ANON) != 0;
 }
 /*
  * Return the pagecache index of @page: ->index for regular pagecache
  * pages, ->private for swapcache pages.
  */
 static inline pgoff_t page_index(struct page *page)
 {
 	if (likely(!PageSwapCache(page)))
 		return page->index;
 	return page_private(page);
 }
 extern pgoff_t __page_file_index(struct page *page);
 /*
  * Return the file index of @page: ->index for regular pagecache pages,
  * swp_offset(->private) for swapcache pages (via __page_file_index()).
  */
 static inline pgoff_t page_file_index(struct page *page)
 {
 	if (likely(!PageSwapCache(page)))
 		return page->index;
 	return __page_file_index(page);
 }
 /*
  * Return true if this page is mapped into pagetables, i.e. its
  * _mapcount is not negative.
  */
 static inline int page_mapped(struct page *page)
 {
 	int mapcount = atomic_read(&page->_mapcount);
 	return mapcount >= 0;
 }
 /*
  * Different kinds of faults, as returned by handle_mm_fault().
  * Used to decide whether a process gets delivered SIGBUS or
  * just gets major/minor fault counters bumped up.
  */
 #define VM_FAULT_MINOR	0 /* For backwards compat. Remove me quickly. */
 #define VM_FAULT_OOM	0x0001
 #define VM_FAULT_SIGBUS	0x0002
 #define VM_FAULT_MAJOR	0x0004
 #define VM_FAULT_WRITE	0x0008	/* Special case for get_user_pages */
 #define VM_FAULT_HWPOISON 0x0010	/* Hit poisoned small page */
 #define VM_FAULT_HWPOISON_LARGE 0x0020  /* Hit poisoned large page. Index encoded in upper bits */
 #define VM_FAULT_NOPAGE	0x0100	/* ->fault installed the pte, not return page */
 #define VM_FAULT_LOCKED	0x0200	/* ->fault locked the returned page */
 #define VM_FAULT_RETRY	0x0400	/* ->fault blocked, must retry */
 #define VM_FAULT_FALLBACK 0x0800	/* huge page fault failed, fall back to small */
 #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */
 #define VM_FAULT_ERROR	(VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \
 			 VM_FAULT_FALLBACK | VM_FAULT_HWPOISON_LARGE)
 /*
  * The hstate index of a hwpoisoned large page travels in bits 12-15 of
  * the fault code: SET shifts an index into place, GET extracts it again.
  */
 #define VM_FAULT_SET_HINDEX(x) ((x) << 12)
 #define VM_FAULT_GET_HINDEX(x) (((x) & 0xf000) >> 12)
 /*
  * Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
  */
 extern void pagefault_out_of_memory(void);
 /*
  * Keep only the bits of @p that lie outside PAGE_MASK (presumably the byte
  * offset of @p within its page - confirm against the PAGE_MASK definition).
  */
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
 /*
  * Flags passed to show_mem() and show_free_areas() to suppress output in
  * various contexts.
  */
 #define SHOW_MEM_FILTER_NODES		(0x0001u)	/* disallowed nodes */
 extern void show_free_areas(unsigned int flags);
 extern bool skip_free_areas_node(unsigned int flags, int nid);
 int shmem_zero_setup(struct vm_area_struct *);
 /*
  * shmem_mapping() is only declared here when CONFIG_SHMEM is enabled;
  * without it the inline stub below reports false for every mapping.
  */
 #ifdef CONFIG_SHMEM
 bool shmem_mapping(struct address_space *mapping);
 #else
 static inline bool shmem_mapping(struct address_space *mapping)
 {
 	return false;
 }
 #endif
 extern int can_do_mlock(void);
 extern int user_shm_lock(size_t, struct user_struct *);
 extern void user_shm_unlock(size_t, struct user_struct *);
 /*
  * Parameter block passed down to zap_pte_range in exceptional cases.
  * Each field's comment names the page attribute it is compared against;
  * first_index and last_index are the lowest and highest page->index to
  * unmap.
  */
 struct zap_details {
 	struct vm_area_struct *nonlinear_vma;	/* Check page->index if set */
 	struct address_space *check_mapping;	/* Check page->mapping if set */
 	pgoff_t	first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
 };
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 		pte_t pte);
 int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size);
 void zap_page_range(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size, struct zap_details *);
 void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long start, unsigned long end);
 /**
  * struct mm_walk - callbacks for walk_page_range
  * @pgd_entry: if set, called for each non-empty PGD (top-level) entry
  * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
  * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
  *	       this handler is required to be able to handle
  *	       pmd_trans_huge() pmds.  They may simply choose to
  *	       split_huge_page() instead of handling it explicitly.
  * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
  * @pte_hole: if set, called for each hole at all levels
  * @hugetlb_entry: if set, called for each hugetlb entry
  *		   *Caution*: The caller must hold mmap_sem if @hugetlb_entry
  *		   is used.
  * @mm: mm_struct associated with the walk (presumably the address space
  *	being walked - confirm against walk_page_range)
  * @private: untyped pointer carried in the walk; every callback receives
  *	     the struct mm_walk and can therefore reach it (presumably
  *	     caller-owned context - confirm against users)
  *
  * (see walk_page_range for more details)
  */
 struct mm_walk {
 	int (*pgd_entry)(pgd_t *pgd, unsigned long addr,
 			 unsigned long next, struct mm_walk *walk);
 	int (*pud_entry)(pud_t *pud, unsigned long addr,
 	                 unsigned long next, struct mm_walk *walk);
 	int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
 			 unsigned long next, struct mm_walk *walk);
 	int (*pte_entry)(pte_t *pte, unsigned long addr,
 			 unsigned long next, struct mm_walk *walk);
 	int (*pte_hole)(unsigned long addr, unsigned long next,
 			struct mm_walk *walk);
 	int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
 			     unsigned long addr, unsigned long next,
 			     struct mm_walk *walk);
 	struct mm_struct *mm;
 	void *private;
 };
 int walk_page_range(unsigned long addr, unsigned long end,
 		struct mm_walk *walk);
 void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 		unsigned long end, unsigned long floor, unsigned long ceiling);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
 			struct vm_area_struct *vma);
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
 int follow_pfn(struct vm_area_struct *vma, unsigned long address,
 	unsigned long *pfn);
 int follow_phys(struct vm_area_struct *vma, unsigned long address,
 		unsigned int flags, unsigned long *prot, resource_size_t *phys);
 int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
 			void *buf, int len, int write);
 /*
  * Convenience wrapper: unmap_mapping_range() over the given hole with
  * its even_cows argument fixed to 0.
  */
 static inline void unmap_shared_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen)
 {
 	const int even_cows = 0;
 	unmap_mapping_range(mapping, holebegin, holelen, even_cows);
 }
 extern void truncate_pagecache(struct inode *inode, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
 void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
 int truncate_inode_page(struct address_space *mapping, struct page *page);
 int generic_error_remove_page(struct address_space *mapping, struct page *page);
 int invalidate_inode_page(struct page *page);
 /*
  * With CONFIG_MMU the fault handlers are declared here and defined
  * elsewhere.  Without it, the inline stubs below call BUG() if they are
  * ever reached.
  */
 #ifdef CONFIG_MMU
 extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, unsigned int flags);
 extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
 			    unsigned long address, unsigned int fault_flags);
 #else
 static inline int handle_mm_fault(struct mm_struct *mm,
 			struct vm_area_struct *vma, unsigned long address,
 			unsigned int flags)
 {
 	/* should never happen if there's no MMU */
 	BUG();
 	/* value returned after BUG(); presumably only reached if BUG() returns */
 	return VM_FAULT_SIGBUS;
 }
 static inline int fixup_user_fault(struct task_struct *tsk,
 		struct mm_struct *mm, unsigned long address,
 		unsigned int fault_flags)
 {
 	/* should never happen if there's no MMU */
 	BUG();
 	/* value returned after BUG(); presumably only reached if BUG() returns */
 	return -EFAULT;
 }
 #endif
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
 		void *buf, int len, int write);
 long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		      unsigned long start, unsigned long nr_pages,
 		      unsigned int foll_flags, struct page **pages,
 		      struct vm_area_struct **vmas, int *nonblocking);
 long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		    unsigned long start, unsigned long nr_pages,
 		    int write, int force, struct page **pages,
 		    struct vm_area_struct **vmas);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
 struct kvec;
 int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
 			struct page **pages);
 int get_kernel_page(unsigned long start, int write, struct page **pages);
 struct page *get_dump_page(unsigned long addr);
 extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
 extern void do_invalidatepage(struct page *page, unsigned int offset,
 			      unsigned int length);
 int __set_page_dirty_nobuffers(struct page *page);
 int __set_page_dirty_no_writeback(struct page *page);
 int redirty_page_for_writepage(struct writeback_control *wbc,
 				struct page *page);
 void account_page_dirtied(struct page *page, struct address_space *mapping);
 void account_page_writeback(struct page *page);
 int set_page_dirty(struct page *page);
 int set_page_dirty_lock(struct page *page);
 int clear_page_dirty_for_io(struct page *page);
 /*
  * Is the vma a continuation of the stack vma above it?
  * True only for a non-NULL vma that ends exactly at @addr and has
  * VM_GROWSDOWN set.
  */
 static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
 {
 	if (!vma)
 		return 0;
 	if (vma->vm_end != addr)
 		return 0;
 	return (vma->vm_flags & VM_GROWSDOWN) != 0;
 }
 /*
  * True when @vma is a VM_GROWSDOWN vma starting at @addr whose
  * predecessor (vma->vm_prev) is not a growsdown vma ending at @addr.
  */
 static inline int stack_guard_page_start(struct vm_area_struct *vma,
 					     unsigned long addr)
 {
 	if (!(vma->vm_flags & VM_GROWSDOWN))
 		return 0;
 	if (vma->vm_start != addr)
 		return 0;
 	return !vma_growsdown(vma->vm_prev, addr);
 }
 /*
  * Is the vma a continuation of the stack vma below it?
  * True only for a non-NULL vma that starts exactly at @addr and has
  * VM_GROWSUP set.
  */
 static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
 {
 	if (!vma)
 		return 0;
 	if (vma->vm_start != addr)
 		return 0;
 	return (vma->vm_flags & VM_GROWSUP) != 0;
 }
 /*
  * True when @vma is a VM_GROWSUP vma ending at @addr whose successor
  * (vma->vm_next) is not a growsup vma starting at @addr.
  */
 static inline int stack_guard_page_end(struct vm_area_struct *vma,
 					   unsigned long addr)
 {
 	if (!(vma->vm_flags & VM_GROWSUP))
 		return 0;
 	if (vma->vm_end != addr)
 		return 0;
 	return !vma_growsup(vma->vm_next, addr);
 }
 extern pid_t
 vm_is_stack(struct task_struct *task, struct vm_area_struct *vma, int in_group);
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
 		unsigned long new_addr, unsigned long len,
 		bool need_rmap_locks);
 extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 			      unsigned long end, pgprot_t newprot,
 			      int dirty_accountable, int prot_numa);
 extern int mprotect_fixup(struct vm_area_struct *vma,
 			  struct vm_area_struct **pprev, unsigned long start,
 			  unsigned long end, unsigned long newflags);
 /*
  * doesn't attempt to fault and will return short.
  */
 int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			  struct page **pages);
 /*
  * per-process(per-mm_struct) statistics.
  */
 /* Read the per-mm counter @member from mm->rss_stat. */
 static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
 {
 	long count = atomic_long_read(&mm->rss_stat.count[member]);
 #ifdef SPLIT_RSS_COUNTING
 	/*
 	 * The counter is updated asynchronously and may go negative.
 	 * Users never expect a negative number, so report zero instead.
 	 */
 	if (count < 0)
 		return 0;
 #endif
 	return (unsigned long)count;
 }
 /* Atomically add @value to the per-mm counter @member. */
 static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
 {
 	atomic_long_add(value, &mm->rss_stat.count[member]);
 }
 /* Atomically increment the per-mm counter @member. */
 static inline void inc_mm_counter(struct mm_struct *mm, int member)
 {
 	atomic_long_inc(&mm->rss_stat.count[member]);
 }
 /* Atomically decrement the per-mm counter @member. */
 static inline void dec_mm_counter(struct mm_struct *mm, int member)
 {
 	atomic_long_dec(&mm->rss_stat.count[member]);
 }
 /* Sum of the MM_FILEPAGES and MM_ANONPAGES counters of @mm. */
 static inline unsigned long get_mm_rss(struct mm_struct *mm)
 {
 	unsigned long file_pages = get_mm_counter(mm, MM_FILEPAGES);
 	unsigned long anon_pages = get_mm_counter(mm, MM_ANONPAGES);
 	return file_pages + anon_pages;
 }
 /* Larger of the recorded mm->hiwater_rss and the current get_mm_rss(). */
 static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm)
 {
 	unsigned long recorded = mm->hiwater_rss;
 	unsigned long current_rss = get_mm_rss(mm);
 	return max(recorded, current_rss);
 }
 /* Larger of the recorded mm->hiwater_vm and the current mm->total_vm. */
 static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm)
 {
 	unsigned long recorded = mm->hiwater_vm;
 	unsigned long current_vm = mm->total_vm;
 	return max(recorded, current_vm);
 }
 /* Raise mm->hiwater_rss to the current get_mm_rss() if that is higher. */
 static inline void update_hiwater_rss(struct mm_struct *mm)
 {
 	unsigned long rss_now = get_mm_rss(mm);
 	if (rss_now > mm->hiwater_rss)
 		mm->hiwater_rss = rss_now;
 }
 /* Raise mm->hiwater_vm to mm->total_vm if the latter is higher. */
 static inline void update_hiwater_vm(struct mm_struct *mm)
 {
 	if (mm->total_vm > mm->hiwater_vm)
 		mm->hiwater_vm = mm->total_vm;
 }
 /* Raise *maxrss to get_mm_hiwater_rss(mm) if that is higher. */
 static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
 					 struct mm_struct *mm)
 {
 	unsigned long peak = get_mm_hiwater_rss(mm);
 	if (peak > *maxrss)
 		*maxrss = peak;
 }
 /*
  * sync_mm_rss() is a real function only when SPLIT_RSS_COUNTING is
  * defined; otherwise it is an empty inline stub.
  */
 #if defined(SPLIT_RSS_COUNTING)
 void sync_mm_rss(struct mm_struct *mm);
 #else
 static inline void sync_mm_rss(struct mm_struct *mm)
 {
 }
 #endif
 int vma_wants_writenotify(struct vm_area_struct *vma);
 extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
 			       spinlock_t **ptl);
 /*
  * Wrapper around __get_locked_pte() that routes the call through
  * __cond_lock() on *ptl and returns whatever __get_locked_pte() returned.
  * NOTE(review): __cond_lock() is presumably a static-checker lock
  * annotation - confirm against its definition.
  */
 static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
 				    spinlock_t **ptl)
 {
 	pte_t *ptep;
 	__cond_lock(*ptl, ptep = __get_locked_pte(mm, addr, ptl));
 	return ptep;
 }
 /*
  * When __PAGETABLE_PUD_FOLDED is defined, __pud_alloc() is a stub that
  * always returns 0; otherwise the real function is declared here.
  */
 #ifdef __PAGETABLE_PUD_FOLDED
 static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd,
 						unsigned long address)
 {
 	return 0;
 }
 #else
 int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
 #endif
 /*
  * When __PAGETABLE_PMD_FOLDED is defined, __pmd_alloc() is a stub that
  * always returns 0; otherwise the real function is declared here.
  */
 #ifdef __PAGETABLE_PMD_FOLDED
 static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
 						unsigned long address)
 {
 	return 0;
 }
 #else
 int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
 #endif
 int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
 		pmd_t *pmd, unsigned long address);
 int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
 /*
  * The following ifdef is needed to get the 4level-fixup.h header to work.
  * Remove it when 4level-fixup.h has been removed.
  */
 #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
 /*
  * Return the pud entry for @address under @pgd.  If *pgd is empty,
  * __pud_alloc() is called first; a nonzero result from it yields NULL.
  */
 static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
 {
 	if (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))
 		return NULL;
 	return pud_offset(pgd, address);
 }
 /*
  * Return the pmd entry for @address under @pud.  If *pud is empty,
  * __pmd_alloc() is called first; a nonzero result from it yields NULL.
  */
 static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 {
 	if (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))
 		return NULL;
 	return pmd_offset(pud, address);
 }
 #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
 #if USE_SPLIT_PTE_PTLOCKS
 /*
  * With ALLOC_SPLIT_PTLOCKS, page->ptl is itself a pointer to the lock and
  * ptlock_cache_init()/ptlock_alloc()/ptlock_free() are real functions
  * defined elsewhere.  Otherwise the lock is embedded in struct page:
  * ptlock_ptr() returns its address and the other helpers are no-op stubs
  * (ptlock_alloc() always succeeds).
  */
 #if ALLOC_SPLIT_PTLOCKS
 void __init ptlock_cache_init(void);
 extern bool ptlock_alloc(struct page *page);
 extern void ptlock_free(struct page *page);
 static inline spinlock_t *ptlock_ptr(struct page *page)
 {
 	return page->ptl;
 }
 #else /* ALLOC_SPLIT_PTLOCKS */
 static inline void ptlock_cache_init(void)
 {
 }
 static inline bool ptlock_alloc(struct page *page)
 {
 	return true;
 }
 static inline void ptlock_free(struct page *page)
 {
 }
 static inline spinlock_t *ptlock_ptr(struct page *page)
 {
 	return &page->ptl;
 }
 #endif /* ALLOC_SPLIT_PTLOCKS */
 /*
  * Split-ptlock variant: the lock is the one attached to the page returned
  * by pmd_page(*pmd); @mm is not used.
  */
 static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
 	return ptlock_ptr(pmd_page(*pmd));
 }
 /*
  * Set up the split page-table lock of @page.
  * Returns false if ptlock_alloc() fails, true once the lock has been
  * initialised with spin_lock_init().
  */
 static inline bool ptlock_init(struct page *page)
 {
 	/*
 	 * prep_new_page() initializes page->private (and therefore page->ptl)
 	 * with 0. Make sure nobody took it in use in between.
 	 *
 	 * It can happen if an arch tries to use slab for page table
 	 * allocation: slab code uses page->slab_cache and page->first_page
 	 * (for tail pages), which share storage with page->ptl.
 	 */
 	VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page);
 	if (!ptlock_alloc(page))
 		return false;
 	spin_lock_init(ptlock_ptr(page));
 	return true;
 }
 /*
  * Tear down the split ptlock of @page: reset page->mapping to NULL so
  * free_pages_check won't complain, then release the lock through
  * ptlock_free().
  */
 static inline void pte_lock_deinit(struct page *page)
 {
 	page->mapping = NULL;
 	ptlock_free(page);
 }
 #else	/* !USE_SPLIT_PTE_PTLOCKS */
 /*
  * We use mm->page_table_lock to guard all pagetable pages of the mm.
  * In this configuration the per-page lock helpers are stubs:
  * ptlock_cache_init() and pte_lock_deinit() do nothing and ptlock_init()
  * always returns true.
  */
 static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
 	return &mm->page_table_lock;
 }
 static inline void ptlock_cache_init(void) {}
 static inline bool ptlock_init(struct page *page) { return true; }
 static inline void pte_lock_deinit(struct page *page) {}
 #endif /* USE_SPLIT_PTE_PTLOCKS */
 /*
  * Page-table setup entry point: calls ptlock_cache_init() and then
  * pgtable_cache_init().
  */
 static inline void pgtable_init(void)
 {
 	ptlock_cache_init();
 	pgtable_cache_init();
 }
 static inline bool pgtable_page_ctor(struct page *page)
 {
 	inc_zone_page_state(page, NR_PAGETABLE);
 	return ptlock_init(page);
 }
 /*
  * Destructor for a page-table page: tears down its ptlock through
  * pte_lock_deinit() and removes the page from the NR_PAGETABLE count.
  */
 static inline void pgtable_page_dtor(struct page *page)
 {
 	pte_lock_deinit(page);
 	dec_zone_page_state(page, NR_PAGETABLE);
 }
 /*
  * pte_offset_map_lock(): map the pte for @address under @pmd, store the
  * lock returned by pte_lockptr() in *@ptlp, take that lock and evaluate to
  * the mapped pte pointer.
  */
 #define pte_offset_map_lock(mm, pmd, address, ptlp)	\
 ({							\
 	spinlock_t *__ptl = pte_lockptr(mm, pmd);	\
 	pte_t *__pte = pte_offset_map(pmd, address);	\
 	*(ptlp) = __ptl;				\
 	spin_lock(__ptl);				\
 	__pte;						\
 })
 /*
  * pte_unmap_unlock(): counterpart of pte_offset_map_lock(); drops @ptl
  * first and then unmaps @pte.
  */
 #define pte_unmap_unlock(pte, ptl)	do {		\
 	spin_unlock(ptl);				\
 	pte_unmap(pte);					\
 } while (0)
 /*
  * The three pte_alloc_*() macros share one shape: if *pmd is empty, the
  * matching __pte_alloc*() helper is called first and a nonzero result
  * makes the macro evaluate to NULL; otherwise it evaluates to the pte
  * obtained through pte_offset_map(), pte_offset_map_lock() or
  * pte_offset_kernel() respectively.
  */
 #define pte_alloc_map(mm, vma, pmd, address)				\
 	((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, vma,	\
 							pmd, address))?	\
 	 NULL: pte_offset_map(pmd, address))
 #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
 	((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, NULL,	\
 							pmd, address))?	\
 		NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
 #define pte_alloc_kernel(pmd, address)			\
 	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
 		NULL: pte_offset_kernel(pmd, address))
 #if USE_SPLIT_PMD_PTLOCKS
 /*
  * Map a pointer to a pmd entry to the struct page of the PMD table that
  * contains it: the pointer is rounded down to a multiple of the table
  * size (PTRS_PER_PMD * sizeof(pmd_t)) and handed to virt_to_page().
  *
  * Marked inline like every other helper defined in this header.
  */
 static inline struct page *pmd_to_page(pmd_t *pmd)
 {
 	unsigned long mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
 	return virt_to_page((void *)((unsigned long) pmd & mask));
 }
 /* Split-PMD variant: the lock belongs to the page found by pmd_to_page(). */
 static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
 	return ptlock_ptr(pmd_to_page(pmd));
 }
 /*
  * Constructor for a PMD table page: clears page->pmd_huge_pte when
  * CONFIG_TRANSPARENT_HUGEPAGE is set and initialises the page's ptlock.
  * Returns the result of ptlock_init().
  */
 static inline bool pgtable_pmd_page_ctor(struct page *page)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	page->pmd_huge_pte = NULL;
 #endif
 	return ptlock_init(page);
 }
 /*
  * Destructor for a PMD table page: with CONFIG_TRANSPARENT_HUGEPAGE it
  * asserts that page->pmd_huge_pte is no longer set, then frees the ptlock.
  */
 static inline void pgtable_pmd_page_dtor(struct page *page)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	VM_BUG_ON_PAGE(page->pmd_huge_pte, page);
 #endif
 	ptlock_free(page);
 }
 /* pmd_huge_pte lives in the PMD table's struct page here; @mm is unused. */
 #define pmd_huge_pte(mm, pmd) (pmd_to_page(pmd)->pmd_huge_pte)
 #else
 /*
  * Non-split PMD variant: mm->page_table_lock is the PMD lock, the PMD page
  * constructor/destructor are stubs (the constructor always succeeds) and
  * pmd_huge_pte is kept in the mm itself.
  */
 static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
 	return &mm->page_table_lock;
 }
 static inline bool pgtable_pmd_page_ctor(struct page *page) { return true; }
 static inline void pgtable_pmd_page_dtor(struct page *page) {}
 #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte)
 #endif
 /*
  * Take the lock returned by pmd_lockptr() for @pmd and hand it back so
  * the caller can release it later.
  */
 static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
 {
 	spinlock_t *lock = pmd_lockptr(mm, pmd);
 	spin_lock(lock);
 	return lock;
 }
 extern void free_area_init(unsigned long * zones_size);
 extern void free_area_init_node(int nid, unsigned long * zones_size,
 		unsigned long zone_start_pfn, unsigned long *zholes_size);
 extern void free_initmem(void);
 /*
  * Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK)
  * into the buddy system. The freed pages will be poisoned with pattern
  * "poison" if it's within range [0, UCHAR_MAX].
  * Return pages freed into the buddy system.
  */
 extern unsigned long free_reserved_area(void *start, void *end,
 					int poison, char *s);
 #ifdef	CONFIG_HIGHMEM
 /*
  * Free a highmem page into the buddy system, adjusting totalhigh_pages
  * and totalram_pages.
  */
 extern void free_highmem_page(struct page *page);
 #endif
 extern void adjust_managed_page_count(struct page *page, long count);
 extern void mem_init_print_info(const char *str);
 /*
  * Free the reserved page into the buddy system, so it gets managed.
  * Clears the Reserved flag and resets the page count before __free_page().
  */
 static inline void __free_reserved_page(struct page *page)
 {
 	ClearPageReserved(page);
 	init_page_count(page);
 	__free_page(page);
 }
 /*
  * Like __free_reserved_page(), and additionally passes +1 to
  * adjust_managed_page_count() for the page.
  */
 static inline void free_reserved_page(struct page *page)
 {
 	__free_reserved_page(page);
 	adjust_managed_page_count(page, 1);
 }
 /*
  * Mark @page Reserved and pass -1 to adjust_managed_page_count() for it.
  */
 static inline void mark_page_reserved(struct page *page)
 {
 	SetPageReserved(page);
 	adjust_managed_page_count(page, -1);
 }
 /*
  * Default way to release the __init memory: hands the region delimited by
  * __init_begin and __init_end to free_reserved_area().  The freed pages are
  * poisoned with pattern "poison" if it is within range [0, UCHAR_MAX].
  * Returns the number of pages freed into the buddy system.
  */
 static inline unsigned long free_initmem_default(int poison)
 {
 	extern char __init_begin[], __init_end[];
 	void *start = &__init_begin;
 	void *end = &__init_end;
 	return free_reserved_area(start, end, poison, "unused kernel");
 }
 static inline unsigned long get_num_physpages(void)
 {
 	int nid;
 	unsigned long phys_pages = 0;
 	for_each_online_node(nid)
 		phys_pages += node_present_pages(nid);
 	return phys_pages;
 }
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 /*
  * With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its
  * zones, allocate the backing mem_map and account for memory holes in a more
  * architecture independent manner. This is a substitute for creating the
  * zone_sizes[] and zholes_size[] arrays and passing them to
  * free_area_init_node()
  *
  * An architecture is expected to register range of page frames backed by
  * physical memory with memblock_add[_node]() before calling
  * free_area_init_nodes() passing in the PFN each zone ends at. At a basic
  * usage, an architecture is expected to do something like
  *
  * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn,
  * 							 max_highmem_pfn};
  * for_each_valid_physical_page_range()
  * 	memblock_add_node(base, size, nid)
  * free_area_init_nodes(max_zone_pfns);
  *
  * free_bootmem_with_active_regions() calls free_bootmem_node() for each
  * registered physical page range.  Similarly
  * sparse_memory_present_with_active_regions() calls memory_present() for
  * each range when SPARSEMEM is enabled.
  *
  * See mm/page_alloc.c for more information on each function exposed by
  * CONFIG_HAVE_MEMBLOCK_NODE_MAP.
  */
 extern void free_area_init_nodes(unsigned long *max_zone_pfn);
 unsigned long node_map_pfn_alignment(void);
 unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
 						unsigned long end_pfn);
 extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 						unsigned long end_pfn);
 extern void get_pfn_range_for_nid(unsigned int nid,
 			unsigned long *start_pfn, unsigned long *end_pfn);
 extern unsigned long find_min_pfn_with_active_regions(void);
 extern void free_bootmem_with_active_regions(int nid,
 						unsigned long max_low_pfn);
 extern void sparse_memory_present_with_active_regions(int nid);
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 /*
  * When neither CONFIG_HAVE_MEMBLOCK_NODE_MAP nor
  * CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID is set, __early_pfn_to_nid() is a stub
  * that reports node 0 for every pfn; otherwise the real functions are
  * declared here.
  */
 #if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \
     !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
 static inline int __early_pfn_to_nid(unsigned long pfn)
 {
 	return 0;
 }
 #else
 /* please see mm/page_alloc.c */
 extern int __meminit early_pfn_to_nid(unsigned long pfn);
 /* there is a per-arch backend function. */
 extern int __meminit __early_pfn_to_nid(unsigned long pfn);
 #endif
 extern void set_dma_reserve(unsigned long new_dma_reserve);
 extern void memmap_init_zone(unsigned long, int, unsigned long,
 				unsigned long, enum memmap_context);
 extern void setup_per_zone_wmarks(void);
 extern int __meminit init_per_zone_wmark_min(void);
 extern void mem_init(void);
 extern void __init mmap_init(void);
 extern void show_mem(unsigned int flags);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
 extern __printf(3, 4)
 void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...);
 extern void setup_per_cpu_pageset(void);
 extern void zone_pcp_update(struct zone *zone);
 extern void zone_pcp_reset(struct zone *zone);
 /* page_alloc.c */
 extern int min_free_kbytes;
 /* nommu.c */
 extern atomic_long_t mmap_pages_allocated;
 extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
 /* interval_tree.c */
 void vma_interval_tree_insert(struct vm_area_struct *node,
 			      struct rb_root *root);
 void vma_interval_tree_insert_after(struct vm_area_struct *node,
 				    struct vm_area_struct *prev,
 				    struct rb_root *root);
 void vma_interval_tree_remove(struct vm_area_struct *node,
 			      struct rb_root *root);
 struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root,
 				unsigned long start, unsigned long last);
 struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
 				unsigned long start, unsigned long last);
 /*
  * Loop header: @vma takes each result of vma_interval_tree_iter_first()
  * and vma_interval_tree_iter_next() for [@start, @last] in @root until one
  * of them returns NULL.
  */
 #define vma_interval_tree_foreach(vma, root, start, last)		\
 	for (vma = vma_interval_tree_iter_first(root, start, last);	\
 	     vma; vma = vma_interval_tree_iter_next(vma, start, last))
 /* Append @vma's shared.nonlinear list node to the tail of @list. */
 static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
 					struct list_head *list)
 {
 	struct list_head *entry = &vma->shared.nonlinear;
 	list_add_tail(entry, list);
 }
 void anon_vma_interval_tree_insert(struct anon_vma_chain *node,
 				   struct rb_root *root);
 void anon_vma_interval_tree_remove(struct anon_vma_chain *node,
 				   struct rb_root *root);
 struct anon_vma_chain *anon_vma_interval_tree_iter_first(
 	struct rb_root *root, unsigned long start, unsigned long last);
 struct anon_vma_chain *anon_vma_interval_tree_iter_next(
 	struct anon_vma_chain *node, unsigned long start, unsigned long last);
 #ifdef CONFIG_DEBUG_VM_RB
 void anon_vma_interval_tree_verify(struct anon_vma_chain *node);
 #endif
 /*
  * Loop header: @avc takes each result of
  * anon_vma_interval_tree_iter_first() and
  * anon_vma_interval_tree_iter_next() for [@start, @last] in @root until
  * one of them returns NULL.
  */
 #define anon_vma_interval_tree_foreach(avc, root, start, last)		 \
 	for (avc = anon_vma_interval_tree_iter_first(root, start, last); \
 	     avc; avc = anon_vma_interval_tree_iter_next(avc, start, last))
 /* mmap.c */
 extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
 extern int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert);
 extern struct vm_area_struct *vma_merge(struct mm_struct *,
 	struct vm_area_struct *prev, unsigned long addr, unsigned long end,
 	unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
 	struct mempolicy *);
 extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
 extern int split_vma(struct mm_struct *,
 	struct vm_area_struct *, unsigned long addr, int new_below);
 extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
 extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
 	struct rb_node **, struct rb_node *);
 extern void unlink_file_vma(struct vm_area_struct *);
 extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
 	unsigned long addr, unsigned long len, pgoff_t pgoff,
 	bool *need_rmap_locks);
 extern void exit_mmap(struct mm_struct *);
 extern int mm_take_all_locks(struct mm_struct *mm);
 extern void mm_drop_all_locks(struct mm_struct *mm);
 extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
 extern struct file *get_mm_exe_file(struct mm_struct *mm);
 extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
 extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
 				   unsigned long addr, unsigned long len,
 				   unsigned long flags, struct page **pages);
 extern int install_special_mapping(struct mm_struct *mm,
 				   unsigned long addr, unsigned long len,
 				   unsigned long flags, struct page **pages);
 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
 	unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);
 extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot, unsigned long flags,
 	unsigned long pgoff, unsigned long *populate);
 extern int do_munmap(struct mm_struct *, unsigned long, size_t);
 /*
  * mm_populate() calls __mm_populate() with ignore_errors set and discards
  * the result; without CONFIG_MMU it is an empty stub.
  */
 #ifdef CONFIG_MMU
 extern int __mm_populate(unsigned long addr, unsigned long len,
 			 int ignore_errors);
 static inline void mm_populate(unsigned long addr, unsigned long len)
 {
 	/* Ignore errors */
 	(void) __mm_populate(addr, len, 1);
 }
 #else
 static inline void mm_populate(unsigned long addr, unsigned long len) {}
 #endif
 /* These take the mm semaphore themselves */
 extern unsigned long vm_brk(unsigned long, unsigned long);
 extern int vm_munmap(unsigned long, size_t);
 extern unsigned long vm_mmap(struct file *, unsigned long,
         unsigned long, unsigned long,
         unsigned long, unsigned long);
 /*
  * Search parameters for vm_unmapped_area(): the wanted range must be at
  * least @length long, lie within [low_limit, high_limit) and satisfy
  * (begin_addr & align_mask) == (align_offset & align_mask).
  */
 struct vm_unmapped_area_info {
 /* when set in flags, vm_unmapped_area() calls unmapped_area_topdown() */
 #define VM_UNMAPPED_AREA_TOPDOWN 1
 	unsigned long flags;
 	unsigned long length;
 	unsigned long low_limit;
 	unsigned long high_limit;
 	unsigned long align_mask;
 	unsigned long align_offset;
 };
 extern unsigned long unmapped_area(struct vm_unmapped_area_info *info);
 extern unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info);
 /*
  * Search for an unmapped address range.
  *
  * The range being looked for:
  * - does not intersect with any VMA;
  * - is contained within the [low_limit, high_limit) interval;
  * - is at least the desired size;
  * - satisfies (begin_addr & align_mask) == (align_offset & align_mask).
  *
  * VM_UNMAPPED_AREA_TOPDOWN in info->flags selects unmapped_area_topdown();
  * without it unmapped_area() performs the search.
  */
 static inline unsigned long
 vm_unmapped_area(struct vm_unmapped_area_info *info)
 {
 	if (info->flags & VM_UNMAPPED_AREA_TOPDOWN)
 		return unmapped_area_topdown(info);
 	return unmapped_area(info);
 }
 /* truncate.c */
 extern void truncate_inode_pages(struct address_space *, loff_t);
 extern void truncate_inode_pages_range(struct address_space *,
 				       loff_t lstart, loff_t lend);
 extern void truncate_inode_pages_final(struct address_space *);
 /* generic vm_area_ops exported for stackable file systems */
 extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
 extern void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 /* mm/page-writeback.c */
 int write_one_page(struct page *page, int wait);
 void task_dirty_inc(struct task_struct *tsk);
 /* readahead.c */
 #define VM_MAX_READAHEAD	128	/* kbytes */
 #define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
 void page_cache_sync_readahead(struct address_space *mapping,
 			       struct file_ra_state *ra,
 			       struct file *filp,
 			       pgoff_t offset,
 			       unsigned long size);
 void page_cache_async_readahead(struct address_space *mapping,
 				struct file_ra_state *ra,
 				struct file *filp,
 				struct page *pg,
 				pgoff_t offset,
 				unsigned long size);
 unsigned long max_sane_readahead(unsigned long nr);
-unsigned long ra_submit(struct file_ra_state *ra,
-			struct address_space *mapping,
-			struct file *filp);
 /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
 /* CONFIG_STACK_GROWSUP still needs to grow downwards at some places */
 extern int expand_downwards(struct vm_area_struct *vma,
 		unsigned long address);
 /*
  * Without VM_GROWSUP there is nothing to expand upwards.  The stub is an
  * expression evaluating to 0 so that it can stand in wherever the int
  * result of the real function is used; its arguments are not evaluated.
  */
 #if VM_GROWSUP
 extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
 #else
   #define expand_upwards(vma, address) (0)
 #endif
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
 extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
 					     struct vm_area_struct **pprev);
 /*
  * Look up the first VMA which intersects the interval
  * start_addr..end_addr-1, NULL if none.  Assume start_addr < end_addr.
  */
 static inline struct vm_area_struct *
 find_vma_intersection(struct mm_struct *mm, unsigned long start_addr,
 		      unsigned long end_addr)
 {
 	struct vm_area_struct *found = find_vma(mm, start_addr);
 	if (!found || end_addr <= found->vm_start)
 		return NULL;
 	return found;
 }
 /* Length of @vma (vm_end - vm_start) shifted right by PAGE_SHIFT. */
 static inline unsigned long vma_pages(struct vm_area_struct *vma)
 {
 	unsigned long span = vma->vm_end - vma->vm_start;
 	return span >> PAGE_SHIFT;
 }
 /*
  * Look up the first VMA which exactly matches the interval
  * vm_start ... vm_end, NULL if find_vma() finds none or the bounds differ.
  */
 static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
 				unsigned long vm_start, unsigned long vm_end)
 {
 	struct vm_area_struct *candidate = find_vma(mm, vm_start);
 	if (!candidate)
 		return NULL;
 	if (candidate->vm_start != vm_start || candidate->vm_end != vm_end)
 		return NULL;
 	return candidate;
 }
 /*
  * vm_get_page_prot() is a real function with CONFIG_MMU; without it the
  * stub ignores vm_flags and returns __pgprot(0).
  */
 #ifdef CONFIG_MMU
 pgprot_t vm_get_page_prot(unsigned long vm_flags);
 #else
 static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
 {
 	return __pgprot(0);
 }
 #endif
 #ifdef CONFIG_NUMA_BALANCING
 unsigned long change_prot_numa(struct vm_area_struct *vma,
 			unsigned long start, unsigned long end);
 #endif
 struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
 int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 			unsigned long pfn, unsigned long size, pgprot_t);
 int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
 int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn);
 int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn);
 int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
 struct page *follow_page_mask(struct vm_area_struct *vma,
 			      unsigned long address, unsigned int foll_flags,
 			      unsigned int *page_mask);
 /*
  * follow_page_mask() for callers that have no use for the page mask:
  * the mask is written to a local and dropped.
  */
 static inline struct page *follow_page(struct vm_area_struct *vma,
 		unsigned long address, unsigned int foll_flags)
 {
 	unsigned int ignored_mask;
 	return follow_page_mask(vma, address, foll_flags, &ignored_mask);
 }
 /*
  * FOLL_* bit flags.  Each value is a distinct single bit, so they can be
  * OR-ed together.  Presumably these are the values accepted by the
  * foll_flags argument of follow_page()/follow_page_mask() -- confirm
  * against the callers.
  */
 #define FOLL_WRITE	0x01	/* check pte is writable */
 #define FOLL_TOUCH	0x02	/* mark page accessed */
 #define FOLL_GET	0x04	/* do get_page on page */
 #define FOLL_DUMP	0x08	/* give error on hole if it would be zero */
 #define FOLL_FORCE	0x10	/* get_user_pages read/write w/o permission */
 #define FOLL_NOWAIT	0x20	/* if a disk transfer is needed, start the IO
 				 * and return without waiting upon it */
 #define FOLL_MLOCK	0x40	/* mark page as mlocked */
 #define FOLL_SPLIT	0x80	/* don't return transhuge pages, split them */
 #define FOLL_HWPOISON	0x100	/* check page is hwpoisoned */
 #define FOLL_NUMA	0x200	/* force NUMA hinting page fault */
 #define FOLL_MIGRATION	0x400	/* wait for page to replace migration entry */
 /*
  * pte_fn_t is the callback type taken by apply_to_page_range() as @fn.
  * NOTE(review): @data is presumably passed through unchanged to every
  * callback invocation -- confirm in the implementation.
  */
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
 extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
 			       unsigned long size, pte_fn_t fn, void *data);
 #ifdef CONFIG_PROC_FS
 void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
 #else
 /*
  * Without CONFIG_PROC_FS only mm->total_vm is adjusted by @pages (which may
  * be negative); @flags and @file are ignored.
  */
 static inline void vm_stat_account(struct mm_struct *mm,
 			unsigned long flags, struct file *file, long pages)
 {
 	mm->total_vm += pages;
 }
 #endif /* CONFIG_PROC_FS */
 #ifdef CONFIG_DEBUG_PAGEALLOC
 extern void kernel_map_pages(struct page *page, int numpages, int enable);
 #ifdef CONFIG_HIBERNATION
 extern bool kernel_page_present(struct page *page);
 #endif /* CONFIG_HIBERNATION */
 #else
 /*
  * Without CONFIG_DEBUG_PAGEALLOC kernel_map_pages() is an empty stub and
  * kernel_page_present() (CONFIG_HIBERNATION only) always returns true.
  */
 static inline void
 kernel_map_pages(struct page *page, int numpages, int enable) {}
 #ifdef CONFIG_HIBERNATION
 static inline bool kernel_page_present(struct page *page) { return true; }
 #endif /* CONFIG_HIBERNATION */
 #endif
 extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
 #ifdef	__HAVE_ARCH_GATE_AREA
 int in_gate_area_no_mm(unsigned long addr);
 int in_gate_area(struct mm_struct *mm, unsigned long addr);
 #else
 int in_gate_area_no_mm(unsigned long addr);
 /*
  * Without __HAVE_ARCH_GATE_AREA, in_gate_area() evaluates and discards @mm
  * and yields the result of in_gate_area_no_mm(addr).
  */
 #define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);})
 #endif	/* __HAVE_ARCH_GATE_AREA */
 #ifdef CONFIG_SYSCTL
 /* drop_caches sysctl state and handler; only declared with CONFIG_SYSCTL. */
 extern int sysctl_drop_caches;
 int drop_caches_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 #endif
 unsigned long shrink_slab(struct shrink_control *shrink,
 			  unsigned long nr_pages_scanned,
 			  unsigned long lru_pages);
 /* Without an MMU randomize_va_space is the compile-time constant 0. */
 #ifndef CONFIG_MMU
 #define randomize_va_space 0
 #else
 extern int randomize_va_space;
 #endif
 const char * arch_vma_name(struct vm_area_struct *vma);
 void print_vma_addr(char *prefix, unsigned long rip);
 /*
  * Prototypes for the sparse mem_map / vmemmap population helpers.
  * NOTE(review): only declarations are visible here; the per-function
  * contracts have to be checked against the implementations.
  */
 void sparse_mem_maps_populate_node(struct page **map_map,
 				   unsigned long pnum_begin,
 				   unsigned long pnum_end,
 				   unsigned long map_count,
 				   int nodeid);
 struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
 pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
 pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node);
 pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
 pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
 void *vmemmap_alloc_block(unsigned long size, int node);
 void *vmemmap_alloc_block_buf(unsigned long size, int node);
 void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
 int vmemmap_populate_basepages(unsigned long start, unsigned long end,
 			       int node);
 int vmemmap_populate(unsigned long start, unsigned long end, int node);
 void vmemmap_populate_print_last(void);
 #ifdef CONFIG_MEMORY_HOTPLUG
 /* Only declared when CONFIG_MEMORY_HOTPLUG is enabled. */
 void vmemmap_free(unsigned long start, unsigned long end);
 #endif
 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
 				  unsigned long size);
 /*
  * MF_* bit flags, one bit each.
  * NOTE(review): presumably the values accepted by the 'flags' argument of
  * memory_failure()/memory_failure_queue() below -- confirm.
  */
 enum mf_flags {
 	MF_COUNT_INCREASED = 1 << 0,
 	MF_ACTION_REQUIRED = 1 << 1,
 	MF_MUST_KILL = 1 << 2,
 	MF_SOFT_OFFLINE = 1 << 3,
 };
 extern int memory_failure(unsigned long pfn, int trapno, int flags);
 extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);
 extern int unpoison_memory(unsigned long pfn);
 extern int sysctl_memory_failure_early_kill;
 extern int sysctl_memory_failure_recovery;
 extern void shake_page(struct page *p, int access);
 extern atomic_long_t num_poisoned_pages;
 extern int soft_offline_page(struct page *page, int flags);
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
 /*
  * Huge page clear/copy helpers; declared when either transparent huge
  * pages or hugetlbfs is configured.
  */
 extern void clear_huge_page(struct page *page,
 			    unsigned long addr,
 			    unsigned int pages_per_huge_page);
 extern void copy_user_huge_page(struct page *dst, struct page *src,
 				unsigned long addr, struct vm_area_struct *vma,
 				unsigned int pages_per_huge_page);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 #ifdef CONFIG_DEBUG_PAGEALLOC
 extern unsigned int _debug_guardpage_minorder;
 /* Accessor for the _debug_guardpage_minorder variable. */
 static inline unsigned int debug_guardpage_minorder(void)
 {
 	return _debug_guardpage_minorder;
 }
 /* A page is a guard page when PAGE_DEBUG_FLAG_GUARD is set in debug_flags. */
 static inline bool page_is_guard(struct page *page)
 {
 	return test_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);
 }
 #else
 /* Without CONFIG_DEBUG_PAGEALLOC: minorder is 0 and no page is a guard. */
 static inline unsigned int debug_guardpage_minorder(void) { return 0; }
 static inline bool page_is_guard(struct page *page) { return false; }
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 /* With a single possible node (MAX_NUMNODES <= 1) this is an empty stub. */
 #if MAX_NUMNODES > 1
 void __init setup_nr_node_ids(void);
 #else
 static inline void setup_nr_node_ids(void) {}
 #endif
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */

 /* internal.h: mm/ internal definitions
  *
  * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
 #ifndef __MM_INTERNAL_H
 #define __MM_INTERNAL_H
+#include <linux/fs.h>
 #include <linux/mm.h>
 /*
  * NOTE(review): prototype only; the definition is not part of this header
  * (presumably mm/memory.c) -- confirm.
  */
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
 /* Store @v into page->_count with atomic_set(); no checks are performed. */
 static inline void set_page_count(struct page *page, int v)
 {
 	atomic_set(&page->_count, v);
+}
+extern int __do_page_cache_readahead(struct address_space *mapping,
+		struct file *filp, pgoff_t offset, unsigned long nr_to_read,
+		unsigned long lookahead_size);
+/*
+ * Submit IO for the read-ahead request in file_ra_state.
+ *
+ * ra->start, ra->size and ra->async_size are handed to
+ * __do_page_cache_readahead() as offset, nr_to_read and lookahead_size;
+ * its int result is returned converted to unsigned long.
+ */
+static inline unsigned long ra_submit(struct file_ra_state *ra,
+		struct address_space *mapping, struct file *filp)
+{
+	return __do_page_cache_readahead(mapping, filp,
+					ra->start, ra->size, ra->async_size);
 }
 /*
  * Give a page that holds no reference yet (->_count == 0) its first one,
  * leaving ->_count at exactly one.
  */
 static inline void set_page_refcounted(struct page *page)
 {
 	/* Tail pages and pages with a non-zero count must not get here. */
 	VM_BUG_ON_PAGE(PageTail(page), page);
 	VM_BUG_ON_PAGE(atomic_read(&page->_count), page);
 	atomic_set(&page->_count, 1);
 }
 static inline void __get_page_tail_foll(struct page *page,
 					bool get_page_head)
 {
 	struct page *head = page->first_page;
 	/*
 	 * When taking a tail page, only the head page needs the elevated
 	 * ->_count; the tail page gets its ->_mapcount elevated instead.
 	 *
 	 * Elevating page_tail->_mapcount rather than page_tail->_count
 	 * keeps the tail's ->_count at zero at all times, so speculative
 	 * page access (e.g. page_cache_get_speculative()) never gets a
 	 * false positive from get_page_unless_zero() on a tail page.
 	 */
 	VM_BUG_ON_PAGE(atomic_read(&head->_count) <= 0, page);
 	if (get_page_head)
 		atomic_inc(&head->_count);
 	get_huge_page_tail(page);
 }
 /*
  * FOLL_GET helper for follow_page().  The caller must hold the proper PT
  * lock, and the pte (or pmd_trans_huge) must still be mapping the page.
  */
 static inline void get_page_foll(struct page *page)
 {
 	if (unlikely(PageTail(page))) {
 		/*
 		 * Safe only because the PT lock we hold keeps
 		 * __split_huge_page_refcount() from running underneath
 		 * get_page_foll().
 		 */
 		__get_page_tail_foll(page, true);
 		return;
 	}
 	/*
 	 * A normal page, or the head of a compound page, must already
 	 * carry an elevated page->_count before we add to it.
 	 */
 	VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page);
 	atomic_inc(&page->_count);
 }
 /* NOTE(review): the defining file is not named here -- confirm. */
 extern unsigned long highest_memmap_pfn;
 /*
  * in mm/vmscan.c:
  */
 extern int isolate_lru_page(struct page *page);
 extern void putback_lru_page(struct page *page);
 extern bool zone_reclaimable(struct zone *zone);
 /*
  * in mm/rmap.c:
  */
 extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
 /*
  * in mm/page_alloc.c
  */
 extern void __free_pages_bootmem(struct page *page, unsigned int order);
 extern void prep_compound_page(struct page *page, unsigned long order);
 #ifdef CONFIG_MEMORY_FAILURE
 /* Only declared when CONFIG_MEMORY_FAILURE is enabled. */
 extern bool is_free_buddy_page(struct page *page);
 #endif
 extern int user_min_free_kbytes;
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 /*
  * in mm/compaction.c
  */
 /*
  * compact_control is used to track pages being migrated and the free pages
  * they are being migrated to during memory compaction. The free_pfn starts
  * at the end of a zone and migrate_pfn begins at the start. Movable pages
  * are moved to the end of a zone during a compaction run and the run
  * completes when free_pfn <= migrate_pfn
  */
 struct compact_control {
 	struct list_head freepages;	/* List of free pages to migrate to */
 	struct list_head migratepages;	/* List of pages being migrated */
 	unsigned long nr_freepages;	/* Number of isolated free pages */
 	unsigned long nr_migratepages;	/* Number of pages to migrate */
 	unsigned long free_pfn;		/* isolate_freepages search base */
 	unsigned long migrate_pfn;	/* isolate_migratepages search base */
 	bool sync;			/* Synchronous migration */
 	bool ignore_skip_hint;		/* Scan blocks even if marked skip */
 	bool finished_update_free;	/* True when the zone cached pfns are
 					 * no longer being updated
 					 */
 	/* NOTE(review): presumably the migrate-scanner counterpart of
 	 * finished_update_free -- confirm in mm/compaction.c.
 	 */
 	bool finished_update_migrate;
 	int order;			/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	/* NOTE(review): presumably the zone being compacted -- confirm. */
 	struct zone *zone;
 	bool contended;			/* True if a lock was contended */
 };
 unsigned long
 isolate_freepages_range(struct compact_control *cc,
 			unsigned long start_pfn, unsigned long end_pfn);
 unsigned long
 isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 	unsigned long low_pfn, unsigned long end_pfn, bool unevictable);
 #endif
 /*
  * Return the order of a free page in the buddy system.  Callers normally
  * hold page_zone(page)->lock so the page cannot be allocated in parallel,
  * which would make the returned order garbage.  A caller that does not hold
  * page_zone(page)->lock must itself guarantee that the page can neither be
  * allocated nor merged in parallel.
  */
 static inline unsigned long page_order(struct page *page)
 {
 	/* The caller is responsible for checking PageBuddy() first. */
 	unsigned long order = page_private(page);
 	return order;
 }
 /* in mm/util.c */
 void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct vm_area_struct *prev, struct rb_node *rb_parent);
 #ifdef CONFIG_MMU
 extern long __mlock_vma_pages_range(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end, int *nonblocking);
 extern void munlock_vma_pages_range(struct vm_area_struct *vma,
 			unsigned long start, unsigned long end);
 /* munlock_vma_pages_range() over the whole of @vma: vm_start up to vm_end. */
 static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
 {
 	unsigned long first = vma->vm_start;
 	unsigned long last = vma->vm_end;
 	munlock_vma_pages_range(vma, first, last);
 }
 /*
  * Fault-path only: decide whether a new page is going into a LOCKED vma
  * and, if so, mark the page mlocked.  Returns 1 for such a vma, 0 otherwise.
  */
 static inline int mlocked_vma_newpage(struct vm_area_struct *vma,
 				    struct page *page)
 {
 	unsigned long lock_bits = vma->vm_flags & (VM_LOCKED | VM_SPECIAL);
 	VM_BUG_ON_PAGE(PageLRU(page), page);
 	/* Only VM_LOCKED with no VM_SPECIAL bit set qualifies. */
 	if (likely(lock_bits != VM_LOCKED))
 		return 0;
 	/* Already flagged mlocked: the statistics were accounted before. */
 	if (TestSetPageMlocked(page))
 		return 1;
 	mod_zone_page_state(page_zone(page), NR_MLOCK, hpage_nr_pages(page));
 	count_vm_event(UNEVICTABLE_PGMLOCKED);
 	return 1;
 }
 /*
  * Must be called with the vma's mmap_sem held for read or write, and with
  * the page locked.
  */
 extern void mlock_vma_page(struct page *page);
 extern unsigned int munlock_vma_page(struct page *page);
 /*
  * Clear the page's PageMlocked().  This can be useful in a situation where
  * we want to unconditionally remove a page from the pagecache -- e.g.,
  * on truncation or freeing.
  *
  * It is legal to call this function for any page, mlocked or not.
  * If called for a page that is still mapped by mlocked vmas, all we do
  * is revert to lazy LRU behaviour -- semantics are not broken.
  */
 extern void clear_page_mlock(struct page *page);
 /*
  * mlock_migrate_page - carry the Mlocked page flag over from @page to
  * @newpage and move the NR_MLOCK accounting with it.  Called only from
  * migrate_page_copy().
  */
 static inline void mlock_migrate_page(struct page *newpage, struct page *page)
 {
 	unsigned long irq_state;
 	int moved;
 	/* Nothing to transfer unless the old page was mlocked. */
 	if (!TestClearPageMlocked(page))
 		return;
 	moved = hpage_nr_pages(page);
 	local_irq_save(irq_state);
 	__mod_zone_page_state(page_zone(page), NR_MLOCK, -moved);
 	SetPageMlocked(newpage);
 	__mod_zone_page_state(page_zone(newpage), NR_MLOCK, moved);
 	local_irq_restore(irq_state);
 }
 extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /* Only declared when CONFIG_TRANSPARENT_HUGEPAGE is enabled. */
 extern unsigned long vma_address(struct page *page,
 				 struct vm_area_struct *vma);
 #endif
 #else /* !CONFIG_MMU */
 /*
  * Without an MMU the mlock helpers are stubs: mlocked_vma_newpage() always
  * returns 0 and the remaining three do nothing.
  */
 static inline int mlocked_vma_newpage(struct vm_area_struct *v, struct page *p)
 {
 	return 0;
 }
 static inline void clear_page_mlock(struct page *page) { }
 static inline void mlock_vma_page(struct page *page) { }
 static inline void mlock_migrate_page(struct page *new, struct page *old) { }
 #endif /* !CONFIG_MMU */
 /*
  * Return the mem_map entry for subpage 'offset' of the maximally aligned
  * gigantic page 'base'.  The mem_map may be discontiguous at
  * MAX_ORDER_NR_PAGES boundaries, so larger offsets go through the pfn.
  */
 static inline struct page *mem_map_offset(struct page *base, int offset)
 {
 	/* Below MAX_ORDER_NR_PAGES plain pointer arithmetic is used. */
 	if (likely(offset < MAX_ORDER_NR_PAGES))
 		return base + offset;
 	return pfn_to_page(page_to_pfn(base) + offset);
 }
 /*
  * Step an iterator over the subpages of the maximally aligned gigantic
  * page 'base', coping with discontiguity in the mem_map.  Returns NULL
  * when the pfn at a MAX_ORDER_NR_PAGES boundary is not valid.
  */
 static inline struct page *mem_map_next(struct page *iter,
 						struct page *base, int offset)
 {
 	unsigned long pfn;
 	/* Not on a MAX_ORDER_NR_PAGES boundary: just take the next entry. */
 	if (likely((offset & (MAX_ORDER_NR_PAGES - 1)) != 0))
 		return iter + 1;
 	pfn = page_to_pfn(base) + offset;
 	return pfn_valid(pfn) ? pfn_to_page(pfn) : NULL;
 }
 /*
  * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node,
  * so all functions starting at paging_init should be marked __init
  * in those cases. SPARSEMEM, however, allows for memory hotplug,
  * and alloc_bootmem_node is not used.
  *
  * Hence __paginginit expands to __meminit with CONFIG_SPARSEMEM and to
  * __init otherwise.
  */
 #ifdef CONFIG_SPARSEMEM
 #define __paginginit __meminit
 #else
 #define __paginginit __init
 #endif
 /*
  * Memory initialisation debug and verification.
  *
  * Levels are ordered MMINIT_WARNING < MMINIT_VERIFY < MMINIT_TRACE
  * (values 0, 1, 2); mminit_dprintk() compares them against
  * mminit_loglevel.
  */
 enum mminit_level {
 	MMINIT_WARNING,
 	MMINIT_VERIFY,
 	MMINIT_TRACE
 };
 #ifdef CONFIG_DEBUG_MEMORY_INIT
 extern int mminit_loglevel;
 /*
  * Print "mminit::<prefix> <fmt...>" when @level is below mminit_loglevel.
  * Levels up to MMINIT_WARNING are logged at KERN_WARNING, everything else
  * at KERN_DEBUG.  Note that @level is expanded unparenthesized and more
  * than once, so pass a plain constant or variable.
  */
 #define mminit_dprintk(level, prefix, fmt, arg...) \
 do { \
 	if (level < mminit_loglevel) { \
 		printk(level <= MMINIT_WARNING ? KERN_WARNING : KERN_DEBUG); \
 		printk(KERN_CONT "mminit::" prefix " " fmt, ##arg); \
 	} \
 } while (0)
 extern void mminit_verify_pageflags_layout(void);
 extern void mminit_verify_page_links(struct page *page,
 		enum zone_type zone, unsigned long nid, unsigned long pfn);
 extern void mminit_verify_zonelist(void);
 #else
 /* Without CONFIG_DEBUG_MEMORY_INIT all of the helpers below are empty. */
 static inline void mminit_dprintk(enum mminit_level level,
 				const char *prefix, const char *fmt, ...)
 {
 }
 static inline void mminit_verify_pageflags_layout(void)
 {
 }
 static inline void mminit_verify_page_links(struct page *page,
 		enum zone_type zone, unsigned long nid, unsigned long pfn)
 {
 }
 static inline void mminit_verify_zonelist(void)
 {
 }
 #endif /* CONFIG_DEBUG_MEMORY_INIT */
 /*
  * mminit_validate_memmodel_limits is independent of CONFIG_DEBUG_MEMORY_INIT.
  * It is a real function only with CONFIG_SPARSEMEM; otherwise it is an empty
  * stub that leaves *start_pfn and *end_pfn untouched.
  */
 #if defined(CONFIG_SPARSEMEM)
 extern void mminit_validate_memmodel_limits(unsigned long *start_pfn,
 				unsigned long *end_pfn);
 #else
 static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
 				unsigned long *end_pfn)
 {
 }
 #endif /* CONFIG_SPARSEMEM */
 /*
  * ZONE_RECLAIM_* constants.  The negative values are parenthesized so the
  * macros expand as a single operand inside any surrounding expression.
  * NOTE(review): presumably the result codes of zone reclaim -- confirm
  * against the users.
  */
 #define ZONE_RECLAIM_NOSCAN	(-2)
 #define ZONE_RECLAIM_FULL	(-1)
 #define ZONE_RECLAIM_SOME	0
 #define ZONE_RECLAIM_SUCCESS	1
 /*
  * hwpoison filter entry point and its tunables.
  * NOTE(review): the meaning of each tunable is defined where they are
  * implemented, not in this header -- confirm there.
  */
 extern int hwpoison_filter(struct page *p);
 extern u32 hwpoison_filter_dev_major;
 extern u32 hwpoison_filter_dev_minor;
 extern u64 hwpoison_filter_flags_mask;
 extern u64 hwpoison_filter_flags_value;
 extern u64 hwpoison_filter_memcg;
 extern u32 hwpoison_filter_enable;
 extern unsigned long vm_mmap_pgoff(struct file *, unsigned long,
         unsigned long, unsigned long,
         unsigned long, unsigned long);
 extern void set_pageblock_order(void);
 unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 					    struct list_head *page_list);
 /* The ALLOC_WMARK bits are used as an index to zone->watermark */
 #define ALLOC_WMARK_MIN		WMARK_MIN
 #define ALLOC_WMARK_LOW		WMARK_LOW
 #define ALLOC_WMARK_HIGH	WMARK_HIGH
 #define ALLOC_NO_WATERMARKS	0x04 /* don't check watermarks at all */
 /* Mask to get the watermark bits: ALLOC_NO_WATERMARKS-1 is 0x03 */
 #define ALLOC_WMARK_MASK	(ALLOC_NO_WATERMARKS-1)
 /* The remaining ALLOC_* values are independent single-bit flags. */
 #define ALLOC_HARDER		0x10 /* try to alloc harder */
 #define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
 #define ALLOC_CPUSET		0x40 /* check for correct cpuset */
 #define ALLOC_CMA		0x80 /* allow allocations from CMA areas */
 #define ALLOC_FAIR		0x100 /* fair zone allocation */
 #endif	/* __MM_INTERNAL_H */

 /*
  * mm/readahead.c - address_space-level file readahead.
  *
  * Copyright (C) 2002, Linus Torvalds
  *
  * 09Apr2002	Andrew Morton
  *		Initial version.
  */
 #include <linux/kernel.h>
-#include <linux/fs.h>
 #include <linux/gfp.h>
-#include <linux/mm.h>
 #include <linux/export.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
 #include <linux/file.h>
+#include "internal.h"
 /*
  * Set up the readahead state of a struct file: prev_pos becomes -1 and
  * ra_pages is copied from the mapping's backing_dev_info.  The caller is
  * expected to have zeroed *ra with memset beforehand.
  */
 void
 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
 {
 	ra->prev_pos = -1;
 	ra->ra_pages = mapping->backing_dev_info->ra_pages;
 }
 EXPORT_SYMBOL_GPL(file_ra_state_init);
 /* The page at the tail of the list, i.e. the entry linked at head->prev. */
 #define list_to_page(head) (list_entry((head)->prev, struct page, lru))
 /*
  * Release a page after read_cache_pages() failed on it, invalidating it
  * first when needed.
  * - whoever called read_cache_pages() may have set PG_private or PG_fscache
  *   beforehand (NFS does so for pages cached locally on disk), so the fs
  *   gets a chance to clean up in the error case
  */
 static void read_cache_pages_invalidate_page(struct address_space *mapping,
 					     struct page *page)
 {
 	if (!page_has_private(page))
 		goto release;
 	if (!trylock_page(page))
 		BUG();
 	/* ->mapping is set only for the duration of the invalidate call. */
 	page->mapping = mapping;
 	do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
 	page->mapping = NULL;
 	unlock_page(page);
 release:
 	page_cache_release(page);
 }
 /*
  * Drain @pages: unlink each page and release it, invalidating it first if
  * that is required.
  */
 static void read_cache_pages_invalidate_pages(struct address_space *mapping,
 					      struct list_head *pages)
 {
 	while (!list_empty(pages)) {
 		struct page *page = list_to_page(pages);
 		list_del(&page->lru);
 		read_cache_pages_invalidate_page(mapping, page);
 	}
 }
 /**
  * read_cache_pages - populate an address space with some pages & start reads against them
  * @mapping: the address_space
  * @pages: The address of a list_head which contains the target pages.  These
  *   pages have their ->index populated and are otherwise uninitialised.
  * @filler: callback routine for filling a single page.
  * @data: private data for the callback routine.
  *
  * Hides the details of the LRU cache etc from the filesystems.
  *
  * Returns 0, or the first non-zero value returned by @filler; in the latter
  * case the pages still on the list are invalidated and released.
  */
 int read_cache_pages(struct address_space *mapping, struct list_head *pages,
 			int (*filler)(void *, struct page *), void *data)
 {
 	int err = 0;
 	while (!list_empty(pages)) {
 		struct page *page = list_to_page(pages);
 		list_del(&page->lru);
 		/* Could not be added to the page cache: drop it, carry on. */
 		if (add_to_page_cache_lru(page, mapping,
 					  page->index, GFP_KERNEL) != 0) {
 			read_cache_pages_invalidate_page(mapping, page);
 			continue;
 		}
 		page_cache_release(page);
 		err = filler(data, page);
 		if (unlikely(err)) {
 			read_cache_pages_invalidate_pages(mapping, pages);
 			return err;
 		}
 		task_io_account_read(PAGE_CACHE_SIZE);
 	}
 	return err;
 }
 EXPORT_SYMBOL(read_cache_pages);
 /*
  * Start reads for the @nr_pages pages queued on @pages, inside one block
  * plug.  Uses ->readpages() when the mapping provides it (returning its
  * result), otherwise adds each page to the page cache and calls
  * ->readpage() on it (returning 0).
  */
 static int read_pages(struct address_space *mapping, struct file *filp,
 		struct list_head *pages, unsigned nr_pages)
 {
 	struct blk_plug plug;
 	unsigned remaining;
 	int ret = 0;
 	blk_start_plug(&plug);
 	if (mapping->a_ops->readpages) {
 		ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
 		/* Clean up the remaining pages */
 		put_pages_list(pages);
 	} else {
 		for (remaining = nr_pages; remaining; remaining--) {
 			struct page *page = list_to_page(pages);
 			list_del(&page->lru);
 			if (add_to_page_cache_lru(page, mapping,
 						  page->index, GFP_KERNEL) == 0)
 				mapping->a_ops->readpage(filp, page);
 			page_cache_release(page);
 		}
 	}
 	blk_finish_plug(&plug);
 	return ret;
 }
 /*
  * __do_page_cache_readahead() actually reads a chunk of disk.  It allocates all
  * the pages first, then submits them all for I/O. This avoids the very bad
  * behaviour which would occur if page allocations are causing VM writeback.
  * We really don't want to intermingle reads and writes like that.
  *
  * Returns the number of pages that were newly allocated and handed to
  * read_pages().  Pages already present in the page cache are skipped and
  * not counted; the result is 0 when the file size is 0.
  */
-static int
+int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
-__do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read,
 			unsigned long lookahead_size)
 {
 	struct inode *inode = mapping->host;
 	struct page *page;
 	unsigned long end_index;	/* The last page we want to read */
 	LIST_HEAD(page_pool);
 	int page_idx;
 	int ret = 0;
 	loff_t isize = i_size_read(inode);
 	if (isize == 0)
 		goto out;
 	end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
 	/*
 	 * Preallocate as many pages as we will need.
 	 */
 	for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
 		pgoff_t page_offset = offset + page_idx;
 		/* Never read beyond the last page of the file. */
 		if (page_offset > end_index)
 			break;
 		rcu_read_lock();
 		page = radix_tree_lookup(&mapping->page_tree, page_offset);
 		rcu_read_unlock();
 		/* A real (non-exceptional) entry exists: nothing to allocate. */
 		if (page && !radix_tree_exceptional_entry(page))
 			continue;
 		page = page_cache_alloc_readahead(mapping);
 		/* Allocation failed: submit what has been gathered so far. */
 		if (!page)
 			break;
 		page->index = page_offset;
 		list_add(&page->lru, &page_pool);
 		/* Flag the page lookahead_size pages before the request's end. */
 		if (page_idx == nr_to_read - lookahead_size)
 			SetPageReadahead(page);
 		ret++;
 	}
 	/*
 	 * Now start the IO.  We ignore I/O errors - if the page is not
 	 * uptodate then the caller will launch readpage again, and
 	 * will then handle the error.
 	 */
 	if (ret)
 		read_pages(mapping, filp, &page_pool, ret);
 	BUG_ON(!list_empty(&page_pool));
 out:
 	return ret;
 }
 /*
  * Read ahead in chunks of at most 2 megabytes so that not too much memory
  * is pinned at any one time.  Returns -EINVAL when the mapping has neither
  * ->readpage nor ->readpages, a negative chunk result if one occurs, and 0
  * otherwise.
  */
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 		pgoff_t offset, unsigned long nr_to_read)
 {
 	unsigned long chunk_max = (2 * 1024 * 1024) / PAGE_CACHE_SIZE;
 	if (unlikely(!(mapping->a_ops->readpage || mapping->a_ops->readpages)))
 		return -EINVAL;
 	nr_to_read = max_sane_readahead(nr_to_read);
 	while (nr_to_read) {
 		unsigned long batch =
 			nr_to_read < chunk_max ? nr_to_read : chunk_max;
 		int err = __do_page_cache_readahead(mapping, filp,
 						    offset, batch, 0);
 		if (err < 0)
 			return err;
 		offset += batch;
 		nr_to_read -= batch;
 	}
 	return 0;
 }
 /* Readahead cap: 512*4096 bytes (2 MiB) expressed in PAGE_CACHE_SIZE pages. */
 #define MAX_READAHEAD   ((512*4096)/PAGE_CACHE_SIZE)
 /*
  * Given a desired number of PAGE_CACHE_SIZE readahead pages, return a
  * sensible upper limit: the smaller of @nr and MAX_READAHEAD.
  */
 unsigned long max_sane_readahead(unsigned long nr)
 {
 	return min(nr, MAX_READAHEAD);
-}
-/*
- * Submit IO for the read-ahead request in file_ra_state.
- */
-unsigned long ra_submit(struct file_ra_state *ra,
-		       struct address_space *mapping, struct file *filp)
-{
-	int actual;
-	actual = __do_page_cache_readahead(mapping, filp,
-					ra->start, ra->size, ra->async_size);
-	return actual;
 }
 /*
  * Pick the initial window size: round @size up to the next power of two,
  * then scale it x 4 when small (<= max/32), x 2 when medium (<= max/4),
  * and otherwise use @max itself.
  * For a 128k (32 page) max ra:
  * 1-8 page = 32k initial, > 8 page = 128k initial
  */
 static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
 {
 	unsigned long rounded = roundup_pow_of_two(size);
 	if (rounded <= max / 32)
 		return rounded * 4;
 	if (rounded <= max / 4)
 		return rounded * 2;
 	return max;
 }
 /*
  *  Ramp up the previous window size (ra->size): x 4 while it is below
  *  max/16, x 2 otherwise, never exceeding @max.  The result is the new
  *  window size.
  */
 static unsigned long get_next_ra_size(struct file_ra_state *ra,
 						unsigned long max)
 {
 	unsigned long cur = ra->size;
 	unsigned long grown = cur * (cur < max / 16 ? 4 : 2);
 	return min(grown, max);
 }
 /*
  * On-demand readahead design.
  *
  * The fields in struct file_ra_state represent the most-recently-executed
  * readahead attempt:
  *
  *                        |<----- async_size ---------|
  *     |------------------- size -------------------->|
  *     |==================#===========================|
  *     ^start             ^page marked with PG_readahead
  *
  * To overlap application thinking time and disk I/O time, we do
  * `readahead pipelining': Do not wait until the application consumed all
  * readahead pages and stalled on the missing page at readahead_index;
  * Instead, submit an asynchronous readahead I/O as soon as there are
  * only async_size pages left in the readahead window. Normally async_size
  * will be equal to size, for maximum pipelining.
  *
  * In interleaved sequential reads, concurrent streams on the same fd can
  * be invalidating each other's readahead state. So we flag the new readahead
  * page at (start+size-async_size) with PG_readahead, and use it as readahead
  * indicator. The flag won't be set on already cached pages, to avoid the
  * readahead-for-nothing fuss, saving pointless page cache lookups.
  *
  * prev_pos tracks the last visited byte in the _previous_ read request.
  * It should be maintained by the caller, and will be used for detecting
  * small random reads. Note that the readahead algorithm checks loosely
  * for sequential patterns. Hence interleaved reads might be served as
  * sequential ones.
  *
  * There is a special-case: if the first page which the application tries to
  * read happens to be the first page of the file, it is assumed that a linear
  * read is about to happen and the window is immediately set to the initial size
  * based on I/O request size and the max_readahead.
  *
  * The code ramps up the readahead size aggressively at first, but slows down
  * as it approaches max_readahead.
  */
 /*
  * Count the contiguously cached pages from @offset-1 down to @offset-@max.
  * The count is a conservative estimate of
  * 	- the length of the sequential read sequence, or
  * 	- the thrashing threshold on memory tight systems
  */
 static pgoff_t count_history_pages(struct address_space *mapping,
 				   struct file_ra_state *ra,
 				   pgoff_t offset, unsigned long max)
 {
 	pgoff_t last = offset - 1;
 	pgoff_t hole;
 	rcu_read_lock();
 	hole = page_cache_prev_hole(mapping, last, max);
 	rcu_read_unlock();
 	return last - hole;
 }
 /*
  * Read-ahead driven by page cache context.  Returns 1 after setting up
  * @ra when enough history pages were found, 0 (leaving @ra untouched)
  * otherwise.
  */
 static int try_context_readahead(struct address_space *mapping,
 				 struct file_ra_state *ra,
 				 pgoff_t offset,
 				 unsigned long req_size,
 				 unsigned long max)
 {
 	pgoff_t history = count_history_pages(mapping, ra, offset, max);
 	/*
 	 * too little history:
 	 * this may well be a random read
 	 */
 	if (history <= req_size)
 		return 0;
 	/*
 	 * the history reaches back to the start of the file:
 	 * strong hint of a long-run stream (or whole-file-read)
 	 */
 	if (history >= offset)
 		history *= 2;
 	ra->start = offset;
 	ra->size = min(history + req_size, max);
 	ra->async_size = 1;
 	return 1;
 }
 /*
  * A minimal readahead algorithm for trivial sequential/random reads.
  *
  * @hit_readahead_marker is true when called for a page that carried the
  * readahead marker (the async path), false on a cache miss (the sync path).
  * @offset and @req_size are in pagecache pages.
  *
  * Depending on the detected pattern the state in @ra is updated and the
  * window submitted via ra_submit(), or -- for a standalone small random
  * read -- @req_size pages are read without touching @ra.  Returns the
  * result of the submission, or 0 when a marker hit finds no usable hole.
  */
 static unsigned long
 ondemand_readahead(struct address_space *mapping,
 		   struct file_ra_state *ra, struct file *filp,
 		   bool hit_readahead_marker, pgoff_t offset,
 		   unsigned long req_size)
 {
 	unsigned long max = max_sane_readahead(ra->ra_pages);
 	pgoff_t prev_offset;
 	/*
 	 * start of file
 	 */
 	if (!offset)
 		goto initial_readahead;
 	/*
 	 * It's the expected callback offset, assume sequential access.
 	 * Ramp up sizes, and push forward the readahead window.
 	 */
 	if ((offset == (ra->start + ra->size - ra->async_size) ||
 	     offset == (ra->start + ra->size))) {
 		ra->start += ra->size;
 		ra->size = get_next_ra_size(ra, max);
 		ra->async_size = ra->size;
 		goto readit;
 	}
 	/*
 	 * Hit a marked page without valid readahead state.
 	 * E.g. interleaved reads.
 	 * Query the pagecache for async_size, which normally equals to
 	 * readahead size. Ramp it up and use it as the new readahead size.
 	 */
 	if (hit_readahead_marker) {
 		pgoff_t start;
 		rcu_read_lock();
 		start = page_cache_next_hole(mapping, offset + 1, max);
 		rcu_read_unlock();
 		/* No hole found, or it lies more than max pages ahead. */
 		if (!start || start - offset > max)
 			return 0;
 		ra->start = start;
 		ra->size = start - offset;	/* old async_size */
 		ra->size += req_size;
 		ra->size = get_next_ra_size(ra, max);
 		ra->async_size = ra->size;
 		goto readit;
 	}
 	/*
 	 * oversize read
 	 */
 	if (req_size > max)
 		goto initial_readahead;
 	/*
 	 * sequential cache miss
 	 * trivial case: (offset - prev_offset) == 1
 	 * unaligned reads: (offset - prev_offset) == 0
 	 */
 	prev_offset = (unsigned long long)ra->prev_pos >> PAGE_CACHE_SHIFT;
 	if (offset - prev_offset <= 1UL)
 		goto initial_readahead;
 	/*
 	 * Query the page cache and look for the traces(cached history pages)
 	 * that a sequential stream would leave behind.
 	 */
 	if (try_context_readahead(mapping, ra, offset, req_size, max))
 		goto readit;
 	/*
 	 * standalone, small random read
 	 * Read as is, and do not pollute the readahead state.
 	 */
 	return __do_page_cache_readahead(mapping, filp, offset, req_size, 0);
 initial_readahead:
 	ra->start = offset;
 	ra->size = get_init_ra_size(req_size, max);
 	/* Keep async_size non-zero even when the window is not larger than the request. */
 	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
 readit:
 	/*
 	 * Will this read hit the readahead marker made by itself?
 	 * If so, trigger the readahead marker hit now, and merge
 	 * the resulted next readahead window into the current one.
 	 */
 	if (offset == ra->start && ra->size == ra->async_size) {
 		ra->async_size = get_next_ra_size(ra, max);
 		ra->size += ra->async_size;
 	}
 	return ra_submit(ra, mapping, filp);
 }
 /**
  * page_cache_sync_readahead - generic file readahead
  * @mapping: address_space which holds the pagecache and I/O vectors
  * @ra: file_ra_state which holds the readahead state
  * @filp: passed on to ->readpage() and ->readpages()
  * @offset: start offset into @mapping, in pagecache page-sized units
  * @req_size: hint: total size of the read which the caller is performing in
  *            pagecache pages
  *
  * Call page_cache_sync_readahead() on a cache miss: it submits the read,
  * and the readahead logic may piggyback further pages onto the request
  * when the access pattern suggests that this will help performance.
  */
 void page_cache_sync_readahead(struct address_space *mapping,
 			       struct file_ra_state *ra, struct file *filp,
 			       pgoff_t offset, unsigned long req_size)
 {
 	bool random_access;
 	/* readahead is switched off */
 	if (!ra->ra_pages)
 		return;
 	random_access = filp && (filp->f_mode & FMODE_RANDOM);
 	if (random_access)
 		/* be dumb: read exactly what was asked for */
 		force_page_cache_readahead(mapping, filp, offset, req_size);
 	else
 		/* let the on-demand algorithm decide */
 		ondemand_readahead(mapping, ra, filp, false, offset, req_size);
 }
 EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
 /**
  * page_cache_async_readahead - file readahead for marked pages
  * @mapping: address_space which holds the pagecache and I/O vectors
  * @ra: file_ra_state which holds the readahead state; a zero @ra->ra_pages
  *      turns this call into a no-op
  * @filp: passed on to ->readpage() and ->readpages()
  * @page: the page at @offset which has the PG_readahead flag set
  * @offset: start offset into @mapping, in pagecache page-sized units
  * @req_size: hint: total size of the read which the caller is performing in
  *            pagecache pages
  *
  * To be called when a page carrying the PG_readahead flag is used.  The flag
  * is a marker suggesting that the application has consumed enough of the
  * readahead window that more pages should start being pulled in.
  *
  * The flag is cleared on @page unless readahead is disabled or the page is
  * under writeback.  The next readahead is then issued through
  * ondemand_readahead() (fourth argument true), except when the backing
  * device reports read congestion, in which case it is skipped.
  */
 void
 page_cache_async_readahead(struct address_space *mapping,
 			   struct file_ra_state *ra, struct file *filp,
 			   struct page *page, pgoff_t offset,
 			   unsigned long req_size)
 {
 	/*
 	 * Bail out when readahead is off.  A page under writeback is left
 	 * alone as well, flag included: the same bit is used for
 	 * PG_readahead and PG_reclaim.
 	 */
 	if (!ra->ra_pages || PageWriteback(page))
 		return;
 	/* the marker is consumed even if the readahead below is deferred */
 	ClearPageReadahead(page);
 	/* defer asynchronous read-ahead while the device is read-congested */
 	if (!bdi_read_congested(mapping->backing_dev_info))
 		ondemand_readahead(mapping, ra, filp, true, offset, req_size);
 }
 EXPORT_SYMBOL_GPL(page_cache_async_readahead);
 /*
  * Validate @mapping and start a forced readahead of @nr pages beginning at
  * page index @index.
  *
  * Returns -EINVAL when @mapping is NULL or has no address_space operations;
  * otherwise the result of force_page_cache_readahead() is returned as is.
  */
 static ssize_t
 do_readahead(struct address_space *mapping, struct file *filp,
 	     pgoff_t index, unsigned long nr)
 {
 	/* only a mapping with a_ops can be read ahead */
 	if (mapping && mapping->a_ops)
 		return force_page_cache_readahead(mapping, filp, index, nr);
 	return -EINVAL;
 }
 SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
 {
 	ssize_t ret;
 	struct fd f;
 	ret = -EBADF;
 	f = fdget(fd);
 	if (f.file) {
 		if (f.file->f_mode & FMODE_READ) {
 			struct address_space *mapping = f.file->f_mapping;
 			pgoff_t start = offset >> PAGE_CACHE_SHIFT;
 			pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
 			unsigned long len = end - start + 1;
 			ret = do_readahead(mapping, f.file, start, len);
 		}
 		fdput(f);
 	}
 	return ret;