Commit 6ffef5d8bfc16845e25a7ee784426382b5c82c20

Authored by Mel Gorman
Committed by Jiri Slaby
1 parent fa6d2dd222

mm: do not use unnecessary atomic operations when adding pages to the LRU

commit 6fb81a17d21f2a138b8f424af4cf379f2b694060 upstream.

When adding pages to the LRU we clear the active bit unconditionally.
As the page could be reachable from other paths we cannot use unlocked
operations without risk of corruption such as a parallel
mark_page_accessed.  This patch tests if it is necessary to clear the
active flag before using an atomic operation.  This potentially opens a
tiny race when PageActive is checked as mark_page_accessed could be
called after PageActive was checked.  The race already exists but this
patch changes it slightly.  The consequence is that a page may be
promoted to the active list that would have been left on the inactive
list before the patch.  It's too tiny a race and too marginal a
consequence to always use atomic operations for.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Jan Kara <jack@suse.cz>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
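
For readers who want to see the pattern in isolation, here is a minimal
userspace sketch of the test-before-atomic-clear idea using C11 atomics.
It is an illustration only: PG_ACTIVE, the flags word and the helper
names are invented for this example and are not the kernel's page-flag
API; the real change to lru_cache_add_anon() and lru_cache_add_file()
is in the diff below.

#include <stdatomic.h>
#include <stdio.h>

#define PG_ACTIVE (1UL << 0)

/* Old behaviour: always an atomic RMW, even when the bit is already clear. */
static void clear_active_unconditional(atomic_ulong *flags)
{
        atomic_fetch_and(flags, ~PG_ACTIVE);
}

/*
 * New behaviour: a cheap plain load first, atomic RMW only when needed.
 * A racing "mark accessed" setter can still slip in after the load; the
 * window is tiny and the worst case is that the bit stays set, i.e. the
 * page ends up on the active list.
 */
static void clear_active_if_set(atomic_ulong *flags)
{
        if (atomic_load(flags) & PG_ACTIVE)
                atomic_fetch_and(flags, ~PG_ACTIVE);
}

int main(void)
{
        atomic_ulong flags = PG_ACTIVE;

        clear_active_if_set(&flags);        /* bit set: one atomic op */
        clear_active_if_set(&flags);        /* bit clear: no atomic op at all */
        clear_active_unconditional(&flags); /* always pays for the atomic op */

        printf("flags = %#lx\n", atomic_load(&flags));
        return 0;
}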

Showing 1 changed file with 4 additions and 2 deletions:

1 /* 1 /*
2 * linux/mm/swap.c 2 * linux/mm/swap.c
3 * 3 *
4 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 4 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
5 */ 5 */
6 6
7 /* 7 /*
8 * This file contains the default values for the operation of the 8 * This file contains the default values for the operation of the
9 * Linux VM subsystem. Fine-tuning documentation can be found in 9 * Linux VM subsystem. Fine-tuning documentation can be found in
10 * Documentation/sysctl/vm.txt. 10 * Documentation/sysctl/vm.txt.
11 * Started 18.12.91 11 * Started 18.12.91
12 * Swap aging added 23.2.95, Stephen Tweedie. 12 * Swap aging added 23.2.95, Stephen Tweedie.
13 * Buffermem limits added 12.3.98, Rik van Riel. 13 * Buffermem limits added 12.3.98, Rik van Riel.
14 */ 14 */
15 15
16 #include <linux/mm.h> 16 #include <linux/mm.h>
17 #include <linux/sched.h> 17 #include <linux/sched.h>
18 #include <linux/kernel_stat.h> 18 #include <linux/kernel_stat.h>
19 #include <linux/swap.h> 19 #include <linux/swap.h>
20 #include <linux/mman.h> 20 #include <linux/mman.h>
21 #include <linux/pagemap.h> 21 #include <linux/pagemap.h>
22 #include <linux/pagevec.h> 22 #include <linux/pagevec.h>
23 #include <linux/init.h> 23 #include <linux/init.h>
24 #include <linux/export.h> 24 #include <linux/export.h>
25 #include <linux/mm_inline.h> 25 #include <linux/mm_inline.h>
26 #include <linux/percpu_counter.h> 26 #include <linux/percpu_counter.h>
27 #include <linux/percpu.h> 27 #include <linux/percpu.h>
28 #include <linux/cpu.h> 28 #include <linux/cpu.h>
29 #include <linux/notifier.h> 29 #include <linux/notifier.h>
30 #include <linux/backing-dev.h> 30 #include <linux/backing-dev.h>
31 #include <linux/memcontrol.h> 31 #include <linux/memcontrol.h>
32 #include <linux/gfp.h> 32 #include <linux/gfp.h>
33 #include <linux/uio.h> 33 #include <linux/uio.h>
34 #include <linux/hugetlb.h> 34 #include <linux/hugetlb.h>
35 35
36 #include "internal.h" 36 #include "internal.h"
37 37
38 #define CREATE_TRACE_POINTS 38 #define CREATE_TRACE_POINTS
39 #include <trace/events/pagemap.h> 39 #include <trace/events/pagemap.h>
40 40
41 /* How many pages do we try to swap or page in/out together? */ 41 /* How many pages do we try to swap or page in/out together? */
42 int page_cluster; 42 int page_cluster;
43 43
44 static DEFINE_PER_CPU(struct pagevec, lru_add_pvec); 44 static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
45 static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); 45 static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
46 static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs); 46 static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
47 47
48 /* 48 /*
49 * This path almost never happens for VM activity - pages are normally 49 * This path almost never happens for VM activity - pages are normally
50 * freed via pagevecs. But it gets used by networking. 50 * freed via pagevecs. But it gets used by networking.
51 */ 51 */
52 static void __page_cache_release(struct page *page) 52 static void __page_cache_release(struct page *page)
53 { 53 {
54 if (PageLRU(page)) { 54 if (PageLRU(page)) {
55 struct zone *zone = page_zone(page); 55 struct zone *zone = page_zone(page);
56 struct lruvec *lruvec; 56 struct lruvec *lruvec;
57 unsigned long flags; 57 unsigned long flags;
58 58
59 spin_lock_irqsave(&zone->lru_lock, flags); 59 spin_lock_irqsave(&zone->lru_lock, flags);
60 lruvec = mem_cgroup_page_lruvec(page, zone); 60 lruvec = mem_cgroup_page_lruvec(page, zone);
61 VM_BUG_ON(!PageLRU(page)); 61 VM_BUG_ON(!PageLRU(page));
62 __ClearPageLRU(page); 62 __ClearPageLRU(page);
63 del_page_from_lru_list(page, lruvec, page_off_lru(page)); 63 del_page_from_lru_list(page, lruvec, page_off_lru(page));
64 spin_unlock_irqrestore(&zone->lru_lock, flags); 64 spin_unlock_irqrestore(&zone->lru_lock, flags);
65 } 65 }
66 } 66 }
67 67
68 static void __put_single_page(struct page *page) 68 static void __put_single_page(struct page *page)
69 { 69 {
70 __page_cache_release(page); 70 __page_cache_release(page);
71 free_hot_cold_page(page, false); 71 free_hot_cold_page(page, false);
72 } 72 }
73 73
74 static void __put_compound_page(struct page *page) 74 static void __put_compound_page(struct page *page)
75 { 75 {
76 compound_page_dtor *dtor; 76 compound_page_dtor *dtor;
77 77
78 __page_cache_release(page); 78 __page_cache_release(page);
79 dtor = get_compound_page_dtor(page); 79 dtor = get_compound_page_dtor(page);
80 (*dtor)(page); 80 (*dtor)(page);
81 } 81 }
82 82
83 static void put_compound_page(struct page *page) 83 static void put_compound_page(struct page *page)
84 { 84 {
85 if (unlikely(PageTail(page))) { 85 if (unlikely(PageTail(page))) {
86 /* __split_huge_page_refcount can run under us */ 86 /* __split_huge_page_refcount can run under us */
87 struct page *page_head = compound_head(page); 87 struct page *page_head = compound_head(page);
88 88
89 if (likely(page != page_head && 89 if (likely(page != page_head &&
90 get_page_unless_zero(page_head))) { 90 get_page_unless_zero(page_head))) {
91 unsigned long flags; 91 unsigned long flags;
92 92
93 /* 93 /*
94 * THP can not break up slab pages so avoid taking 94 * THP can not break up slab pages so avoid taking
95 * compound_lock(). Slab performs non-atomic bit ops 95 * compound_lock(). Slab performs non-atomic bit ops
96 * on page->flags for better performance. In particular 96 * on page->flags for better performance. In particular
97 * slab_unlock() in slub used to be a hot path. It is 97 * slab_unlock() in slub used to be a hot path. It is
98 * still hot on arches that do not support 98 * still hot on arches that do not support
99 * this_cpu_cmpxchg_double(). 99 * this_cpu_cmpxchg_double().
100 */ 100 */
101 if (PageSlab(page_head) || PageHeadHuge(page_head)) { 101 if (PageSlab(page_head) || PageHeadHuge(page_head)) {
102 if (likely(PageTail(page))) { 102 if (likely(PageTail(page))) {
103 /* 103 /*
104 * __split_huge_page_refcount 104 * __split_huge_page_refcount
105 * cannot race here. 105 * cannot race here.
106 */ 106 */
107 VM_BUG_ON(!PageHead(page_head)); 107 VM_BUG_ON(!PageHead(page_head));
108 atomic_dec(&page->_mapcount); 108 atomic_dec(&page->_mapcount);
109 if (put_page_testzero(page_head)) 109 if (put_page_testzero(page_head))
110 VM_BUG_ON(1); 110 VM_BUG_ON(1);
111 if (put_page_testzero(page_head)) 111 if (put_page_testzero(page_head))
112 __put_compound_page(page_head); 112 __put_compound_page(page_head);
113 return; 113 return;
114 } else 114 } else
115 /* 115 /*
116 * __split_huge_page_refcount 116 * __split_huge_page_refcount
117 * run before us, "page" was a 117 * run before us, "page" was a
118 * THP tail. The split 118 * THP tail. The split
119 * page_head has been freed 119 * page_head has been freed
120 * and reallocated as slab or 120 * and reallocated as slab or
121 * hugetlbfs page of smaller 121 * hugetlbfs page of smaller
122 * order (only possible if 122 * order (only possible if
123 * reallocated as slab on 123 * reallocated as slab on
124 * x86). 124 * x86).
125 */ 125 */
126 goto skip_lock; 126 goto skip_lock;
127 } 127 }
128 /* 128 /*
129 * page_head wasn't a dangling pointer but it 129 * page_head wasn't a dangling pointer but it
130 * may not be a head page anymore by the time 130 * may not be a head page anymore by the time
131 * we obtain the lock. That is ok as long as it 131 * we obtain the lock. That is ok as long as it
132 * can't be freed from under us. 132 * can't be freed from under us.
133 */ 133 */
134 flags = compound_lock_irqsave(page_head); 134 flags = compound_lock_irqsave(page_head);
135 if (unlikely(!PageTail(page))) { 135 if (unlikely(!PageTail(page))) {
136 /* __split_huge_page_refcount run before us */ 136 /* __split_huge_page_refcount run before us */
137 compound_unlock_irqrestore(page_head, flags); 137 compound_unlock_irqrestore(page_head, flags);
138 skip_lock: 138 skip_lock:
139 if (put_page_testzero(page_head)) { 139 if (put_page_testzero(page_head)) {
140 /* 140 /*
141 * The head page may have been 141 * The head page may have been
142 * freed and reallocated as a 142 * freed and reallocated as a
143 * compound page of smaller 143 * compound page of smaller
144 * order and then freed again. 144 * order and then freed again.
145 * All we know is that it 145 * All we know is that it
146 * cannot have become: a THP 146 * cannot have become: a THP
147 * page, a compound page of 147 * page, a compound page of
148 * higher order, a tail page. 148 * higher order, a tail page.
149 * That is because we still 149 * That is because we still
150 * hold the refcount of the 150 * hold the refcount of the
151 * split THP tail and 151 * split THP tail and
152 * page_head was the THP head 152 * page_head was the THP head
153 * before the split. 153 * before the split.
154 */ 154 */
155 if (PageHead(page_head)) 155 if (PageHead(page_head))
156 __put_compound_page(page_head); 156 __put_compound_page(page_head);
157 else 157 else
158 __put_single_page(page_head); 158 __put_single_page(page_head);
159 } 159 }
160 out_put_single: 160 out_put_single:
161 if (put_page_testzero(page)) 161 if (put_page_testzero(page))
162 __put_single_page(page); 162 __put_single_page(page);
163 return; 163 return;
164 } 164 }
165 VM_BUG_ON(page_head != page->first_page); 165 VM_BUG_ON(page_head != page->first_page);
166 /* 166 /*
167 * We can release the refcount taken by 167 * We can release the refcount taken by
168 * get_page_unless_zero() now that 168 * get_page_unless_zero() now that
169 * __split_huge_page_refcount() is blocked on 169 * __split_huge_page_refcount() is blocked on
170 * the compound_lock. 170 * the compound_lock.
171 */ 171 */
172 if (put_page_testzero(page_head)) 172 if (put_page_testzero(page_head))
173 VM_BUG_ON(1); 173 VM_BUG_ON(1);
174 /* __split_huge_page_refcount will wait now */ 174 /* __split_huge_page_refcount will wait now */
175 VM_BUG_ON(page_mapcount(page) <= 0); 175 VM_BUG_ON(page_mapcount(page) <= 0);
176 atomic_dec(&page->_mapcount); 176 atomic_dec(&page->_mapcount);
177 VM_BUG_ON(atomic_read(&page_head->_count) <= 0); 177 VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
178 VM_BUG_ON(atomic_read(&page->_count) != 0); 178 VM_BUG_ON(atomic_read(&page->_count) != 0);
179 compound_unlock_irqrestore(page_head, flags); 179 compound_unlock_irqrestore(page_head, flags);
180 180
181 if (put_page_testzero(page_head)) { 181 if (put_page_testzero(page_head)) {
182 if (PageHead(page_head)) 182 if (PageHead(page_head))
183 __put_compound_page(page_head); 183 __put_compound_page(page_head);
184 else 184 else
185 __put_single_page(page_head); 185 __put_single_page(page_head);
186 } 186 }
187 } else { 187 } else {
188 /* page_head is a dangling pointer */ 188 /* page_head is a dangling pointer */
189 VM_BUG_ON(PageTail(page)); 189 VM_BUG_ON(PageTail(page));
190 goto out_put_single; 190 goto out_put_single;
191 } 191 }
192 } else if (put_page_testzero(page)) { 192 } else if (put_page_testzero(page)) {
193 if (PageHead(page)) 193 if (PageHead(page))
194 __put_compound_page(page); 194 __put_compound_page(page);
195 else 195 else
196 __put_single_page(page); 196 __put_single_page(page);
197 } 197 }
198 } 198 }
199 199
200 void put_page(struct page *page) 200 void put_page(struct page *page)
201 { 201 {
202 if (unlikely(PageCompound(page))) 202 if (unlikely(PageCompound(page)))
203 put_compound_page(page); 203 put_compound_page(page);
204 else if (put_page_testzero(page)) 204 else if (put_page_testzero(page))
205 __put_single_page(page); 205 __put_single_page(page);
206 } 206 }
207 EXPORT_SYMBOL(put_page); 207 EXPORT_SYMBOL(put_page);
208 208
209 /* 209 /*
210 * This function is exported but must not be called by anything other 210 * This function is exported but must not be called by anything other
211 * than get_page(). It implements the slow path of get_page(). 211 * than get_page(). It implements the slow path of get_page().
212 */ 212 */
213 bool __get_page_tail(struct page *page) 213 bool __get_page_tail(struct page *page)
214 { 214 {
215 /* 215 /*
216 * This takes care of get_page() if run on a tail page 216 * This takes care of get_page() if run on a tail page
217 * returned by one of the get_user_pages/follow_page variants. 217 * returned by one of the get_user_pages/follow_page variants.
218 * get_user_pages/follow_page itself doesn't need the compound 218 * get_user_pages/follow_page itself doesn't need the compound
219 * lock because it runs __get_page_tail_foll() under the 219 * lock because it runs __get_page_tail_foll() under the
220 * proper PT lock that already serializes against 220 * proper PT lock that already serializes against
221 * split_huge_page(). 221 * split_huge_page().
222 */ 222 */
223 unsigned long flags; 223 unsigned long flags;
224 bool got = false; 224 bool got = false;
225 struct page *page_head = compound_head(page); 225 struct page *page_head = compound_head(page);
226 226
227 if (likely(page != page_head && get_page_unless_zero(page_head))) { 227 if (likely(page != page_head && get_page_unless_zero(page_head))) {
228 /* Ref to put_compound_page() comment. */ 228 /* Ref to put_compound_page() comment. */
229 if (PageSlab(page_head) || PageHeadHuge(page_head)) { 229 if (PageSlab(page_head) || PageHeadHuge(page_head)) {
230 if (likely(PageTail(page))) { 230 if (likely(PageTail(page))) {
231 /* 231 /*
232 * This is a hugetlbfs page or a slab 232 * This is a hugetlbfs page or a slab
233 * page. __split_huge_page_refcount 233 * page. __split_huge_page_refcount
234 * cannot race here. 234 * cannot race here.
235 */ 235 */
236 VM_BUG_ON(!PageHead(page_head)); 236 VM_BUG_ON(!PageHead(page_head));
237 __get_page_tail_foll(page, false); 237 __get_page_tail_foll(page, false);
238 return true; 238 return true;
239 } else { 239 } else {
240 /* 240 /*
241 * __split_huge_page_refcount run 241 * __split_huge_page_refcount run
242 * before us, "page" was a THP 242 * before us, "page" was a THP
243 * tail. The split page_head has been 243 * tail. The split page_head has been
244 * freed and reallocated as slab or 244 * freed and reallocated as slab or
245 * hugetlbfs page of smaller order 245 * hugetlbfs page of smaller order
246 * (only possible if reallocated as 246 * (only possible if reallocated as
247 * slab on x86). 247 * slab on x86).
248 */ 248 */
249 put_page(page_head); 249 put_page(page_head);
250 return false; 250 return false;
251 } 251 }
252 } 252 }
253 253
254 /* 254 /*
255 * page_head wasn't a dangling pointer but it 255 * page_head wasn't a dangling pointer but it
256 * may not be a head page anymore by the time 256 * may not be a head page anymore by the time
257 * we obtain the lock. That is ok as long as it 257 * we obtain the lock. That is ok as long as it
258 * can't be freed from under us. 258 * can't be freed from under us.
259 */ 259 */
260 flags = compound_lock_irqsave(page_head); 260 flags = compound_lock_irqsave(page_head);
261 /* here __split_huge_page_refcount won't run anymore */ 261 /* here __split_huge_page_refcount won't run anymore */
262 if (likely(PageTail(page))) { 262 if (likely(PageTail(page))) {
263 __get_page_tail_foll(page, false); 263 __get_page_tail_foll(page, false);
264 got = true; 264 got = true;
265 } 265 }
266 compound_unlock_irqrestore(page_head, flags); 266 compound_unlock_irqrestore(page_head, flags);
267 if (unlikely(!got)) 267 if (unlikely(!got))
268 put_page(page_head); 268 put_page(page_head);
269 } 269 }
270 return got; 270 return got;
271 } 271 }
272 EXPORT_SYMBOL(__get_page_tail); 272 EXPORT_SYMBOL(__get_page_tail);
273 273
274 /** 274 /**
275 * put_pages_list() - release a list of pages 275 * put_pages_list() - release a list of pages
276 * @pages: list of pages threaded on page->lru 276 * @pages: list of pages threaded on page->lru
277 * 277 *
278 * Release a list of pages which are strung together on page.lru. Currently 278 * Release a list of pages which are strung together on page.lru. Currently
279 * used by read_cache_pages() and related error recovery code. 279 * used by read_cache_pages() and related error recovery code.
280 */ 280 */
281 void put_pages_list(struct list_head *pages) 281 void put_pages_list(struct list_head *pages)
282 { 282 {
283 while (!list_empty(pages)) { 283 while (!list_empty(pages)) {
284 struct page *victim; 284 struct page *victim;
285 285
286 victim = list_entry(pages->prev, struct page, lru); 286 victim = list_entry(pages->prev, struct page, lru);
287 list_del(&victim->lru); 287 list_del(&victim->lru);
288 page_cache_release(victim); 288 page_cache_release(victim);
289 } 289 }
290 } 290 }
291 EXPORT_SYMBOL(put_pages_list); 291 EXPORT_SYMBOL(put_pages_list);
292 292
293 /* 293 /*
294 * get_kernel_pages() - pin kernel pages in memory 294 * get_kernel_pages() - pin kernel pages in memory
295 * @kiov: An array of struct kvec structures 295 * @kiov: An array of struct kvec structures
296 * @nr_segs: number of segments to pin 296 * @nr_segs: number of segments to pin
297 * @write: pinning for read/write, currently ignored 297 * @write: pinning for read/write, currently ignored
298 * @pages: array that receives pointers to the pages pinned. 298 * @pages: array that receives pointers to the pages pinned.
299 * Should be at least nr_segs long. 299 * Should be at least nr_segs long.
300 * 300 *
301 * Returns number of pages pinned. This may be fewer than the number 301 * Returns number of pages pinned. This may be fewer than the number
302 * requested. If nr_pages is 0 or negative, returns 0. If no pages 302 * requested. If nr_pages is 0 or negative, returns 0. If no pages
303 * were pinned, returns -errno. Each page returned must be released 303 * were pinned, returns -errno. Each page returned must be released
304 * with a put_page() call when it is finished with. 304 * with a put_page() call when it is finished with.
305 */ 305 */
306 int get_kernel_pages(const struct kvec *kiov, int nr_segs, int write, 306 int get_kernel_pages(const struct kvec *kiov, int nr_segs, int write,
307 struct page **pages) 307 struct page **pages)
308 { 308 {
309 int seg; 309 int seg;
310 310
311 for (seg = 0; seg < nr_segs; seg++) { 311 for (seg = 0; seg < nr_segs; seg++) {
312 if (WARN_ON(kiov[seg].iov_len != PAGE_SIZE)) 312 if (WARN_ON(kiov[seg].iov_len != PAGE_SIZE))
313 return seg; 313 return seg;
314 314
315 pages[seg] = kmap_to_page(kiov[seg].iov_base); 315 pages[seg] = kmap_to_page(kiov[seg].iov_base);
316 page_cache_get(pages[seg]); 316 page_cache_get(pages[seg]);
317 } 317 }
318 318
319 return seg; 319 return seg;
320 } 320 }
321 EXPORT_SYMBOL_GPL(get_kernel_pages); 321 EXPORT_SYMBOL_GPL(get_kernel_pages);
322 322
323 /* 323 /*
324 * get_kernel_page() - pin a kernel page in memory 324 * get_kernel_page() - pin a kernel page in memory
325 * @start: starting kernel address 325 * @start: starting kernel address
326 * @write: pinning for read/write, currently ignored 326 * @write: pinning for read/write, currently ignored
327 * @pages: array that receives pointer to the page pinned. 327 * @pages: array that receives pointer to the page pinned.
328 * Must be at least nr_segs long. 328 * Must be at least nr_segs long.
329 * 329 *
330 * Returns 1 if page is pinned. If the page was not pinned, returns 330 * Returns 1 if page is pinned. If the page was not pinned, returns
331 * -errno. The page returned must be released with a put_page() call 331 * -errno. The page returned must be released with a put_page() call
332 * when it is finished with. 332 * when it is finished with.
333 */ 333 */
334 int get_kernel_page(unsigned long start, int write, struct page **pages) 334 int get_kernel_page(unsigned long start, int write, struct page **pages)
335 { 335 {
336 const struct kvec kiov = { 336 const struct kvec kiov = {
337 .iov_base = (void *)start, 337 .iov_base = (void *)start,
338 .iov_len = PAGE_SIZE 338 .iov_len = PAGE_SIZE
339 }; 339 };
340 340
341 return get_kernel_pages(&kiov, 1, write, pages); 341 return get_kernel_pages(&kiov, 1, write, pages);
342 } 342 }
343 EXPORT_SYMBOL_GPL(get_kernel_page); 343 EXPORT_SYMBOL_GPL(get_kernel_page);
344 344
345 static void pagevec_lru_move_fn(struct pagevec *pvec, 345 static void pagevec_lru_move_fn(struct pagevec *pvec,
346 void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg), 346 void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg),
347 void *arg) 347 void *arg)
348 { 348 {
349 int i; 349 int i;
350 struct zone *zone = NULL; 350 struct zone *zone = NULL;
351 struct lruvec *lruvec; 351 struct lruvec *lruvec;
352 unsigned long flags = 0; 352 unsigned long flags = 0;
353 353
354 for (i = 0; i < pagevec_count(pvec); i++) { 354 for (i = 0; i < pagevec_count(pvec); i++) {
355 struct page *page = pvec->pages[i]; 355 struct page *page = pvec->pages[i];
356 struct zone *pagezone = page_zone(page); 356 struct zone *pagezone = page_zone(page);
357 357
358 if (pagezone != zone) { 358 if (pagezone != zone) {
359 if (zone) 359 if (zone)
360 spin_unlock_irqrestore(&zone->lru_lock, flags); 360 spin_unlock_irqrestore(&zone->lru_lock, flags);
361 zone = pagezone; 361 zone = pagezone;
362 spin_lock_irqsave(&zone->lru_lock, flags); 362 spin_lock_irqsave(&zone->lru_lock, flags);
363 } 363 }
364 364
365 lruvec = mem_cgroup_page_lruvec(page, zone); 365 lruvec = mem_cgroup_page_lruvec(page, zone);
366 (*move_fn)(page, lruvec, arg); 366 (*move_fn)(page, lruvec, arg);
367 } 367 }
368 if (zone) 368 if (zone)
369 spin_unlock_irqrestore(&zone->lru_lock, flags); 369 spin_unlock_irqrestore(&zone->lru_lock, flags);
370 release_pages(pvec->pages, pvec->nr, pvec->cold); 370 release_pages(pvec->pages, pvec->nr, pvec->cold);
371 pagevec_reinit(pvec); 371 pagevec_reinit(pvec);
372 } 372 }
373 373
374 static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec, 374 static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec,
375 void *arg) 375 void *arg)
376 { 376 {
377 int *pgmoved = arg; 377 int *pgmoved = arg;
378 378
379 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { 379 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
380 enum lru_list lru = page_lru_base_type(page); 380 enum lru_list lru = page_lru_base_type(page);
381 list_move_tail(&page->lru, &lruvec->lists[lru]); 381 list_move_tail(&page->lru, &lruvec->lists[lru]);
382 (*pgmoved)++; 382 (*pgmoved)++;
383 } 383 }
384 } 384 }
385 385
386 /* 386 /*
387 * pagevec_move_tail() must be called with IRQ disabled. 387 * pagevec_move_tail() must be called with IRQ disabled.
388 * Otherwise this may cause nasty races. 388 * Otherwise this may cause nasty races.
389 */ 389 */
390 static void pagevec_move_tail(struct pagevec *pvec) 390 static void pagevec_move_tail(struct pagevec *pvec)
391 { 391 {
392 int pgmoved = 0; 392 int pgmoved = 0;
393 393
394 pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved); 394 pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved);
395 __count_vm_events(PGROTATED, pgmoved); 395 __count_vm_events(PGROTATED, pgmoved);
396 } 396 }
397 397
398 /* 398 /*
399 * Writeback is about to end against a page which has been marked for immediate 399 * Writeback is about to end against a page which has been marked for immediate
400 * reclaim. If it still appears to be reclaimable, move it to the tail of the 400 * reclaim. If it still appears to be reclaimable, move it to the tail of the
401 * inactive list. 401 * inactive list.
402 */ 402 */
403 void rotate_reclaimable_page(struct page *page) 403 void rotate_reclaimable_page(struct page *page)
404 { 404 {
405 if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) && 405 if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
406 !PageUnevictable(page) && PageLRU(page)) { 406 !PageUnevictable(page) && PageLRU(page)) {
407 struct pagevec *pvec; 407 struct pagevec *pvec;
408 unsigned long flags; 408 unsigned long flags;
409 409
410 page_cache_get(page); 410 page_cache_get(page);
411 local_irq_save(flags); 411 local_irq_save(flags);
412 pvec = &__get_cpu_var(lru_rotate_pvecs); 412 pvec = &__get_cpu_var(lru_rotate_pvecs);
413 if (!pagevec_add(pvec, page)) 413 if (!pagevec_add(pvec, page))
414 pagevec_move_tail(pvec); 414 pagevec_move_tail(pvec);
415 local_irq_restore(flags); 415 local_irq_restore(flags);
416 } 416 }
417 } 417 }
418 418
419 static void update_page_reclaim_stat(struct lruvec *lruvec, 419 static void update_page_reclaim_stat(struct lruvec *lruvec,
420 int file, int rotated) 420 int file, int rotated)
421 { 421 {
422 struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; 422 struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
423 423
424 reclaim_stat->recent_scanned[file]++; 424 reclaim_stat->recent_scanned[file]++;
425 if (rotated) 425 if (rotated)
426 reclaim_stat->recent_rotated[file]++; 426 reclaim_stat->recent_rotated[file]++;
427 } 427 }
428 428
429 static void __activate_page(struct page *page, struct lruvec *lruvec, 429 static void __activate_page(struct page *page, struct lruvec *lruvec,
430 void *arg) 430 void *arg)
431 { 431 {
432 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { 432 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
433 int file = page_is_file_cache(page); 433 int file = page_is_file_cache(page);
434 int lru = page_lru_base_type(page); 434 int lru = page_lru_base_type(page);
435 435
436 del_page_from_lru_list(page, lruvec, lru); 436 del_page_from_lru_list(page, lruvec, lru);
437 SetPageActive(page); 437 SetPageActive(page);
438 lru += LRU_ACTIVE; 438 lru += LRU_ACTIVE;
439 add_page_to_lru_list(page, lruvec, lru); 439 add_page_to_lru_list(page, lruvec, lru);
440 trace_mm_lru_activate(page, page_to_pfn(page)); 440 trace_mm_lru_activate(page, page_to_pfn(page));
441 441
442 __count_vm_event(PGACTIVATE); 442 __count_vm_event(PGACTIVATE);
443 update_page_reclaim_stat(lruvec, file, 1); 443 update_page_reclaim_stat(lruvec, file, 1);
444 } 444 }
445 } 445 }
446 446
447 #ifdef CONFIG_SMP 447 #ifdef CONFIG_SMP
448 static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs); 448 static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
449 449
450 static void activate_page_drain(int cpu) 450 static void activate_page_drain(int cpu)
451 { 451 {
452 struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu); 452 struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);
453 453
454 if (pagevec_count(pvec)) 454 if (pagevec_count(pvec))
455 pagevec_lru_move_fn(pvec, __activate_page, NULL); 455 pagevec_lru_move_fn(pvec, __activate_page, NULL);
456 } 456 }
457 457
458 static bool need_activate_page_drain(int cpu) 458 static bool need_activate_page_drain(int cpu)
459 { 459 {
460 return pagevec_count(&per_cpu(activate_page_pvecs, cpu)) != 0; 460 return pagevec_count(&per_cpu(activate_page_pvecs, cpu)) != 0;
461 } 461 }
462 462
463 void activate_page(struct page *page) 463 void activate_page(struct page *page)
464 { 464 {
465 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { 465 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
466 struct pagevec *pvec = &get_cpu_var(activate_page_pvecs); 466 struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
467 467
468 page_cache_get(page); 468 page_cache_get(page);
469 if (!pagevec_add(pvec, page)) 469 if (!pagevec_add(pvec, page))
470 pagevec_lru_move_fn(pvec, __activate_page, NULL); 470 pagevec_lru_move_fn(pvec, __activate_page, NULL);
471 put_cpu_var(activate_page_pvecs); 471 put_cpu_var(activate_page_pvecs);
472 } 472 }
473 } 473 }
474 474
475 #else 475 #else
476 static inline void activate_page_drain(int cpu) 476 static inline void activate_page_drain(int cpu)
477 { 477 {
478 } 478 }
479 479
480 static bool need_activate_page_drain(int cpu) 480 static bool need_activate_page_drain(int cpu)
481 { 481 {
482 return false; 482 return false;
483 } 483 }
484 484
485 void activate_page(struct page *page) 485 void activate_page(struct page *page)
486 { 486 {
487 struct zone *zone = page_zone(page); 487 struct zone *zone = page_zone(page);
488 488
489 spin_lock_irq(&zone->lru_lock); 489 spin_lock_irq(&zone->lru_lock);
490 __activate_page(page, mem_cgroup_page_lruvec(page, zone), NULL); 490 __activate_page(page, mem_cgroup_page_lruvec(page, zone), NULL);
491 spin_unlock_irq(&zone->lru_lock); 491 spin_unlock_irq(&zone->lru_lock);
492 } 492 }
493 #endif 493 #endif
494 494
495 static void __lru_cache_activate_page(struct page *page) 495 static void __lru_cache_activate_page(struct page *page)
496 { 496 {
497 struct pagevec *pvec = &get_cpu_var(lru_add_pvec); 497 struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
498 int i; 498 int i;
499 499
500 /* 500 /*
501 * Search backwards on the optimistic assumption that the page being 501 * Search backwards on the optimistic assumption that the page being
502 * activated has just been added to this pagevec. Note that only 502 * activated has just been added to this pagevec. Note that only
503 * the local pagevec is examined as a !PageLRU page could be in the 503 * the local pagevec is examined as a !PageLRU page could be in the
504 * process of being released, reclaimed, migrated or on a remote 504 * process of being released, reclaimed, migrated or on a remote
505 * pagevec that is currently being drained. Furthermore, marking 505 * pagevec that is currently being drained. Furthermore, marking
506 * a remote pagevec's page PageActive potentially hits a race where 506 * a remote pagevec's page PageActive potentially hits a race where
507 * a page is marked PageActive just after it is added to the inactive 507 * a page is marked PageActive just after it is added to the inactive
508 * list causing accounting errors and BUG_ON checks to trigger. 508 * list causing accounting errors and BUG_ON checks to trigger.
509 */ 509 */
510 for (i = pagevec_count(pvec) - 1; i >= 0; i--) { 510 for (i = pagevec_count(pvec) - 1; i >= 0; i--) {
511 struct page *pagevec_page = pvec->pages[i]; 511 struct page *pagevec_page = pvec->pages[i];
512 512
513 if (pagevec_page == page) { 513 if (pagevec_page == page) {
514 SetPageActive(page); 514 SetPageActive(page);
515 break; 515 break;
516 } 516 }
517 } 517 }
518 518
519 put_cpu_var(lru_add_pvec); 519 put_cpu_var(lru_add_pvec);
520 } 520 }
521 521
522 /* 522 /*
523 * Mark a page as having seen activity. 523 * Mark a page as having seen activity.
524 * 524 *
525 * inactive,unreferenced -> inactive,referenced 525 * inactive,unreferenced -> inactive,referenced
526 * inactive,referenced -> active,unreferenced 526 * inactive,referenced -> active,unreferenced
527 * active,unreferenced -> active,referenced 527 * active,unreferenced -> active,referenced
528 */ 528 */
529 void mark_page_accessed(struct page *page) 529 void mark_page_accessed(struct page *page)
530 { 530 {
531 if (!PageActive(page) && !PageUnevictable(page) && 531 if (!PageActive(page) && !PageUnevictable(page) &&
532 PageReferenced(page)) { 532 PageReferenced(page)) {
533 533
534 /* 534 /*
535 * If the page is on the LRU, queue it for activation via 535 * If the page is on the LRU, queue it for activation via
536 * activate_page_pvecs. Otherwise, assume the page is on a 536 * activate_page_pvecs. Otherwise, assume the page is on a
537 * pagevec, mark it active and it'll be moved to the active 537 * pagevec, mark it active and it'll be moved to the active
538 * LRU on the next drain. 538 * LRU on the next drain.
539 */ 539 */
540 if (PageLRU(page)) 540 if (PageLRU(page))
541 activate_page(page); 541 activate_page(page);
542 else 542 else
543 __lru_cache_activate_page(page); 543 __lru_cache_activate_page(page);
544 ClearPageReferenced(page); 544 ClearPageReferenced(page);
545 } else if (!PageReferenced(page)) { 545 } else if (!PageReferenced(page)) {
546 SetPageReferenced(page); 546 SetPageReferenced(page);
547 } 547 }
548 } 548 }
549 EXPORT_SYMBOL(mark_page_accessed); 549 EXPORT_SYMBOL(mark_page_accessed);
550 550
551 static void __lru_cache_add(struct page *page) 551 static void __lru_cache_add(struct page *page)
552 { 552 {
553 struct pagevec *pvec = &get_cpu_var(lru_add_pvec); 553 struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
554 554
555 page_cache_get(page); 555 page_cache_get(page);
556 if (!pagevec_space(pvec)) 556 if (!pagevec_space(pvec))
557 __pagevec_lru_add(pvec); 557 __pagevec_lru_add(pvec);
558 pagevec_add(pvec, page); 558 pagevec_add(pvec, page);
559 put_cpu_var(lru_add_pvec); 559 put_cpu_var(lru_add_pvec);
560 } 560 }
561 561
562 /** 562 /**
563 * lru_cache_add: add a page to the page lists 563 * lru_cache_add: add a page to the page lists
564 * @page: the page to add 564 * @page: the page to add
565 */ 565 */
566 void lru_cache_add_anon(struct page *page) 566 void lru_cache_add_anon(struct page *page)
567 { 567 {
568 ClearPageActive(page); 568 if (PageActive(page))
569 ClearPageActive(page);
569 __lru_cache_add(page); 570 __lru_cache_add(page);
570 } 571 }
571 572
572 void lru_cache_add_file(struct page *page) 573 void lru_cache_add_file(struct page *page)
573 { 574 {
574 ClearPageActive(page); 575 if (PageActive(page))
576 ClearPageActive(page);
575 __lru_cache_add(page); 577 __lru_cache_add(page);
576 } 578 }
577 EXPORT_SYMBOL(lru_cache_add_file); 579 EXPORT_SYMBOL(lru_cache_add_file);
578 580
579 /** 581 /**
580 * lru_cache_add - add a page to a page list 582 * lru_cache_add - add a page to a page list
581 * @page: the page to be added to the LRU. 583 * @page: the page to be added to the LRU.
582 * 584 *
583 * Queue the page for addition to the LRU via pagevec. The decision on whether 585 * Queue the page for addition to the LRU via pagevec. The decision on whether
584 * to add the page to the [in]active [file|anon] list is deferred until the 586 * to add the page to the [in]active [file|anon] list is deferred until the
585 * pagevec is drained. This gives a chance for the caller of lru_cache_add() 587 * pagevec is drained. This gives a chance for the caller of lru_cache_add()
586 * have the page added to the active list using mark_page_accessed(). 588 * have the page added to the active list using mark_page_accessed().
587 */ 589 */
588 void lru_cache_add(struct page *page) 590 void lru_cache_add(struct page *page)
589 { 591 {
590 VM_BUG_ON(PageActive(page) && PageUnevictable(page)); 592 VM_BUG_ON(PageActive(page) && PageUnevictable(page));
591 VM_BUG_ON(PageLRU(page)); 593 VM_BUG_ON(PageLRU(page));
592 __lru_cache_add(page); 594 __lru_cache_add(page);
593 } 595 }
594 596
595 /** 597 /**
596 * add_page_to_unevictable_list - add a page to the unevictable list 598 * add_page_to_unevictable_list - add a page to the unevictable list
597 * @page: the page to be added to the unevictable list 599 * @page: the page to be added to the unevictable list
598 * 600 *
599 * Add page directly to its zone's unevictable list. To avoid races with 601 * Add page directly to its zone's unevictable list. To avoid races with
600 * tasks that might be making the page evictable, through eg. munlock, 602 * tasks that might be making the page evictable, through eg. munlock,
601 * munmap or exit, while it's not on the lru, we want to add the page 603 * munmap or exit, while it's not on the lru, we want to add the page
602 * while it's locked or otherwise "invisible" to other tasks. This is 604 * while it's locked or otherwise "invisible" to other tasks. This is
603 * difficult to do when using the pagevec cache, so bypass that. 605 * difficult to do when using the pagevec cache, so bypass that.
604 */ 606 */
605 void add_page_to_unevictable_list(struct page *page) 607 void add_page_to_unevictable_list(struct page *page)
606 { 608 {
607 struct zone *zone = page_zone(page); 609 struct zone *zone = page_zone(page);
608 struct lruvec *lruvec; 610 struct lruvec *lruvec;
609 611
610 spin_lock_irq(&zone->lru_lock); 612 spin_lock_irq(&zone->lru_lock);
611 lruvec = mem_cgroup_page_lruvec(page, zone); 613 lruvec = mem_cgroup_page_lruvec(page, zone);
612 ClearPageActive(page); 614 ClearPageActive(page);
613 SetPageUnevictable(page); 615 SetPageUnevictable(page);
614 SetPageLRU(page); 616 SetPageLRU(page);
615 add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE); 617 add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE);
616 spin_unlock_irq(&zone->lru_lock); 618 spin_unlock_irq(&zone->lru_lock);
617 } 619 }
618 620
619 /* 621 /*
620 * If the page can not be invalidated, it is moved to the 622 * If the page can not be invalidated, it is moved to the
621 * inactive list to speed up its reclaim. It is moved to the 623 * inactive list to speed up its reclaim. It is moved to the
622 * head of the list, rather than the tail, to give the flusher 624 * head of the list, rather than the tail, to give the flusher
623 * threads some time to write it out, as this is much more 625 * threads some time to write it out, as this is much more
624 * effective than the single-page writeout from reclaim. 626 * effective than the single-page writeout from reclaim.
625 * 627 *
626 * If the page isn't page_mapped and dirty/writeback, the page 628 * If the page isn't page_mapped and dirty/writeback, the page
627 * could reclaim asap using PG_reclaim. 629 * could reclaim asap using PG_reclaim.
628 * 630 *
629 * 1. active, mapped page -> none 631 * 1. active, mapped page -> none
630 * 2. active, dirty/writeback page -> inactive, head, PG_reclaim 632 * 2. active, dirty/writeback page -> inactive, head, PG_reclaim
631 * 3. inactive, mapped page -> none 633 * 3. inactive, mapped page -> none
632 * 4. inactive, dirty/writeback page -> inactive, head, PG_reclaim 634 * 4. inactive, dirty/writeback page -> inactive, head, PG_reclaim
633 * 5. inactive, clean -> inactive, tail 635 * 5. inactive, clean -> inactive, tail
634 * 6. Others -> none 636 * 6. Others -> none
635 * 637 *
636 * In 4, why it moves inactive's head, the VM expects the page would 638 * In 4, why it moves inactive's head, the VM expects the page would
637 * be write it out by flusher threads as this is much more effective 639 * be write it out by flusher threads as this is much more effective
638 * than the single-page writeout from reclaim. 640 * than the single-page writeout from reclaim.
639 */ 641 */
640 static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec, 642 static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
641 void *arg) 643 void *arg)
642 { 644 {
643 int lru, file; 645 int lru, file;
644 bool active; 646 bool active;
645 647
646 if (!PageLRU(page)) 648 if (!PageLRU(page))
647 return; 649 return;
648 650
649 if (PageUnevictable(page)) 651 if (PageUnevictable(page))
650 return; 652 return;
651 653
652 /* Some processes are using the page */ 654 /* Some processes are using the page */
653 if (page_mapped(page)) 655 if (page_mapped(page))
654 return; 656 return;
655 657
656 active = PageActive(page); 658 active = PageActive(page);
657 file = page_is_file_cache(page); 659 file = page_is_file_cache(page);
658 lru = page_lru_base_type(page); 660 lru = page_lru_base_type(page);
659 661
660 del_page_from_lru_list(page, lruvec, lru + active); 662 del_page_from_lru_list(page, lruvec, lru + active);
661 ClearPageActive(page); 663 ClearPageActive(page);
662 ClearPageReferenced(page); 664 ClearPageReferenced(page);
663 add_page_to_lru_list(page, lruvec, lru); 665 add_page_to_lru_list(page, lruvec, lru);
664 666
665 if (PageWriteback(page) || PageDirty(page)) { 667 if (PageWriteback(page) || PageDirty(page)) {
666 /* 668 /*
667 * PG_reclaim could be raced with end_page_writeback 669 * PG_reclaim could be raced with end_page_writeback
668 * It can make readahead confusing. But race window 670 * It can make readahead confusing. But race window
669 * is _really_ small and it's non-critical problem. 671 * is _really_ small and it's non-critical problem.
670 */ 672 */
671 SetPageReclaim(page); 673 SetPageReclaim(page);
672 } else { 674 } else {
673 /* 675 /*
674 * The page's writeback ends up during pagevec 676 * The page's writeback ends up during pagevec
675 * We moves tha page into tail of inactive. 677 * We moves tha page into tail of inactive.
676 */ 678 */
677 list_move_tail(&page->lru, &lruvec->lists[lru]); 679 list_move_tail(&page->lru, &lruvec->lists[lru]);
678 __count_vm_event(PGROTATED); 680 __count_vm_event(PGROTATED);
679 } 681 }
680 682
681 if (active) 683 if (active)
682 __count_vm_event(PGDEACTIVATE); 684 __count_vm_event(PGDEACTIVATE);
683 update_page_reclaim_stat(lruvec, file, 0); 685 update_page_reclaim_stat(lruvec, file, 0);
684 } 686 }
685 687
686 /* 688 /*
687 * Drain pages out of the cpu's pagevecs. 689 * Drain pages out of the cpu's pagevecs.
688 * Either "cpu" is the current CPU, and preemption has already been 690 * Either "cpu" is the current CPU, and preemption has already been
689 * disabled; or "cpu" is being hot-unplugged, and is already dead. 691 * disabled; or "cpu" is being hot-unplugged, and is already dead.
690 */ 692 */
691 void lru_add_drain_cpu(int cpu) 693 void lru_add_drain_cpu(int cpu)
692 { 694 {
693 struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu); 695 struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu);
694 696
695 if (pagevec_count(pvec)) 697 if (pagevec_count(pvec))
696 __pagevec_lru_add(pvec); 698 __pagevec_lru_add(pvec);
697 699
698 pvec = &per_cpu(lru_rotate_pvecs, cpu); 700 pvec = &per_cpu(lru_rotate_pvecs, cpu);
699 if (pagevec_count(pvec)) { 701 if (pagevec_count(pvec)) {
700 unsigned long flags; 702 unsigned long flags;
701 703
702 /* No harm done if a racing interrupt already did this */ 704 /* No harm done if a racing interrupt already did this */
703 local_irq_save(flags); 705 local_irq_save(flags);
704 pagevec_move_tail(pvec); 706 pagevec_move_tail(pvec);
705 local_irq_restore(flags); 707 local_irq_restore(flags);
706 } 708 }
707 709
708 pvec = &per_cpu(lru_deactivate_pvecs, cpu); 710 pvec = &per_cpu(lru_deactivate_pvecs, cpu);
709 if (pagevec_count(pvec)) 711 if (pagevec_count(pvec))
710 pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); 712 pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
711 713
712 activate_page_drain(cpu); 714 activate_page_drain(cpu);
713 } 715 }
714 716
715 /** 717 /**
716 * deactivate_page - forcefully deactivate a page 718 * deactivate_page - forcefully deactivate a page
717 * @page: page to deactivate 719 * @page: page to deactivate
718 * 720 *
719 * This function hints the VM that @page is a good reclaim candidate, 721 * This function hints the VM that @page is a good reclaim candidate,
720 * for example if its invalidation fails due to the page being dirty 722 * for example if its invalidation fails due to the page being dirty
721 * or under writeback. 723 * or under writeback.
722 */ 724 */
723 void deactivate_page(struct page *page) 725 void deactivate_page(struct page *page)
724 { 726 {
725 /* 727 /*
726 * In a workload with many unevictable page such as mprotect, unevictable 728 * In a workload with many unevictable page such as mprotect, unevictable
727 * page deactivation for accelerating reclaim is pointless. 729 * page deactivation for accelerating reclaim is pointless.
728 */ 730 */
729 if (PageUnevictable(page)) 731 if (PageUnevictable(page))
730 return; 732 return;
731 733
732 if (likely(get_page_unless_zero(page))) { 734 if (likely(get_page_unless_zero(page))) {
733 struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs); 735 struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
734 736
735 if (!pagevec_add(pvec, page)) 737 if (!pagevec_add(pvec, page))
736 pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); 738 pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
737 put_cpu_var(lru_deactivate_pvecs); 739 put_cpu_var(lru_deactivate_pvecs);
738 } 740 }
739 } 741 }
740 742
741 void lru_add_drain(void) 743 void lru_add_drain(void)
742 { 744 {
743 lru_add_drain_cpu(get_cpu()); 745 lru_add_drain_cpu(get_cpu());
744 put_cpu(); 746 put_cpu();
745 } 747 }
746 748
747 static void lru_add_drain_per_cpu(struct work_struct *dummy) 749 static void lru_add_drain_per_cpu(struct work_struct *dummy)
748 { 750 {
749 lru_add_drain(); 751 lru_add_drain();
750 } 752 }
751 753
752 static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); 754 static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
753 755
754 void lru_add_drain_all(void) 756 void lru_add_drain_all(void)
755 { 757 {
756 static DEFINE_MUTEX(lock); 758 static DEFINE_MUTEX(lock);
757 static struct cpumask has_work; 759 static struct cpumask has_work;
758 int cpu; 760 int cpu;
759 761
760 mutex_lock(&lock); 762 mutex_lock(&lock);
761 get_online_cpus(); 763 get_online_cpus();
762 cpumask_clear(&has_work); 764 cpumask_clear(&has_work);
763 765
764 for_each_online_cpu(cpu) { 766 for_each_online_cpu(cpu) {
765 struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); 767 struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
766 768
767 if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) || 769 if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
768 pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) || 770 pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
769 pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || 771 pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
770 need_activate_page_drain(cpu)) { 772 need_activate_page_drain(cpu)) {
771 INIT_WORK(work, lru_add_drain_per_cpu); 773 INIT_WORK(work, lru_add_drain_per_cpu);
772 schedule_work_on(cpu, work); 774 schedule_work_on(cpu, work);
773 cpumask_set_cpu(cpu, &has_work); 775 cpumask_set_cpu(cpu, &has_work);
774 } 776 }
775 } 777 }
776 778
777 for_each_cpu(cpu, &has_work) 779 for_each_cpu(cpu, &has_work)
778 flush_work(&per_cpu(lru_add_drain_work, cpu)); 780 flush_work(&per_cpu(lru_add_drain_work, cpu));
779 781
780 put_online_cpus(); 782 put_online_cpus();
781 mutex_unlock(&lock); 783 mutex_unlock(&lock);
782 } 784 }
783 785
784 /* 786 /*
785 * Batched page_cache_release(). Decrement the reference count on all the 787 * Batched page_cache_release(). Decrement the reference count on all the
786 * passed pages. If it fell to zero then remove the page from the LRU and 788 * passed pages. If it fell to zero then remove the page from the LRU and
787 * free it. 789 * free it.
788 * 790 *
789 * Avoid taking zone->lru_lock if possible, but if it is taken, retain it 791 * Avoid taking zone->lru_lock if possible, but if it is taken, retain it
790 * for the remainder of the operation. 792 * for the remainder of the operation.
791 * 793 *
792 * The locking in this function is against shrink_inactive_list(): we recheck 794 * The locking in this function is against shrink_inactive_list(): we recheck
793 * the page count inside the lock to see whether shrink_inactive_list() 795 * the page count inside the lock to see whether shrink_inactive_list()
794 * grabbed the page via the LRU. If it did, give up: shrink_inactive_list() 796 * grabbed the page via the LRU. If it did, give up: shrink_inactive_list()
795 * will free it. 797 * will free it.
796 */ 798 */
797 void release_pages(struct page **pages, int nr, bool cold) 799 void release_pages(struct page **pages, int nr, bool cold)
798 { 800 {
799 int i; 801 int i;
800 LIST_HEAD(pages_to_free); 802 LIST_HEAD(pages_to_free);
801 struct zone *zone = NULL; 803 struct zone *zone = NULL;
802 struct lruvec *lruvec; 804 struct lruvec *lruvec;
803 unsigned long uninitialized_var(flags); 805 unsigned long uninitialized_var(flags);
804 806
805 for (i = 0; i < nr; i++) { 807 for (i = 0; i < nr; i++) {
806 struct page *page = pages[i]; 808 struct page *page = pages[i];
807 809
808 if (unlikely(PageCompound(page))) { 810 if (unlikely(PageCompound(page))) {
809 if (zone) { 811 if (zone) {
810 spin_unlock_irqrestore(&zone->lru_lock, flags); 812 spin_unlock_irqrestore(&zone->lru_lock, flags);
811 zone = NULL; 813 zone = NULL;
812 } 814 }
813 put_compound_page(page); 815 put_compound_page(page);
814 continue; 816 continue;
815 } 817 }
816 818
817 if (!put_page_testzero(page)) 819 if (!put_page_testzero(page))
818 continue; 820 continue;
819 821
820 if (PageLRU(page)) { 822 if (PageLRU(page)) {
821 struct zone *pagezone = page_zone(page); 823 struct zone *pagezone = page_zone(page);
822 824
823 if (pagezone != zone) { 825 if (pagezone != zone) {
824 if (zone) 826 if (zone)
825 spin_unlock_irqrestore(&zone->lru_lock, 827 spin_unlock_irqrestore(&zone->lru_lock,
826 flags); 828 flags);
827 zone = pagezone; 829 zone = pagezone;
828 spin_lock_irqsave(&zone->lru_lock, flags); 830 spin_lock_irqsave(&zone->lru_lock, flags);
829 } 831 }
830 832
831 lruvec = mem_cgroup_page_lruvec(page, zone); 833 lruvec = mem_cgroup_page_lruvec(page, zone);
832 VM_BUG_ON(!PageLRU(page)); 834 VM_BUG_ON(!PageLRU(page));
833 __ClearPageLRU(page); 835 __ClearPageLRU(page);
834 del_page_from_lru_list(page, lruvec, page_off_lru(page)); 836 del_page_from_lru_list(page, lruvec, page_off_lru(page));
835 } 837 }
836 838
837 /* Clear Active bit in case of parallel mark_page_accessed */ 839 /* Clear Active bit in case of parallel mark_page_accessed */
838 __ClearPageActive(page); 840 __ClearPageActive(page);
839 841
840 list_add(&page->lru, &pages_to_free); 842 list_add(&page->lru, &pages_to_free);
841 } 843 }
842 if (zone) 844 if (zone)
843 spin_unlock_irqrestore(&zone->lru_lock, flags); 845 spin_unlock_irqrestore(&zone->lru_lock, flags);
844 846
845 free_hot_cold_page_list(&pages_to_free, cold); 847 free_hot_cold_page_list(&pages_to_free, cold);
846 } 848 }
847 EXPORT_SYMBOL(release_pages); 849 EXPORT_SYMBOL(release_pages);
848 850
849 /* 851 /*
850 * The pages which we're about to release may be in the deferred lru-addition 852 * The pages which we're about to release may be in the deferred lru-addition
851 * queues. That would prevent them from really being freed right now. That's 853 * queues. That would prevent them from really being freed right now. That's
852 * OK from a correctness point of view but is inefficient - those pages may be 854 * OK from a correctness point of view but is inefficient - those pages may be
853 * cache-warm and we want to give them back to the page allocator ASAP. 855 * cache-warm and we want to give them back to the page allocator ASAP.
854 * 856 *
855 * So __pagevec_release() will drain those queues here. __pagevec_lru_add() 857 * So __pagevec_release() will drain those queues here. __pagevec_lru_add()
856 * and __pagevec_lru_add_active() call release_pages() directly to avoid 858 * and __pagevec_lru_add_active() call release_pages() directly to avoid
857 * mutual recursion. 859 * mutual recursion.
858 */ 860 */
859 void __pagevec_release(struct pagevec *pvec) 861 void __pagevec_release(struct pagevec *pvec)
860 { 862 {
861 lru_add_drain(); 863 lru_add_drain();
862 release_pages(pvec->pages, pagevec_count(pvec), pvec->cold); 864 release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
863 pagevec_reinit(pvec); 865 pagevec_reinit(pvec);
864 } 866 }
865 EXPORT_SYMBOL(__pagevec_release); 867 EXPORT_SYMBOL(__pagevec_release);
866 868
867 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 869 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
868 /* used by __split_huge_page_refcount() */ 870 /* used by __split_huge_page_refcount() */
869 void lru_add_page_tail(struct page *page, struct page *page_tail, 871 void lru_add_page_tail(struct page *page, struct page *page_tail,
870 struct lruvec *lruvec, struct list_head *list) 872 struct lruvec *lruvec, struct list_head *list)
871 { 873 {
872 const int file = 0; 874 const int file = 0;
873 875
874 VM_BUG_ON(!PageHead(page)); 876 VM_BUG_ON(!PageHead(page));
875 VM_BUG_ON(PageCompound(page_tail)); 877 VM_BUG_ON(PageCompound(page_tail));
876 VM_BUG_ON(PageLRU(page_tail)); 878 VM_BUG_ON(PageLRU(page_tail));
877 VM_BUG_ON(NR_CPUS != 1 && 879 VM_BUG_ON(NR_CPUS != 1 &&
878 !spin_is_locked(&lruvec_zone(lruvec)->lru_lock)); 880 !spin_is_locked(&lruvec_zone(lruvec)->lru_lock));
879 881
880 if (!list) 882 if (!list)
881 SetPageLRU(page_tail); 883 SetPageLRU(page_tail);
882 884
883 if (likely(PageLRU(page))) 885 if (likely(PageLRU(page)))
884 list_add_tail(&page_tail->lru, &page->lru); 886 list_add_tail(&page_tail->lru, &page->lru);
885 else if (list) { 887 else if (list) {
886 /* page reclaim is reclaiming a huge page */ 888 /* page reclaim is reclaiming a huge page */
887 get_page(page_tail); 889 get_page(page_tail);
888 list_add_tail(&page_tail->lru, list); 890 list_add_tail(&page_tail->lru, list);
889 } else { 891 } else {
890 struct list_head *list_head; 892 struct list_head *list_head;
891 /* 893 /*
892 * Head page has not yet been counted, as an hpage, 894 * Head page has not yet been counted, as an hpage,
893 * so we must account for each subpage individually. 895 * so we must account for each subpage individually.
894 * 896 *
895 * Use the standard add function to put page_tail on the list, 897 * Use the standard add function to put page_tail on the list,
896 * but then correct its position so they all end up in order. 898 * but then correct its position so they all end up in order.
897 */ 899 */
898 add_page_to_lru_list(page_tail, lruvec, page_lru(page_tail)); 900 add_page_to_lru_list(page_tail, lruvec, page_lru(page_tail));
899 list_head = page_tail->lru.prev; 901 list_head = page_tail->lru.prev;
900 list_move_tail(&page_tail->lru, list_head); 902 list_move_tail(&page_tail->lru, list_head);
901 } 903 }
902 904
903 if (!PageUnevictable(page)) 905 if (!PageUnevictable(page))
904 update_page_reclaim_stat(lruvec, file, PageActive(page_tail)); 906 update_page_reclaim_stat(lruvec, file, PageActive(page_tail));
905 } 907 }
906 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 908 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
907 909
908 static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, 910 static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
909 void *arg) 911 void *arg)
910 { 912 {
911 int file = page_is_file_cache(page); 913 int file = page_is_file_cache(page);
912 int active = PageActive(page); 914 int active = PageActive(page);
913 enum lru_list lru = page_lru(page); 915 enum lru_list lru = page_lru(page);
914 916
915 VM_BUG_ON(PageLRU(page)); 917 VM_BUG_ON(PageLRU(page));
916 918
917 SetPageLRU(page); 919 SetPageLRU(page);
918 add_page_to_lru_list(page, lruvec, lru); 920 add_page_to_lru_list(page, lruvec, lru);
919 update_page_reclaim_stat(lruvec, file, active); 921 update_page_reclaim_stat(lruvec, file, active);
920 trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page)); 922 trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page));
921 } 923 }
922 924
923 /* 925 /*
924 * Add the passed pages to the LRU, then drop the caller's refcount 926 * Add the passed pages to the LRU, then drop the caller's refcount
925 * on them. Reinitialises the caller's pagevec. 927 * on them. Reinitialises the caller's pagevec.
926 */ 928 */
927 void __pagevec_lru_add(struct pagevec *pvec) 929 void __pagevec_lru_add(struct pagevec *pvec)
928 { 930 {
929 pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, NULL); 931 pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, NULL);
930 } 932 }
931 EXPORT_SYMBOL(__pagevec_lru_add); 933 EXPORT_SYMBOL(__pagevec_lru_add);
932 934
933 /** 935 /**
934 * pagevec_lookup_entries - gang pagecache lookup 936 * pagevec_lookup_entries - gang pagecache lookup
935 * @pvec: Where the resulting entries are placed 937 * @pvec: Where the resulting entries are placed
936 * @mapping: The address_space to search 938 * @mapping: The address_space to search
937 * @start: The starting entry index 939 * @start: The starting entry index
938 * @nr_pages: The maximum number of entries 940 * @nr_pages: The maximum number of entries
939 * @indices: The cache indices corresponding to the entries in @pvec 941 * @indices: The cache indices corresponding to the entries in @pvec
940 * 942 *
941 * pagevec_lookup_entries() will search for and return a group of up 943 * pagevec_lookup_entries() will search for and return a group of up
942 * to @nr_entries pages and shadow entries in the mapping. All 944 * to @nr_entries pages and shadow entries in the mapping. All
943 * entries are placed in @pvec. pagevec_lookup_entries() takes a 945 * entries are placed in @pvec. pagevec_lookup_entries() takes a
944 * reference against actual pages in @pvec. 946 * reference against actual pages in @pvec.
945 * 947 *
946 * The search returns a group of mapping-contiguous entries with 948 * The search returns a group of mapping-contiguous entries with
947 * ascending indexes. There may be holes in the indices due to 949 * ascending indexes. There may be holes in the indices due to
948 * not-present entries. 950 * not-present entries.
949 * 951 *
950 * pagevec_lookup_entries() returns the number of entries which were 952 * pagevec_lookup_entries() returns the number of entries which were
951 * found. 953 * found.
952 */ 954 */
953 unsigned pagevec_lookup_entries(struct pagevec *pvec, 955 unsigned pagevec_lookup_entries(struct pagevec *pvec,
954 struct address_space *mapping, 956 struct address_space *mapping,
955 pgoff_t start, unsigned nr_pages, 957 pgoff_t start, unsigned nr_pages,
956 pgoff_t *indices) 958 pgoff_t *indices)
957 { 959 {
958 pvec->nr = find_get_entries(mapping, start, nr_pages, 960 pvec->nr = find_get_entries(mapping, start, nr_pages,
959 pvec->pages, indices); 961 pvec->pages, indices);
960 return pagevec_count(pvec); 962 return pagevec_count(pvec);
961 } 963 }
962 964
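A hedged sketch of the consumer pattern this interface is built for, modelled loosely on the truncate/invalidate loops in mm/truncate.c (example_scan_mapping() itself is hypothetical). The @indices array matters because exceptional (shadow) entries carry no struct page and therefore no page->index:

	static void example_scan_mapping(struct address_space *mapping)
	{
		pgoff_t indices[PAGEVEC_SIZE];
		struct pagevec pvec;
		pgoff_t index = 0;
		int i;

		pagevec_init(&pvec, 0);
		while (pagevec_lookup_entries(&pvec, mapping, index,
					      PAGEVEC_SIZE, indices)) {
			for (i = 0; i < pagevec_count(&pvec); i++) {
				struct page *page = pvec.pages[i];

				index = indices[i];
				if (radix_tree_exceptional_entry(page)) {
					/* shadow entry: nothing to lock or put */
					continue;
				}
				/* ... operate on the real page here ... */
			}
			/* drop exceptionals first, then the page references */
			pagevec_remove_exceptionals(&pvec);
			pagevec_release(&pvec);
			index++;
		}
	}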
963 /** 965 /**
964 * pagevec_remove_exceptionals - pagevec exceptionals pruning 966 * pagevec_remove_exceptionals - pagevec exceptionals pruning
965 * @pvec: The pagevec to prune 967 * @pvec: The pagevec to prune
966 * 968 *
967 * pagevec_lookup_entries() fills both pages and exceptional radix 969 * pagevec_lookup_entries() fills both pages and exceptional radix
968 * tree entries into the pagevec. This function prunes all 970 * tree entries into the pagevec. This function prunes all
969 * exceptionals from @pvec without leaving holes, so that it can be 971 * exceptionals from @pvec without leaving holes, so that it can be
970 * passed on to page-only pagevec operations. 972 * passed on to page-only pagevec operations.
971 */ 973 */
972 void pagevec_remove_exceptionals(struct pagevec *pvec) 974 void pagevec_remove_exceptionals(struct pagevec *pvec)
973 { 975 {
974 int i, j; 976 int i, j;
975 977
976 for (i = 0, j = 0; i < pagevec_count(pvec); i++) { 978 for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
977 struct page *page = pvec->pages[i]; 979 struct page *page = pvec->pages[i];
978 if (!radix_tree_exceptional_entry(page)) 980 if (!radix_tree_exceptional_entry(page))
979 pvec->pages[j++] = page; 981 pvec->pages[j++] = page;
980 } 982 }
981 pvec->nr = j; 983 pvec->nr = j;
982 } 984 }
983 985
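The pruning itself is the classic stable in-place filter: one read index, one write index, survivors compacted to the front, and the element count trimmed afterwards. A tiny standalone illustration of the same idiom (plain userspace C; is_exceptional() is a made-up stand-in for radix_tree_exceptional_entry()):

	#include <stdio.h>

	static int is_exceptional(int v)
	{
		return v < 0;	/* stand-in for radix_tree_exceptional_entry() */
	}

	int main(void)
	{
		int vals[] = { 1, -1, 2, -2, 3 };
		int n = 5, i, j;

		for (i = 0, j = 0; i < n; i++)
			if (!is_exceptional(vals[i]))
				vals[j++] = vals[i];
		n = j;			/* like pvec->nr = j */

		for (i = 0; i < n; i++)
			printf("%d ", vals[i]);	/* prints: 1 2 3 */
		printf("\n");
		return 0;
	}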
984 /** 986 /**
985 * pagevec_lookup - gang pagecache lookup 987 * pagevec_lookup - gang pagecache lookup
986 * @pvec: Where the resulting pages are placed 988 * @pvec: Where the resulting pages are placed
987 * @mapping: The address_space to search 989 * @mapping: The address_space to search
988 * @start: The starting page index 990 * @start: The starting page index
989 * @nr_pages: The maximum number of pages 991 * @nr_pages: The maximum number of pages
990 * 992 *
991 * pagevec_lookup() will search for and return a group of up to @nr_pages pages 993 * pagevec_lookup() will search for and return a group of up to @nr_pages pages
992 * in the mapping. The pages are placed in @pvec. pagevec_lookup() takes a 994 * in the mapping. The pages are placed in @pvec. pagevec_lookup() takes a
993 * reference against the pages in @pvec. 995 * reference against the pages in @pvec.
994 * 996 *
995 * The search returns a group of mapping-contiguous pages with ascending 997 * The search returns a group of mapping-contiguous pages with ascending
996 * indexes. There may be holes in the indices due to not-present pages. 998 * indexes. There may be holes in the indices due to not-present pages.
997 * 999 *
998 * pagevec_lookup() returns the number of pages which were found. 1000 * pagevec_lookup() returns the number of pages which were found.
999 */ 1001 */
1000 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, 1002 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
1001 pgoff_t start, unsigned nr_pages) 1003 pgoff_t start, unsigned nr_pages)
1002 { 1004 {
1003 pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages); 1005 pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages);
1004 return pagevec_count(pvec); 1006 return pagevec_count(pvec);
1005 } 1007 }
1006 EXPORT_SYMBOL(pagevec_lookup); 1008 EXPORT_SYMBOL(pagevec_lookup);
1007 1009
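A hedged sketch of the usual calling pattern (example_walk_mapping() is hypothetical and assumes the usual mm headers): look up a batch, process it, release the references pagevec_lookup() took, and continue from just past the last page seen.

	static void example_walk_mapping(struct address_space *mapping)
	{
		struct pagevec pvec;
		pgoff_t index = 0;
		int i;

		pagevec_init(&pvec, 0);
		while (pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE)) {
			for (i = 0; i < pagevec_count(&pvec); i++) {
				struct page *page = pvec.pages[i];

				/* remember where to resume; indices may have holes */
				index = page->index + 1;
				/* ... inspect or operate on the page here ... */
			}
			/* drops the references pagevec_lookup() took */
			pagevec_release(&pvec);
			cond_resched();
		}
	}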
1008 unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, 1010 unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
1009 pgoff_t *index, int tag, unsigned nr_pages) 1011 pgoff_t *index, int tag, unsigned nr_pages)
1010 { 1012 {
1011 pvec->nr = find_get_pages_tag(mapping, index, tag, 1013 pvec->nr = find_get_pages_tag(mapping, index, tag,
1012 nr_pages, pvec->pages); 1014 nr_pages, pvec->pages);
1013 return pagevec_count(pvec); 1015 return pagevec_count(pvec);
1014 } 1016 }
1015 EXPORT_SYMBOL(pagevec_lookup_tag); 1017 EXPORT_SYMBOL(pagevec_lookup_tag);
1016 1018
1017 /* 1019 /*
1018 * Perform any setup for the swap system 1020 * Perform any setup for the swap system
1019 */ 1021 */
1020 void __init swap_setup(void) 1022 void __init swap_setup(void)
1021 { 1023 {
1022 unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT); 1024 unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
1023 #ifdef CONFIG_SWAP 1025 #ifdef CONFIG_SWAP
1024 int i; 1026 int i;
1025 1027
1026 bdi_init(swapper_spaces[0].backing_dev_info); 1028 bdi_init(swapper_spaces[0].backing_dev_info);
1027 for (i = 0; i < MAX_SWAPFILES; i++) { 1029 for (i = 0; i < MAX_SWAPFILES; i++) {
1028 spin_lock_init(&swapper_spaces[i].tree_lock); 1030 spin_lock_init(&swapper_spaces[i].tree_lock);
1029 INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear); 1031 INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear);
1030 } 1032 }
1031 #endif 1033 #endif
1032 1034
1033 /* Use a smaller cluster for small-memory machines */ 1035 /* Use a smaller cluster for small-memory machines */
1034 if (megs < 16) 1036 if (megs < 16)
1035 page_cluster = 2; 1037 page_cluster = 2;
1036 else 1038 else
1037 page_cluster = 3; 1039 page_cluster = 3;
1038 /* 1040 /*
1039 * Right now, other parts of the system mean that we 1041 * Right now, other parts of the system mean that we
1040 * _really_ don't want to cluster much more 1042 * _really_ don't want to cluster much more
1041 */ 1043 */
1042 } 1044 }
1043 1045
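As a worked example of the megabyte arithmetic above (assuming the common PAGE_SHIFT of 12, i.e. 4 KiB pages, purely for illustration): shifting the page count right by 20 - PAGE_SHIFT = 8 divides by the 256 pages that make up one MiB, so a machine with 3000 pages (about 11.7 MiB of RAM) gets megs = 11 and therefore page_cluster = 2.

	#include <stdio.h>

	int main(void)
	{
		unsigned long totalram_pages = 3000;	/* hypothetical page count */
		unsigned int page_shift = 12;		/* 4 KiB pages (assumption) */
		unsigned long megs = totalram_pages >> (20 - page_shift);

		/* 3000 >> 8 == 11, i.e. under 16 MB, so page_cluster would be 2 */
		printf("megs = %lu\n", megs);
		return 0;
	}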