  /*
   * mm/readahead.c - address_space-level file readahead.
   *
   * Copyright (C) 2002, Linus Torvalds
   *
   * 09Apr2002	Andrew Morton
   *		Initial version.
   */
  
  #include <linux/kernel.h>
  #include <linux/fs.h>
  #include <linux/gfp.h>
  #include <linux/mm.h>
  #include <linux/export.h>
  #include <linux/blkdev.h>
  #include <linux/backing-dev.h>
  #include <linux/task_io_accounting_ops.h>
  #include <linux/pagevec.h>
  #include <linux/pagemap.h>

  /*
   * Initialise a struct file's readahead state.  Assumes that the caller has
   * memset *ra to zero.
   */
  void
  file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
  {
  	ra->ra_pages = mapping->backing_dev_info->ra_pages;
  	ra->prev_pos = -1;
  }
  EXPORT_SYMBOL_GPL(file_ra_state_init);

  #define list_to_page(head) (list_entry((head)->prev, struct page, lru))
  /*
   * see if a page needs releasing upon read_cache_pages() failure
   * - the caller of read_cache_pages() may have set PG_private or PG_fscache
   *   before calling, such as the NFS fs marking pages that are cached locally
   *   on disk, thus we need to give the fs a chance to clean up in the event of
   *   an error
   */
  static void read_cache_pages_invalidate_page(struct address_space *mapping,
  					     struct page *page)
  {
  	if (page_has_private(page)) {
  		if (!trylock_page(page))
  			BUG();
  		page->mapping = mapping;
  		do_invalidatepage(page, 0);
  		page->mapping = NULL;
  		unlock_page(page);
  	}
  	page_cache_release(page);
  }
  
  /*
   * release a list of pages, invalidating them first if need be
   */
  static void read_cache_pages_invalidate_pages(struct address_space *mapping,
  					      struct list_head *pages)
  {
  	struct page *victim;
  
  	while (!list_empty(pages)) {
  		victim = list_to_page(pages);
  		list_del(&victim->lru);
  		read_cache_pages_invalidate_page(mapping, victim);
  	}
  }
  /**
   * read_cache_pages - populate an address space with some pages & start reads against them
   * @mapping: the address_space
   * @pages: The address of a list_head which contains the target pages.  These
   *   pages have their ->index populated and are otherwise uninitialised.
   * @filler: callback routine for filling a single page.
   * @data: private data for the callback routine.
   *
   * Hides the details of the LRU cache etc from the filesystems.
   */
  int read_cache_pages(struct address_space *mapping, struct list_head *pages,
  			int (*filler)(void *, struct page *), void *data)
  {
  	struct page *page;
  	int ret = 0;
  	while (!list_empty(pages)) {
  		page = list_to_page(pages);
  		list_del(&page->lru);
  		if (add_to_page_cache_lru(page, mapping,
  					page->index, GFP_KERNEL)) {
  			read_cache_pages_invalidate_page(mapping, page);
  			continue;
  		}
  		page_cache_release(page);
  		ret = filler(data, page);
  		if (unlikely(ret)) {
  			read_cache_pages_invalidate_pages(mapping, pages);
  			break;
  		}
  		task_io_account_read(PAGE_CACHE_SIZE);
  	}
  	return ret;
  }
  
  EXPORT_SYMBOL(read_cache_pages);
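
  /*
   * A minimal sketch (not part of this file) of how a caller might drive
   * read_cache_pages().  The pages on the list already have ->index set;
   * the hypothetical my_fill_page() filler simply hands each page to the
   * filesystem's ->readpage() routine, which is expected to unlock it.
   */
  static int my_fill_page(void *data, struct page *page)
  {
  	struct file *file = data;

  	return file->f_mapping->a_ops->readpage(file, page);
  }

  static int my_start_reads(struct file *file, struct list_head *pages)
  {
  	return read_cache_pages(file->f_mapping, pages, my_fill_page, file);
  }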
  
  static int read_pages(struct address_space *mapping, struct file *filp,
  		struct list_head *pages, unsigned nr_pages)
  {
  	struct blk_plug plug;
  	unsigned page_idx;
  	int ret;

  	blk_start_plug(&plug);
  	if (mapping->a_ops->readpages) {
  		ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
  		/* Clean up the remaining pages */
  		put_pages_list(pages);
  		goto out;
  	}
  	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
  		struct page *page = list_to_page(pages);
  		list_del(&page->lru);
  		if (!add_to_page_cache_lru(page, mapping,
  					page->index, GFP_KERNEL)) {
  			mapping->a_ops->readpage(filp, page);
  		}
  		page_cache_release(page);
  	}
  	ret = 0;

  out:
  	blk_finish_plug(&plug);
  	return ret;
  }
  
  /*
   * __do_page_cache_readahead() actually reads a chunk of disk.  It allocates all
   * the pages first, then submits them all for I/O. This avoids the very bad
   * behaviour which would occur if page allocations are causing VM writeback.
   * We really don't want to intermingle reads and writes like that.
   *
   * Returns the number of pages requested, or the maximum amount of I/O allowed.
   */
  static int
  __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
  			pgoff_t offset, unsigned long nr_to_read,
  			unsigned long lookahead_size)
  {
  	struct inode *inode = mapping->host;
  	struct page *page;
  	unsigned long end_index;	/* The last page we want to read */
  	LIST_HEAD(page_pool);
  	int page_idx;
  	int ret = 0;
  	loff_t isize = i_size_read(inode);
  
  	if (isize == 0)
  		goto out;
  	end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
  
  	/*
  	 * Preallocate as many pages as we will need.
  	 */
  	for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
  		pgoff_t page_offset = offset + page_idx;

  		if (page_offset > end_index)
  			break;
  		rcu_read_lock();
  		page = radix_tree_lookup(&mapping->page_tree, page_offset);
  		rcu_read_unlock();
  		if (page)
  			continue;
  		page = page_cache_alloc_readahead(mapping);
  		if (!page)
  			break;
  		page->index = page_offset;
  		list_add(&page->lru, &page_pool);
  		if (page_idx == nr_to_read - lookahead_size)
  			SetPageReadahead(page);
  		ret++;
  	}
  
  	/*
  	 * Now start the IO.  We ignore I/O errors - if the page is not
  	 * uptodate then the caller will launch readpage again, and
  	 * will then handle the error.
  	 */
  	if (ret)
  		read_pages(mapping, filp, &page_pool, ret);
  	BUG_ON(!list_empty(&page_pool));
  out:
  	return ret;
  }
  
  /*
   * Chunk the readahead into 2 megabyte units, so that we don't pin too much
   * memory at once.
   */
  int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
  		pgoff_t offset, unsigned long nr_to_read)
  {
  	int ret = 0;
  
  	if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
  		return -EINVAL;
  	nr_to_read = max_sane_readahead(nr_to_read);
  	while (nr_to_read) {
  		int err;
  
  		unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_CACHE_SIZE;
  
  		if (this_chunk > nr_to_read)
  			this_chunk = nr_to_read;
  		err = __do_page_cache_readahead(mapping, filp,
  						offset, this_chunk, 0);
  		if (err < 0) {
  			ret = err;
  			break;
  		}
  		ret += err;
  		offset += this_chunk;
  		nr_to_read -= this_chunk;
  	}
  	return ret;
  }
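
  /*
   * Worked example: with 4k pages each chunk is 512 pages (2MB), so a
   * 10MB (2560 page) request is submitted as five successive chunks.
   */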
  
  /*
   * Given a desired number of PAGE_CACHE_SIZE readahead pages, return a
   * sensible upper limit.
   */
  unsigned long max_sane_readahead(unsigned long nr)
  {
  	return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE_FILE)
  		+ node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
  }
  
  /*
   * Submit IO for the read-ahead request in file_ra_state.
   */
  unsigned long ra_submit(struct file_ra_state *ra,
  		       struct address_space *mapping, struct file *filp)
  {
  	int actual;
  	actual = __do_page_cache_readahead(mapping, filp,
  					ra->start, ra->size, ra->async_size);
  
  	return actual;
  }
  
  /*
   * Set the initial readahead window size: round the request up to the
   * next power of 2, then quadruple it for small requests, double it for
   * medium ones, and clip large requests to the max readahead size.
   */
  static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
  {
  	unsigned long newsize = roundup_pow_of_two(size);
  
  	if (newsize <= max / 32)
  		newsize = newsize * 4;
  	else if (newsize <= max / 4)
  		newsize = newsize * 2;
  	else
  		newsize = max;
  
  	return newsize;
  }
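
  /*
   * Worked example (derived from the code above): with max = 32 pages
   * (128k), a 1-2 page request gets a 4 page (16k) initial window, a
   * 3-4 page request gets 8 pages (32k), a 5-8 page request gets 16 pages
   * (64k), and anything larger gets the full 32 page (128k) window.
   */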
  
  /*
   *  Get the previous window size, ramp it up, and
   *  return it as the new window size.
   */
  static unsigned long get_next_ra_size(struct file_ra_state *ra,
  						unsigned long max)
  {
  	unsigned long cur = ra->size;
  	unsigned long newsize;
  
  	if (cur < max / 16)
  		newsize = 4 * cur;
  	else
  		newsize = 2 * cur;
  
  	return min(newsize, max);
  }
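
  /*
   * Worked example (derived from the code above): with max = 32 pages,
   * only a window smaller than max/16 (here a single page) is quadrupled;
   * otherwise the window doubles, so 4 grows to 8, 8 to 16, 16 to 32, and
   * growth is then capped at 32.
   */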
  
  /*
   * On-demand readahead design.
   *
   * The fields in struct file_ra_state represent the most-recently-executed
   * readahead attempt:
   *
   *                        |<----- async_size ---------|
   *     |------------------- size -------------------->|
   *     |==================#===========================|
   *     ^start             ^page marked with PG_readahead
   *
   * To overlap application thinking time and disk I/O time, we do
   * `readahead pipelining': Do not wait until the application consumed all
   * readahead pages and stalled on the missing page at readahead_index;
   * Instead, submit an asynchronous readahead I/O as soon as there are
   * only async_size pages left in the readahead window. Normally async_size
   * will be equal to size, for maximum pipelining.
   *
   * In interleaved sequential reads, concurrent streams on the same fd can
   * be invalidating each other's readahead state. So we flag the new readahead
   * page at (start+size-async_size) with PG_readahead, and use it as readahead
   * indicator. The flag won't be set on already cached pages, to avoid the
   * readahead-for-nothing fuss, saving pointless page cache lookups.
   *
   * prev_pos tracks the last visited byte in the _previous_ read request.
   * It should be maintained by the caller, and will be used for detecting
   * small random reads. Note that the readahead algorithm checks loosely
   * for sequential patterns. Hence interleaved reads might be served as
   * sequential ones.
   *
   * There is a special-case: if the first page which the application tries to
   * read happens to be the first page of the file, it is assumed that a linear
   * read is about to happen and the window is immediately set to the initial size
   * based on I/O request size and the max_readahead.
   *
   * The code ramps up the readahead size aggressively at first, but slows
   * down as it approaches max_readahead.
   */
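
  /*
   * A short trace of the pipelining above, assuming a 32 page max: a first
   * 1 page read at offset 0 takes the initial_readahead path and sets up
   * start=0, size=4, async_size=3, marking the page at offset 1 with
   * PG_readahead.  When the application reaches page 1, the
   * offset == start + size - async_size test below matches, the window
   * advances to start=4, size=8, async_size=8, and that I/O is submitted
   * while pages 1-3 are still being consumed.
   */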
  
  /*
   * Count contiguously cached pages from @offset-1 to @offset-@max,
   * this count is a conservative estimation of
   * 	- length of the sequential read sequence, or
   * 	- thrashing threshold in memory tight systems
   */
  static pgoff_t count_history_pages(struct address_space *mapping,
  				   struct file_ra_state *ra,
  				   pgoff_t offset, unsigned long max)
  {
  	pgoff_t head;
  
  	rcu_read_lock();
  	head = radix_tree_prev_hole(&mapping->page_tree, offset - 1, max);
  	rcu_read_unlock();
  
  	return offset - 1 - head;
  }
  
  /*
   * page cache context based read-ahead
   */
  static int try_context_readahead(struct address_space *mapping,
  				 struct file_ra_state *ra,
  				 pgoff_t offset,
  				 unsigned long req_size,
  				 unsigned long max)
  {
  	pgoff_t size;
  
  	size = count_history_pages(mapping, ra, offset, max);
  
  	/*
  	 * no history pages:
  	 * it could be a random read
  	 */
  	if (!size)
  		return 0;
  
  	/*
  	 * starts from beginning of file:
  	 * it is a strong indication of long-run stream (or whole-file-read)
  	 */
  	if (size >= offset)
  		size *= 2;
  
  	ra->start = offset;
  	ra->size = get_init_ra_size(size + req_size, max);
  	ra->async_size = ra->size;
  
  	return 1;
  }
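
  /*
   * Example (derived from the code above): if the 8 pages just below
   * @offset are already cached and 2 more pages are requested, the history
   * count is 8 and the new window is get_init_ra_size(10, max), i.e. the
   * full 32 pages for a 32 page max.  With no cached history the read is
   * treated as random and no window is set up; a history reaching back to
   * the start of the file is doubled first as a hint of a long-run stream.
   */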
  
  /*
   * A minimal readahead algorithm for trivial sequential/random reads.
   */
  static unsigned long
  ondemand_readahead(struct address_space *mapping,
  		   struct file_ra_state *ra, struct file *filp,
  		   bool hit_readahead_marker, pgoff_t offset,
  		   unsigned long req_size)
  {
  	unsigned long max = max_sane_readahead(ra->ra_pages);
  
  	/*
  	 * start of file
  	 */
  	if (!offset)
  		goto initial_readahead;
  
  	/*
  	 * It's the expected callback offset, assume sequential access.
  	 * Ramp up sizes, and push forward the readahead window.
  	 */
  	if ((offset == (ra->start + ra->size - ra->async_size) ||
  	     offset == (ra->start + ra->size))) {
  		ra->start += ra->size;
  		ra->size = get_next_ra_size(ra, max);
  		ra->async_size = ra->size;
  		goto readit;
  	}
  	/*
  	 * Hit a marked page without valid readahead state.
  	 * E.g. interleaved reads.
  	 * Query the pagecache for async_size, which normally equals the
  	 * readahead size. Ramp it up and use it as the new readahead size.
  	 */
  	if (hit_readahead_marker) {
  		pgoff_t start;
  		rcu_read_lock();
  		start = radix_tree_next_hole(&mapping->page_tree, offset+1,max);
  		rcu_read_unlock();
  
  		if (!start || start - offset > max)
  			return 0;
  
  		ra->start = start;
  		ra->size = start - offset;	/* old async_size */
  		ra->size += req_size;
  		ra->size = get_next_ra_size(ra, max);
  		ra->async_size = ra->size;
  		goto readit;
  	}
  
  	/*
  	 * oversize read
  	 */
  	if (req_size > max)
  		goto initial_readahead;
  
  	/*
  	 * sequential cache miss
  	 */
  	if (offset - (ra->prev_pos >> PAGE_CACHE_SHIFT) <= 1UL)
  		goto initial_readahead;
  
  	/*
  	 * Query the page cache and look for the traces (cached history pages)
  	 * that a sequential stream would leave behind.
  	 */
  	if (try_context_readahead(mapping, ra, offset, req_size, max))
  		goto readit;
  
  	/*
  	 * standalone, small random read
  	 * Read as is, and do not pollute the readahead state.
  	 */
  	return __do_page_cache_readahead(mapping, filp, offset, req_size, 0);
  
  initial_readahead:
  	ra->start = offset;
  	ra->size = get_init_ra_size(req_size, max);
  	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;

  readit:
  	/*
  	 * Will this read hit the readahead marker made by itself?
  	 * If so, trigger the readahead marker hit now, and merge
  	 * the resulting next readahead window into the current one.
  	 */
  	if (offset == ra->start && ra->size == ra->async_size) {
  		ra->async_size = get_next_ra_size(ra, max);
  		ra->size += ra->async_size;
  	}
  	return ra_submit(ra, mapping, filp);
  }
  
  /**
   * page_cache_sync_readahead - generic file readahead
   * @mapping: address_space which holds the pagecache and I/O vectors
   * @ra: file_ra_state which holds the readahead state
   * @filp: passed on to ->readpage() and ->readpages()
   * @offset: start offset into @mapping, in pagecache page-sized units
   * @req_size: hint: total size of the read which the caller is performing in
   *            pagecache pages
   *
   * page_cache_sync_readahead() should be called when a cache miss happened:
   * it will submit the read.  The readahead logic may decide to piggyback more
   * pages onto the read request if access patterns suggest it will improve
   * performance.
   */
  void page_cache_sync_readahead(struct address_space *mapping,
  			       struct file_ra_state *ra, struct file *filp,
  			       pgoff_t offset, unsigned long req_size)
  {
  	/* no read-ahead */
  	if (!ra->ra_pages)
  		return;
  	/* be dumb */
  	if (filp && (filp->f_mode & FMODE_RANDOM)) {
  		force_page_cache_readahead(mapping, filp, offset, req_size);
  		return;
  	}
  	/* do read-ahead */
  	ondemand_readahead(mapping, ra, filp, false, offset, req_size);
  }
  EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
  
  /**
   * page_cache_async_readahead - file readahead for marked pages
   * @mapping: address_space which holds the pagecache and I/O vectors
   * @ra: file_ra_state which holds the readahead state
   * @filp: passed on to ->readpage() and ->readpages()
   * @page: the page at @offset which has the PG_readahead flag set
   * @offset: start offset into @mapping, in pagecache page-sized units
   * @req_size: hint: total size of the read which the caller is performing in
   *            pagecache pages
   *
   * page_cache_async_readahead() should be called when a page is used which
   * has the PG_readahead flag; this is a marker to suggest that the application
   * has used up enough of the readahead window that we should start pulling in
   * more pages.
   */
  void
  page_cache_async_readahead(struct address_space *mapping,
  			   struct file_ra_state *ra, struct file *filp,
  			   struct page *page, pgoff_t offset,
  			   unsigned long req_size)
  {
  	/* no read-ahead */
  	if (!ra->ra_pages)
  		return;
  
  	/*
  	 * Same bit is used for PG_readahead and PG_reclaim.
  	 */
  	if (PageWriteback(page))
  		return;
  
  	ClearPageReadahead(page);
  
  	/*
  	 * Defer asynchronous read-ahead on IO congestion.
  	 */
  	if (bdi_read_congested(mapping->backing_dev_info))
  		return;
  
  	/* do read-ahead */
  	ondemand_readahead(mapping, ra, filp, true, offset, req_size);
  }
  EXPORT_SYMBOL_GPL(page_cache_async_readahead);
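
  /*
   * A minimal sketch (not part of this file) of the calling pattern for
   * the two entry points above, modelled on a generic buffered read loop.
   * my_find_page() is hypothetical; @index is the page wanted and
   * @nr_to_read a hint of how many pages the caller still needs.
   */
  static struct page *my_find_page(struct file *filp, pgoff_t index,
  				 unsigned long nr_to_read)
  {
  	struct address_space *mapping = filp->f_mapping;
  	struct page *page;

  	page = find_get_page(mapping, index);
  	if (!page) {
  		/* cache miss: populate the cache around @index */
  		page_cache_sync_readahead(mapping, &filp->f_ra, filp,
  					  index, nr_to_read);
  		page = find_get_page(mapping, index);
  	} else if (PageReadahead(page)) {
  		/* hit the readahead marker: pipeline the next window */
  		page_cache_async_readahead(mapping, &filp->f_ra, filp,
  					   page, index, nr_to_read);
  	}
  	return page;
  }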