  /*
   * mm/readahead.c - address_space-level file readahead.
   *
   * Copyright (C) 2002, Linus Torvalds
   *
   * 09Apr2002	Andrew Morton
   *		Initial version.
   */
  
  #include <linux/kernel.h>
  #include <linux/fs.h>
  #include <linux/mm.h>
  #include <linux/module.h>
  #include <linux/blkdev.h>
  #include <linux/backing-dev.h>
  #include <linux/task_io_accounting_ops.h>
  #include <linux/pagevec.h>
  #include <linux/pagemap.h>

  /*
   * Initialise a struct file's readahead state.  Assumes that the caller has
   * memset *ra to zero.
   */
  void
  file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
  {
  	ra->ra_pages = mapping->backing_dev_info->ra_pages;
  	ra->prev_pos = -1;
  }
  EXPORT_SYMBOL_GPL(file_ra_state_init);

  #define list_to_page(head) (list_entry((head)->prev, struct page, lru))
  /*
   * see if a page needs releasing upon read_cache_pages() failure
   * - the caller of read_cache_pages() may have set PG_private or PG_fscache
   *   before calling, such as the NFS fs marking pages that are cached locally
   *   on disk, thus we need to give the fs a chance to clean up in the event of
   *   an error
   */
  static void read_cache_pages_invalidate_page(struct address_space *mapping,
  					     struct page *page)
  {
  	if (page_has_private(page)) {
  		if (!trylock_page(page))
  			BUG();
  		page->mapping = mapping;
  		do_invalidatepage(page, 0);
  		page->mapping = NULL;
  		unlock_page(page);
  	}
  	page_cache_release(page);
  }
  
  /*
   * release a list of pages, invalidating them first if need be
   */
  static void read_cache_pages_invalidate_pages(struct address_space *mapping,
  					      struct list_head *pages)
  {
  	struct page *victim;
  
  	while (!list_empty(pages)) {
  		victim = list_to_page(pages);
  		list_del(&victim->lru);
  		read_cache_pages_invalidate_page(mapping, victim);
  	}
  }
  /**
   * read_cache_pages - populate an address space with some pages & start reads against them
   * @mapping: the address_space
   * @pages: The address of a list_head which contains the target pages.  These
   *   pages have their ->index populated and are otherwise uninitialised.
   * @filler: callback routine for filling a single page.
   * @data: private data for the callback routine.
   *
   * Hides the details of the LRU cache etc from the filesystems.
   */
  int read_cache_pages(struct address_space *mapping, struct list_head *pages,
  			int (*filler)(void *, struct page *), void *data)
  {
  	struct page *page;
  	int ret = 0;
  	while (!list_empty(pages)) {
  		page = list_to_page(pages);
  		list_del(&page->lru);
  		if (add_to_page_cache_lru(page, mapping,
  					page->index, GFP_KERNEL)) {
  			read_cache_pages_invalidate_page(mapping, page);
  			continue;
  		}
  		page_cache_release(page);
  		ret = filler(data, page);
  		if (unlikely(ret)) {
  			read_cache_pages_invalidate_pages(mapping, pages);
  			break;
  		}
  		task_io_account_read(PAGE_CACHE_SIZE);
  	}
  	return ret;
  }
  
  EXPORT_SYMBOL(read_cache_pages);
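
  /*
   * Illustrative sketch (not part of this file): a filesystem's ->readpages()
   * implementation would typically hand its page list straight to
   * read_cache_pages(), using its single-page read routine as @filler.
   * The "foo" names below are hypothetical:
   *
   *	static int foo_filler(void *data, struct page *page)
   *	{
   *		struct file *file = data;
   *
   *		return foo_read_one_page(file, page);
   *	}
   *
   *	static int foo_readpages(struct file *file, struct address_space *mapping,
   *				 struct list_head *pages, unsigned nr_pages)
   *	{
   *		return read_cache_pages(mapping, pages, foo_filler, file);
   *	}
   */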
  
  static int read_pages(struct address_space *mapping, struct file *filp,
  		struct list_head *pages, unsigned nr_pages)
  {
  	unsigned page_idx;
  	int ret;
  
  	if (mapping->a_ops->readpages) {
  		ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
  		/* Clean up the remaining pages */
  		put_pages_list(pages);
  		goto out;
  	}
  	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
  		struct page *page = list_to_page(pages);
  		list_del(&page->lru);
  		if (!add_to_page_cache_lru(page, mapping,
  					page->index, GFP_KERNEL)) {
  			mapping->a_ops->readpage(filp, page);
  		}
  		page_cache_release(page);
  	}
  	ret = 0;
  out:
  	return ret;
  }
  
  /*
   * __do_page_cache_readahead() actually reads a chunk of disk.  It allocates all
   * the pages first, then submits them all for I/O. This avoids the very bad
   * behaviour which would occur if page allocations are causing VM writeback.
   * We really don't want to intermingle reads and writes like that.
   *
   * Returns the number of pages requested, or the maximum amount of I/O allowed.
   */
  static int
  __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
  			pgoff_t offset, unsigned long nr_to_read,
  			unsigned long lookahead_size)
  {
  	struct inode *inode = mapping->host;
  	struct page *page;
  	unsigned long end_index;	/* The last page we want to read */
  	LIST_HEAD(page_pool);
  	int page_idx;
  	int ret = 0;
  	loff_t isize = i_size_read(inode);
  
  	if (isize == 0)
  		goto out;
  	end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
  
  	/*
  	 * Preallocate as many pages as we will need.
  	 */
  	for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
  		pgoff_t page_offset = offset + page_idx;

  		if (page_offset > end_index)
  			break;
  		rcu_read_lock();
  		page = radix_tree_lookup(&mapping->page_tree, page_offset);
  		rcu_read_unlock();
  		if (page)
  			continue;
  		page = page_cache_alloc_cold(mapping);
  		if (!page)
  			break;
  		page->index = page_offset;
  		list_add(&page->lru, &page_pool);
  		if (page_idx == nr_to_read - lookahead_size)
  			SetPageReadahead(page);
  		ret++;
  	}
  
  	/*
  	 * Now start the IO.  We ignore I/O errors - if the page is not
  	 * uptodate then the caller will launch readpage again, and
  	 * will then handle the error.
  	 */
  	if (ret)
  		read_pages(mapping, filp, &page_pool, ret);
  	BUG_ON(!list_empty(&page_pool));
  out:
  	return ret;
  }
  
  /*
   * Chunk the readahead into 2 megabyte units, so that we don't pin too much
   * memory at once.
   */
  int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
  		pgoff_t offset, unsigned long nr_to_read)
  {
  	int ret = 0;
  
  	if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
  		return -EINVAL;
  	nr_to_read = max_sane_readahead(nr_to_read);
  	while (nr_to_read) {
  		int err;
  
  		unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_CACHE_SIZE;
  
  		if (this_chunk > nr_to_read)
  			this_chunk = nr_to_read;
  		err = __do_page_cache_readahead(mapping, filp,
  						offset, this_chunk, 0);
  		if (err < 0) {
  			ret = err;
  			break;
  		}
  		ret += err;
  		offset += this_chunk;
  		nr_to_read -= this_chunk;
  	}
  	return ret;
  }
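
  /*
   * With 4k pages the loop above therefore issues at most
   * (2 * 1024 * 1024) / 4096 = 512 pages per __do_page_cache_readahead() call.
   */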
  
  /*
   * Given a desired number of PAGE_CACHE_SIZE readahead pages, return a
   * sensible upper limit.
   */
  unsigned long max_sane_readahead(unsigned long nr)
  {
  	return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE_FILE)
  		+ node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
  }
  
  /*
   * Submit IO for the read-ahead request in file_ra_state.
   */
  unsigned long ra_submit(struct file_ra_state *ra,
  		       struct address_space *mapping, struct file *filp)
  {
  	int actual;
  	actual = __do_page_cache_readahead(mapping, filp,
  					ra->start, ra->size, ra->async_size);
  
  	return actual;
  }
  
  /*
   * Set the initial window size: round the request up to the next power of 2,
   * then multiply by 4 for small requests, by 2 for medium ones, and clip to
   * max for large ones. For a 128k (32 page) max ra this gives:
   * 1-2 page read = 16k initial, 3-4 page = 32k, 5-8 page = 64k, > 8 page = 128k
   */
  static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
  {
  	unsigned long newsize = roundup_pow_of_two(size);
  
  	if (newsize <= max / 32)
  		newsize = newsize * 4;
  	else if (newsize <= max / 4)
  		newsize = newsize * 2;
  	else
  		newsize = max;
  
  	return newsize;
  }
  
  /*
   *  Get the previous window size, ramp it up, and
   *  return it as the new window size.
   */
  static unsigned long get_next_ra_size(struct file_ra_state *ra,
  						unsigned long max)
  {
  	unsigned long cur = ra->size;
  	unsigned long newsize;
  
  	if (cur < max / 16)
  		newsize = 4 * cur;
  	else
  		newsize = 2 * cur;
  
  	return min(newsize, max);
  }
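
  /*
   * Worked example, assuming a 128k (32 page) max ra: a first 4-page read
   * gets an initial window of get_init_ra_size(4, 32) = 8 pages; subsequent
   * sequential hits then ramp it via get_next_ra_size() to 16 and finally
   * 32 pages, where it stays.
   */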
  
  /*
   * On-demand readahead design.
   *
   * The fields in struct file_ra_state represent the most-recently-executed
   * readahead attempt:
   *
   *                        |<----- async_size ---------|
   *     |------------------- size -------------------->|
   *     |==================#===========================|
   *     ^start             ^page marked with PG_readahead
   *
   * To overlap application thinking time and disk I/O time, we do
   * `readahead pipelining': Do not wait until the application has consumed all
   * readahead pages and stalled on the missing page at readahead_index;
   * Instead, submit an asynchronous readahead I/O as soon as there are
   * only async_size pages left in the readahead window. Normally async_size
   * will be equal to size, for maximum pipelining.
   *
   * In interleaved sequential reads, concurrent streams on the same fd can
   * invalidate each other's readahead state. So we flag the new readahead
   * page at (start+size-async_size) with PG_readahead, and use it as readahead
   * indicator. The flag won't be set on already cached pages, to avoid the
   * readahead-for-nothing fuss, saving pointless page cache lookups.
   *
   * prev_pos tracks the last visited byte in the _previous_ read request.
   * It should be maintained by the caller, and will be used for detecting
   * small random reads. Note that the readahead algorithm checks loosely
   * for sequential patterns. Hence interleaved reads might be served as
   * sequential ones.
   *
   * There is a special-case: if the first page which the application tries to
   * read happens to be the first page of the file, it is assumed that a linear
   * read is about to happen and the window is immediately set to the initial size
   * based on I/O request size and the max_readahead.
   *
   * The code ramps up the readahead size aggressively at first, but slows down
   * as it approaches max_readahead.
   */
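
  /*
   * A worked trace of the above, assuming ra_pages = 32, 4-page read requests
   * against an uncached file, and ample free memory:
   *
   *  - the miss on page 0 sets up an initial window: start=0, size=8,
   *    async_size=4; pages 0-7 are read and page 4 is marked PG_readahead.
   *  - the read of page 4 hits the marker: the window advances to start=8,
   *    size=16, async_size=16; pages 8-23 are read, page 8 gets the marker.
   *  - the read of page 8 hits the marker: start=24, size=32, async_size=32;
   *    pages 24-55 are read and page 24 gets the marker, and so on, keeping
   *    I/O in flight while the application consumes the earlier pages.
   */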
  
  /*
   * Count contiguously cached pages from @offset-1 to @offset-@max,
   * this count is a conservative estimation of
   * 	- length of the sequential read sequence, or
   * 	- thrashing threshold in memory tight systems
   */
  static pgoff_t count_history_pages(struct address_space *mapping,
  				   struct file_ra_state *ra,
  				   pgoff_t offset, unsigned long max)
  {
  	pgoff_t head;
  
  	rcu_read_lock();
  	head = radix_tree_prev_hole(&mapping->page_tree, offset - 1, max);
  	rcu_read_unlock();
  
  	return offset - 1 - head;
  }
  
  /*
   * page cache context based read-ahead
   */
  static int try_context_readahead(struct address_space *mapping,
  				 struct file_ra_state *ra,
  				 pgoff_t offset,
  				 unsigned long req_size,
  				 unsigned long max)
  {
  	pgoff_t size;
  
  	size = count_history_pages(mapping, ra, offset, max);
  
  	/*
  	 * no history pages:
  	 * it could be a random read
  	 */
  	if (!size)
  		return 0;
  
  	/*
  	 * starts from beginning of file:
  	 * it is a strong indication of long-run stream (or whole-file-read)
  	 */
  	if (size >= offset)
  		size *= 2;
  
  	ra->start = offset;
  	ra->size = get_init_ra_size(size + req_size, max);
  	ra->async_size = ra->size;
  
  	return 1;
  }
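
  /*
   * For instance, if an interleaved sequential stream (say, another thread
   * reading the same file through its own descriptor) has already populated
   * pages offset-8 ... offset-1, count_history_pages() returns 8 and the next
   * window is sized from 8 + req_size; if the cached run reaches back to page
   * 0, the estimate is doubled on the assumption of a whole-file read.
   */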
  
  /*
   * A minimal readahead algorithm for trivial sequential/random reads.
   */
  static unsigned long
  ondemand_readahead(struct address_space *mapping,
  		   struct file_ra_state *ra, struct file *filp,
  		   bool hit_readahead_marker, pgoff_t offset,
  		   unsigned long req_size)
  {
  	unsigned long max = max_sane_readahead(ra->ra_pages);
  
  	/*
  	 * start of file
  	 */
  	if (!offset)
  		goto initial_readahead;
  
  	/*
  	 * It's the expected callback offset, assume sequential access.
  	 * Ramp up sizes, and push forward the readahead window.
  	 */
  	if ((offset == (ra->start + ra->size - ra->async_size) ||
  	     offset == (ra->start + ra->size))) {
  		ra->start += ra->size;
  		ra->size = get_next_ra_size(ra, max);
  		ra->async_size = ra->size;
  		goto readit;
  	}
  	/*
  	 * Hit a marked page without valid readahead state.
  	 * E.g. interleaved reads.
  	 * Query the pagecache for async_size, which normally equals the
  	 * readahead size. Ramp it up and use it as the new readahead size.
  	 */
  	if (hit_readahead_marker) {
  		pgoff_t start;
  		rcu_read_lock();
  		start = radix_tree_next_hole(&mapping->page_tree, offset + 1, max);
  		rcu_read_unlock();
  
  		if (!start || start - offset > max)
  			return 0;
  
  		ra->start = start;
  		ra->size = start - offset;	/* old async_size */
  		ra->size += req_size;
  		ra->size = get_next_ra_size(ra, max);
  		ra->async_size = ra->size;
  		goto readit;
  	}
  
  	/*
  	 * oversize read
  	 */
  	if (req_size > max)
  		goto initial_readahead;
  
  	/*
  	 * sequential cache miss
  	 */
  	if (offset - (ra->prev_pos >> PAGE_CACHE_SHIFT) <= 1UL)
  		goto initial_readahead;
  
  	/*
  	 * Query the page cache and look for the traces (cached history pages)
  	 * that a sequential stream would leave behind.
  	 */
  	if (try_context_readahead(mapping, ra, offset, req_size, max))
  		goto readit;
  
  	/*
  	 * standalone, small random read
  	 * Read as is, and do not pollute the readahead state.
  	 */
  	return __do_page_cache_readahead(mapping, filp, offset, req_size, 0);
  
  initial_readahead:
  	ra->start = offset;
  	ra->size = get_init_ra_size(req_size, max);
  	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;

  readit:
  	/*
  	 * Will this read hit the readahead marker made by itself?
  	 * If so, trigger the readahead marker hit now, and merge
  	 * the resulting next readahead window into the current one.
  	 */
  	if (offset == ra->start && ra->size == ra->async_size) {
  		ra->async_size = get_next_ra_size(ra, max);
  		ra->size += ra->async_size;
  	}
  	return ra_submit(ra, mapping, filp);
  }
  
  /**
   * page_cache_sync_readahead - generic file readahead
   * @mapping: address_space which holds the pagecache and I/O vectors
   * @ra: file_ra_state which holds the readahead state
   * @filp: passed on to ->readpage() and ->readpages()
   * @offset: start offset into @mapping, in pagecache page-sized units
   * @req_size: hint: total size of the read which the caller is performing in
   *            pagecache pages
   *
   * page_cache_sync_readahead() should be called when a cache miss happened:
   * it will submit the read.  The readahead logic may decide to piggyback more
   * pages onto the read request if access patterns suggest it will improve
   * performance.
   */
  void page_cache_sync_readahead(struct address_space *mapping,
  			       struct file_ra_state *ra, struct file *filp,
  			       pgoff_t offset, unsigned long req_size)
  {
  	/* no read-ahead */
  	if (!ra->ra_pages)
  		return;
  	/* be dumb */
  	if (filp->f_mode & FMODE_RANDOM) {
  		force_page_cache_readahead(mapping, filp, offset, req_size);
  		return;
  	}
  	/* do read-ahead */
  	ondemand_readahead(mapping, ra, filp, false, offset, req_size);
  }
  EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
  
  /**
   * page_cache_async_readahead - file readahead for marked pages
   * @mapping: address_space which holds the pagecache and I/O vectors
   * @ra: file_ra_state which holds the readahead state
   * @filp: passed on to ->readpage() and ->readpages()
   * @page: the page at @offset which has the PG_readahead flag set
   * @offset: start offset into @mapping, in pagecache page-sized units
   * @req_size: hint: total size of the read which the caller is performing in
   *            pagecache pages
   *
   * page_cache_async_readahead() should be called when a page is used which
   * has the PG_readahead flag; this is a marker to suggest that the application
   * has used up enough of the readahead window that we should start pulling in
   * more pages.
   */
  void
  page_cache_async_readahead(struct address_space *mapping,
  			   struct file_ra_state *ra, struct file *filp,
  			   struct page *page, pgoff_t offset,
  			   unsigned long req_size)
  {
  	/* no read-ahead */
  	if (!ra->ra_pages)
  		return;
  
  	/*
  	 * Same bit is used for PG_readahead and PG_reclaim.
  	 */
  	if (PageWriteback(page))
  		return;
  
  	ClearPageReadahead(page);
  
  	/*
  	 * Defer asynchronous read-ahead on IO congestion.
  	 */
  	if (bdi_read_congested(mapping->backing_dev_info))
  		return;
  
  	/* do read-ahead */
  	ondemand_readahead(mapping, ra, filp, true, offset, req_size);
  
  #ifdef CONFIG_BLOCK
  	/*
  	 * Normally the current page is !uptodate and lock_page() will be
  	 * immediately called to implicitly unplug the device. However this
  	 * is not always true for RAID configurations, where data arrives
  	 * not strictly in their submission order. In this case we need to
  	 * explicitly kick off the IO.
  	 */
  	if (PageUptodate(page))
  		blk_run_backing_dev(mapping->backing_dev_info, NULL);
  #endif
  }
  EXPORT_SYMBOL_GPL(page_cache_async_readahead);
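
  /*
   * Illustrative sketch (not part of this file): a buffered read path is
   * expected to drive the two entry points above roughly as follows, calling
   * the sync variant on a page cache miss and the async variant when it finds
   * a page carrying PG_readahead:
   *
   *	page = find_get_page(mapping, index);
   *	if (!page) {
   *		page_cache_sync_readahead(mapping, ra, filp, index, nr_to_read);
   *		page = find_get_page(mapping, index);
   *	} else if (PageReadahead(page)) {
   *		page_cache_async_readahead(mapping, ra, filp, page, index, nr_to_read);
   *	}
   */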