Blame view

mm/fadvise.c 4.87 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  // SPDX-License-Identifier: GPL-2.0
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
6
  /*
   * mm/fadvise.c
   *
   * Copyright (C) 2002, Linus Torvalds
   *
e1f8e8744   Francois Cami   Remove Andrew Mor...
7
   * 11Jan2003	Andrew Morton
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
8
9
10
11
12
13
14
15
16
17
18
   *		Initial version.
   */
  
  #include <linux/kernel.h>
  #include <linux/file.h>
  #include <linux/fs.h>
  #include <linux/mm.h>
  #include <linux/pagemap.h>
  #include <linux/backing-dev.h>
  #include <linux/pagevec.h>
  #include <linux/fadvise.h>
ebcf28e1c   Andrew Morton   [PATCH] fadvise()...
19
  #include <linux/writeback.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
  #include <linux/syscalls.h>
67d46b296   Mel Gorman   mm/fadvise.c: dra...
21
  #include <linux/swap.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
22
23
24
25
26
27
28
  
  #include <asm/unistd.h>
  
  /*
   * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
   * deactivate the pages and clear PG_Referenced.
   */
4a0fd5bf0   Al Viro   teach SYSCALL_DEF...
29
  /*
   * fadvise64_64 - apply the access-pattern hint @advice to a byte range of
   * the file referred to by @fd.
   *
   * @fd:     file descriptor, resolved with fdget()
   * @offset: first byte of the range
   * @len:    length of the range in bytes; 0 means "as much as possible"
   * @advice: one of the POSIX_FADV_* values handled in the switch below
   *
   * Returns 0 on success, -EBADF if @fd has no file behind it, -ESPIPE if the
   * inode is a FIFO, and -EINVAL if the file has no mapping, @len is negative
   * or @advice is not one of the handled values.
   */
  SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
30
  {
2903ff019   Al Viro   switch simple cas...
31
  	struct fd f = fdget(fd);
e748dcd09   Matthew Wilcox   vfs: remove get_x...
32
  	struct inode *inode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
33
34
  	struct address_space *mapping;
  	struct backing_dev_info *bdi;
ebcf28e1c   Andrew Morton   [PATCH] fadvise()...
35
  	loff_t endbyte;			/* inclusive */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
36
37
38
39
  	pgoff_t start_index;
  	pgoff_t end_index;
  	unsigned long nrpages;
  	int ret = 0;
2903ff019   Al Viro   switch simple cas...
40
  	/* No file behind fd: nothing to fdput(), so return directly. */
  	if (!f.file)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
41
  		return -EBADF;
e748dcd09   Matthew Wilcox   vfs: remove get_x...
42
43
  	inode = file_inode(f.file);
  	if (S_ISFIFO(inode->i_mode)) {
87ba81dba   Valentine Barshak   [PATCH] fadvise: ...
44
45
46
  		ret = -ESPIPE;
  		goto out;
  	}
2903ff019   Al Viro   switch simple cas...
47
  	mapping = f.file->f_mapping;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
48
49
50
51
  	if (!mapping || len < 0) {
  		ret = -EINVAL;
  		goto out;
  	}
3a77d2148   Shakeel Butt   mm: fadvise: avoi...
52
53
54
  	bdi = inode_to_bdi(mapping->host);
  
  	/*
  	 * When IS_DAX(inode) is true or the bdi is noop_backing_dev_info, the
  	 * advice value is only validated: known values succeed without any
  	 * further action, unknown values yield -EINVAL.
  	 */
  	if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) {
b5beb1caf   Masatake YAMATO   check ADVICE of f...
55
56
57
58
59
60
61
62
63
64
65
66
  		switch (advice) {
  		case POSIX_FADV_NORMAL:
  		case POSIX_FADV_RANDOM:
  		case POSIX_FADV_SEQUENTIAL:
  		case POSIX_FADV_WILLNEED:
  		case POSIX_FADV_NOREUSE:
  		case POSIX_FADV_DONTNEED:
  			/* no bad return value, but ignore advice */
  			break;
  		default:
  			ret = -EINVAL;
  		}
fe77ba6f4   Carsten Otte   [PATCH] xip: madv...
67
  		goto out;
b5beb1caf   Masatake YAMATO   check ADVICE of f...
68
  	}
fe77ba6f4   Carsten Otte   [PATCH] xip: madv...
69

4570403f6   Andrey Ryabinin   mm/fadvise.c: fix...
70
71
72
73
74
75
  	/*
  	 * Careful about overflows. Len == 0 means "as much as possible".  Use
  	 * unsigned math because signed overflows are undefined and UBSan
  	 * complains.
  	 */
  	endbyte = (u64)offset + (u64)len;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
76
77
  	/*
  	 * len == 0, or the sum compares below len (it did not fit): use -1 as
  	 * the end of the range.  Otherwise turn the exclusive end into the
  	 * inclusive last byte.
  	 */
  	if (!len || endbyte < len)
  		endbyte = -1;
ebcf28e1c   Andrew Morton   [PATCH] fadvise()...
78
79
  	else
  		endbyte--;		/* inclusive */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
80

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
81
82
  	switch (advice) {
  	case POSIX_FADV_NORMAL:
2903ff019   Al Viro   switch simple cas...
83
84
85
86
  		/*
  		 * Set the file's readahead window to the bdi's ra_pages and
  		 * clear FMODE_RANDOM; f_mode is modified under f_lock.
  		 */
  		f.file->f_ra.ra_pages = bdi->ra_pages;
  		spin_lock(&f.file->f_lock);
  		f.file->f_mode &= ~FMODE_RANDOM;
  		spin_unlock(&f.file->f_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
87
88
  		break;
  	case POSIX_FADV_RANDOM:
2903ff019   Al Viro   switch simple cas...
89
90
91
  		/* Only sets FMODE_RANDOM; ra_pages is left untouched. */
  		spin_lock(&f.file->f_lock);
  		f.file->f_mode |= FMODE_RANDOM;
  		spin_unlock(&f.file->f_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
92
93
  		break;
  	case POSIX_FADV_SEQUENTIAL:
2903ff019   Al Viro   switch simple cas...
94
95
96
97
  		/*
  		 * Same as POSIX_FADV_NORMAL, but with twice the bdi's
  		 * ra_pages as the readahead window.
  		 */
  		f.file->f_ra.ra_pages = bdi->ra_pages * 2;
  		spin_lock(&f.file->f_lock);
  		f.file->f_mode &= ~FMODE_RANDOM;
  		spin_unlock(&f.file->f_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
98
99
  		break;
  	case POSIX_FADV_WILLNEED:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
100
  		/* First and last PARTIAL page! */
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
101
102
  		start_index = offset >> PAGE_SHIFT;
  		end_index = endbyte >> PAGE_SHIFT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
103
104
105
106
107
  
  		/* Careful about overflow on the "+1" */
  		nrpages = end_index - start_index + 1;
  		if (!nrpages)
  			nrpages = ~0UL;
3d3727cdb   KOSAKI Motohiro   mm, fadvise: don'...
108
109
110
111
112
  
  		/*
  		 * Ignore return value because fadvise() shall return
  		 * success even if filesystem can't retrieve a hint.
  		 */
2903ff019   Al Viro   switch simple cas...
113
  		force_page_cache_readahead(mapping, f.file, start_index,
3d3727cdb   KOSAKI Motohiro   mm, fadvise: don'...
114
  					   nrpages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
115
  		break;
60c371bc7   Andrew Morton   [PATCH] fadvise()...
116
117
  	case POSIX_FADV_NOREUSE:
  		/* Accepted as valid advice, but nothing is done for it here. */
  		break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
118
  	case POSIX_FADV_DONTNEED:
703c27088   Tejun Heo   writeback: implem...
119
  		/*
  		 * Kick off WB_SYNC_NONE writeback for the byte range, but
  		 * only when inode_write_congested() reports no congestion.
  		 */
  		if (!inode_write_congested(mapping->host))
ad8a1b558   Shawn Bohrer   fadvise: only ini...
120
121
  			__filemap_fdatawrite_range(mapping, offset, endbyte,
  						   WB_SYNC_NONE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
122

441c228f8   Mel Gorman   mm: fadvise: docu...
123
124
125
126
127
  		/*
  		 * First and last FULL page! Partial pages are deliberately
  		 * preserved on the expectation that it is better to preserve
  		 * needed memory than to discard unneeded memory.
  		 */
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
128
129
  		start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
  		end_index = (endbyte >> PAGE_SHIFT);
3b1d9626f   shidao.ytt   mm/fadvise: disca...
130
131
132
133
134
135
136
137
138
  		/*
  		 * The page at end_index will be inclusively discarded by
  		 * invalidate_mapping_pages(), so subtracting 1 from
  		 * end_index means we will skip the last page.  But if endbyte
  		 * is page aligned or is at the end of file, we should not skip
  		 * that page - discarding the last page is safe enough.
  		 */
  		if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK &&
  				endbyte != inode->i_size - 1) {
18aba41cb   Oleg Drokin   mm/fadvise.c: do ...
139
140
141
142
143
144
145
146
147
148
  			/* First page is tricky as 0 - 1 = -1, but pgoff_t
  			 * is unsigned, so the end_index >= start_index
  			 * check below would be true and we'll discard the whole
  			 * file cache which is not what was asked.
  			 */
  			if (end_index == 0)
  				break;
  
  			end_index--;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
149

67d46b296   Mel Gorman   mm/fadvise.c: dra...
150
  		/* Only invalidate when at least one full page is covered. */
  		if (end_index >= start_index) {
4dd72b4a4   Johannes Weiner   mm: fadvise: avoi...
151
152
153
154
155
156
157
158
159
160
161
162
163
164
  			unsigned long count;
  
  			/*
  			 * It's common to FADV_DONTNEED right after
  			 * the read or write that instantiates the
  			 * pages, in which case there will be some
  			 * sitting on the local LRU cache. Try to
  			 * avoid the expensive remote drain and the
  			 * second cache tree walk below by flushing
  			 * them out right away.
  			 */
  			lru_add_drain();
  
  			count = invalidate_mapping_pages(mapping,
67d46b296   Mel Gorman   mm/fadvise.c: dra...
165
166
167
168
169
170
171
172
173
174
175
  						start_index, end_index);
  
  			/*
  			 * If fewer pages were invalidated than expected then
  			 * it is possible that some of the pages were on
  			 * a per-cpu pagevec for a remote CPU. Drain all
  			 * pagevecs and try again.
  			 */
  			if (count < (end_index - start_index + 1)) {
  				lru_add_drain_all();
  				invalidate_mapping_pages(mapping, start_index,
ebcf28e1c   Andrew Morton   [PATCH] fadvise()...
176
  						end_index);
67d46b296   Mel Gorman   mm/fadvise.c: dra...
177
178
  			}
  		}
ebcf28e1c   Andrew Morton   [PATCH] fadvise()...
179
  		break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
180
181
182
183
  	default:
  		ret = -EINVAL;
  	}
  	/*
  	 * Common exit for every path taken after fdget() returned a file:
  	 * pairs the fdput() below with the fdget() at the top.
  	 */
  out:
2903ff019   Al Viro   switch simple cas...
184
  	fdput(f);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
185
186
187
188
  	return ret;
  }
  
  #ifdef __ARCH_WANT_SYS_FADVISE64
4a0fd5bf0   Al Viro   teach SYSCALL_DEF...
189
  /*
   * fadvise64 - variant of fadvise64_64 whose @len is a size_t.
   *
   * Forwards @fd, @offset, @len and @advice unchanged to sys_fadvise64_64()
   * and returns its result.  Only compiled when __ARCH_WANT_SYS_FADVISE64 is
   * defined.
   */
  SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
190
191
192
193
194
  {
  	return sys_fadvise64_64(fd, offset, len, advice);
  }
  
  #endif