Blame view

mm/shmem.c 106 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
  /*
   * Resizable virtual memory filesystem for Linux.
   *
   * Copyright (C) 2000 Linus Torvalds.
   *		 2000 Transmeta Corp.
   *		 2000-2001 Christoph Rohland
   *		 2000-2001 SAP AG
   *		 2002 Red Hat Inc.
6922c0c7a   Hugh Dickins   tmpfs: convert sh...
9
10
   * Copyright (C) 2002-2011 Hugh Dickins.
   * Copyright (C) 2011 Google Inc.
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
11
   * Copyright (C) 2002-2005 VERITAS Software Corporation.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
13
14
15
16
17
   * Copyright (C) 2004 Andi Kleen, SuSE Labs
   *
   * Extended attribute support for tmpfs:
   * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
   *
853ac43ab   Matt Mackall   shmem: unify regu...
18
19
20
   * tiny-shmem:
   * Copyright (c) 2004, 2008 Matt Mackall <mpm@selenic.com>
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
21
22
   * This file is released under the GPL.
   */
853ac43ab   Matt Mackall   shmem: unify regu...
23
24
25
26
  #include <linux/fs.h>
  #include <linux/init.h>
  #include <linux/vfs.h>
  #include <linux/mount.h>
250297edf   Andrew Morton   mm/shmem.c: remov...
27
  #include <linux/ramfs.h>
caefba174   Hugh Dickins   shmem: respect MA...
28
  #include <linux/pagemap.h>
853ac43ab   Matt Mackall   shmem: unify regu...
29
30
  #include <linux/file.h>
  #include <linux/mm.h>
b95f1b31b   Paul Gortmaker   mm: Map most file...
31
  #include <linux/export.h>
853ac43ab   Matt Mackall   shmem: unify regu...
32
  #include <linux/swap.h>
e2e40f2c1   Christoph Hellwig   fs: move struct k...
33
  #include <linux/uio.h>
f3f0e1d21   Kirill A. Shutemov   khugepaged: add s...
34
  #include <linux/khugepaged.h>
853ac43ab   Matt Mackall   shmem: unify regu...
35
36
37
38
  
  static struct vfsmount *shm_mnt;
  
  #ifdef CONFIG_SHMEM
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
39
40
41
42
43
  /*
   * This virtual memory filesystem is heavily based on the ramfs. It
   * extends ramfs by the ability to use swap and honor resource limits
   * which makes it a completely usable filesystem.
   */
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
44
  #include <linux/xattr.h>
a56942551   Christoph Hellwig   knfsd: exportfs: ...
45
  #include <linux/exportfs.h>
1c7c474c3   Christoph Hellwig   make generic_acl ...
46
  #include <linux/posix_acl.h>
feda821e7   Christoph Hellwig   fs: remove generi...
47
  #include <linux/posix_acl_xattr.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
48
  #include <linux/mman.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
49
50
51
52
  #include <linux/string.h>
  #include <linux/slab.h>
  #include <linux/backing-dev.h>
  #include <linux/shmem_fs.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
53
  #include <linux/writeback.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
54
  #include <linux/blkdev.h>
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
55
  #include <linux/pagevec.h>
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
56
  #include <linux/percpu_counter.h>
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
57
  #include <linux/falloc.h>
708e3508c   Hugh Dickins   tmpfs: clone shme...
58
  #include <linux/splice.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
59
60
61
62
  #include <linux/security.h>
  #include <linux/swapops.h>
  #include <linux/mempolicy.h>
  #include <linux/namei.h>
b00dc3ad7   Hugh Dickins   [PATCH] tmpfs: fi...
63
  #include <linux/ctype.h>
304dbdb7a   Lee Schermerhorn   [PATCH] add migra...
64
  #include <linux/migrate.h>
c1f60a5a4   Christoph Lameter   [PATCH] reduce MA...
65
  #include <linux/highmem.h>
680d794ba   akpm@linux-foundation.org   mount options: fi...
66
  #include <linux/seq_file.h>
925629278   Mimi Zohar   integrity: specia...
67
  #include <linux/magic.h>
9183df25f   David Herrmann   shm: add memfd_cr...
68
  #include <linux/syscalls.h>
40e041a2c   David Herrmann   shm: add sealing API
69
  #include <linux/fcntl.h>
9183df25f   David Herrmann   shm: add memfd_cr...
70
  #include <uapi/linux/memfd.h>
304dbdb7a   Lee Schermerhorn   [PATCH] add migra...
71

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
72
  #include <asm/uaccess.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
73
  #include <asm/pgtable.h>
dd56b0464   Mel Gorman   mm: page_alloc: h...
74
  #include "internal.h"
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
75
76
  #define BLOCKS_PER_PAGE  (PAGE_SIZE/512)
  #define VM_ACCT(size)    (PAGE_ALIGN(size) >> PAGE_SHIFT)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
77

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
78
79
  /* Pretend that each entry is of this size in directory's i_size */
  #define BOGO_DIRENT_SIZE 20
69f07ec93   Hugh Dickins   tmpfs: use kmemdu...
80
81
  /* Symlink up to this size is kmalloc'ed instead of using a swappable page */
  #define SHORT_SYMLINK_LEN 128
1aac14003   Hugh Dickins   tmpfs: quit when ...
82
  /*
f00cdc6df   Hugh Dickins   shmem: fix faulti...
83
84
85
   * shmem_fallocate communicates with shmem_fault or shmem_writepage via
   * inode->i_private (with i_mutex making sure that it has only one user at
   * a time): we would prefer not to enlarge the shmem inode just for that.
1aac14003   Hugh Dickins   tmpfs: quit when ...
86
87
   */
  /* State of one fallocate call in progress; each field's role is noted inline. */
  struct shmem_falloc {
8e205f779   Hugh Dickins   shmem: fix faulti...
88
  	wait_queue_head_t *waitq; /* faults into hole wait for punch to end */
1aac14003   Hugh Dickins   tmpfs: quit when ...
89
90
91
92
93
  	pgoff_t start;		/* start of range currently being fallocated */
  	pgoff_t next;		/* the next page offset to be fallocated */
  	pgoff_t nr_falloced;	/* how many new pages have been fallocated */
  	pgoff_t nr_unswapped;	/* how often writepage refused to swap out */
  };
b76db7354   Andrew Morton   mount-options-fix...
94
  #ifdef CONFIG_TMPFS
680d794ba   akpm@linux-foundation.org   mount options: fi...
95
96
97
98
99
100
101
102
103
  /* Default block limit for a tmpfs mount: half of totalram_pages. */
  static unsigned long shmem_default_max_blocks(void)
  {
  	unsigned long half_of_ram = totalram_pages / 2;
  
  	return half_of_ram;
  }
  
  /*
   * Default inode limit for a tmpfs mount: the smaller of
   * (totalram_pages - totalhigh_pages) and half of totalram_pages.
   */
  static unsigned long shmem_default_max_inodes(void)
  {
  	unsigned long lowmem_pages = totalram_pages - totalhigh_pages;
  	unsigned long half_of_ram = totalram_pages / 2;
  
  	return min(lowmem_pages, half_of_ram);
  }
b76db7354   Andrew Morton   mount-options-fix...
104
  #endif
680d794ba   akpm@linux-foundation.org   mount options: fi...
105

bde05d1cc   Hugh Dickins   shmem: replace pa...
106
107
108
  static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
  static int shmem_replace_page(struct page **pagep, gfp_t gfp,
  				struct shmem_inode_info *info, pgoff_t index);
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
109
  static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
110
111
  		struct page **pagep, enum sgp_type sgp,
  		gfp_t gfp, struct mm_struct *fault_mm, int *fault_type);
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
112

f3f0e1d21   Kirill A. Shutemov   khugepaged: add s...
113
  /*
   * Convenience wrapper around shmem_getpage_gfp(): uses the gfp mask of the
   * inode's own mapping and passes no fault mm and no fault-type pointer.
   */
  int shmem_getpage(struct inode *inode, pgoff_t index,
  		struct page **pagep, enum sgp_type sgp)
  {
  	gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
  
  	return shmem_getpage_gfp(inode, index, pagep, sgp, gfp, NULL, NULL);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
119

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
120
121
122
123
124
125
126
127
128
129
130
131
132
  /* Return the shmem superblock info kept in sb->s_fs_info. */
  static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
  {
  	struct shmem_sb_info *sbinfo = sb->s_fs_info;
  
  	return sbinfo;
  }
  
  /*
   * shmem_file_setup pre-accounts the whole fixed size of a VM object,
   * for shared memory and for shared anonymous (/dev/zero) mappings
   * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1),
   * consistent with the pre-accounting of private mappings ...
   */
  static inline int shmem_acct_size(unsigned long flags, loff_t size)
  {
  	/* VM_NORESERVE objects are not pre-accounted here. */
  	if (flags & VM_NORESERVE)
  		return 0;
  
  	return security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
  }
  
  /* Undo shmem_acct_size(): a no-op for VM_NORESERVE objects. */
  static inline void shmem_unacct_size(unsigned long flags, loff_t size)
  {
  	if (flags & VM_NORESERVE)
  		return;
  
  	vm_unacct_memory(VM_ACCT(size));
  }
771425179   Konstantin Khlebnikov   shmem: update mem...
142
143
144
145
146
147
148
149
150
151
152
153
  /*
   * Adjust the pre-accounted size of an object from @oldsize to @newsize.
   * Growing asks security_vm_enough_memory_mm() for the extra pages and
   * returns its result; shrinking hands the surplus back and returns 0.
   * VM_NORESERVE objects are left alone.
   */
  static inline int shmem_reacct_size(unsigned long flags,
  		loff_t oldsize, loff_t newsize)
  {
  	loff_t old_pages, new_pages;
  
  	if (flags & VM_NORESERVE)
  		return 0;
  
  	old_pages = VM_ACCT(oldsize);
  	new_pages = VM_ACCT(newsize);
  
  	if (new_pages > old_pages)
  		return security_vm_enough_memory_mm(current->mm,
  				new_pages - old_pages);
  	if (new_pages < old_pages)
  		vm_unacct_memory(old_pages - new_pages);
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
154
155
  /*
   * ... whereas tmpfs objects are accounted incrementally as
75edd345e   Hugh Dickins   tmpfs: preliminar...
156
   * pages are allocated, in order to allow large sparse files.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
157
158
159
   * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
   * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
   */
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
160
  static inline int shmem_acct_block(unsigned long flags, long pages)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
161
  {
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
162
163
164
165
166
  	if (!(flags & VM_NORESERVE))
  		return 0;
  
  	return security_vm_enough_memory_mm(current->mm,
  			pages * VM_ACCT(PAGE_SIZE));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
167
168
169
170
  }
  
  static inline void shmem_unacct_blocks(unsigned long flags, long pages)
  {
0b0a0806b   Hugh Dickins   shmem: fix shared...
171
  	if (flags & VM_NORESERVE)
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
172
  		vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
173
  }
759b9775c   Hugh Dickins   [PATCH] shmem and...
174
  static const struct super_operations shmem_ops;
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
175
  static const struct address_space_operations shmem_aops;
15ad7cdcf   Helge Deller   [PATCH] struct se...
176
  static const struct file_operations shmem_file_operations;
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
177
178
179
  static const struct inode_operations shmem_inode_operations;
  static const struct inode_operations shmem_dir_inode_operations;
  static const struct inode_operations shmem_special_inode_operations;
f0f37e2f7   Alexey Dobriyan   const: mark struc...
180
  static const struct vm_operations_struct shmem_vm_ops;
779750d20   Kirill A. Shutemov   shmem: split huge...
181
  static struct file_system_type shmem_fs_type;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
182

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
183
  static LIST_HEAD(shmem_swaplist);
cb5f7b9a4   Hugh Dickins   tmpfs: make shmem...
184
  static DEFINE_MUTEX(shmem_swaplist_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
185

5b04c6890   Pavel Emelyanov   shmem: factor out...
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
  /*
   * Take one inode from the superblock's free_inodes budget.
   * Returns 0 on success (or when no max_inodes limit is set),
   * -ENOSPC when the budget is exhausted.
   */
  static int shmem_reserve_inode(struct super_block *sb)
  {
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
  	int err = 0;
  
  	if (!sbinfo->max_inodes)
  		return 0;
  
  	spin_lock(&sbinfo->stat_lock);
  	if (sbinfo->free_inodes)
  		sbinfo->free_inodes--;
  	else
  		err = -ENOSPC;
  	spin_unlock(&sbinfo->stat_lock);
  
  	return err;
  }
  
  /* Give one inode back to the free_inodes budget, if a limit is in force. */
  static void shmem_free_inode(struct super_block *sb)
  {
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
  
  	if (!sbinfo->max_inodes)
  		return;
  
  	spin_lock(&sbinfo->stat_lock);
  	sbinfo->free_inodes++;
  	spin_unlock(&sbinfo->stat_lock);
  }
467118102   Randy Dunlap   mm/shmem and tiny...
210
  /**
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
211
   * shmem_recalc_inode - recalculate the block usage of an inode
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
   * @inode: inode to recalc
   *
   * We have to calculate the free blocks since the mm can drop
   * undirtied hole pages behind our back.
   *
   * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
   * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
   *
   * It has to be called with the spinlock held.
   */
  static void shmem_recalc_inode(struct inode *inode)
  {
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	long freed;
  
  	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
  	if (freed > 0) {
54af60421   Hugh Dickins   tmpfs: convert sh...
229
230
231
  		struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
  		if (sbinfo->max_blocks)
  			percpu_counter_add(&sbinfo->used_blocks, -freed);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
232
  		info->alloced -= freed;
54af60421   Hugh Dickins   tmpfs: convert sh...
233
  		inode->i_blocks -= freed * BLOCKS_PER_PAGE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
234
  		shmem_unacct_blocks(info->flags, freed);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
235
236
  	}
  }
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
237
238
239
240
  bool shmem_charge(struct inode *inode, long pages)
  {
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
241
  	unsigned long flags;
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
242
243
244
  
  	if (shmem_acct_block(info->flags, pages))
  		return false;
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
245
  	spin_lock_irqsave(&info->lock, flags);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
246
247
248
  	info->alloced += pages;
  	inode->i_blocks += pages * BLOCKS_PER_PAGE;
  	shmem_recalc_inode(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
249
  	spin_unlock_irqrestore(&info->lock, flags);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
250
251
252
253
254
255
256
  	inode->i_mapping->nrpages += pages;
  
  	if (!sbinfo->max_blocks)
  		return true;
  	if (percpu_counter_compare(&sbinfo->used_blocks,
  				sbinfo->max_blocks - pages) > 0) {
  		inode->i_mapping->nrpages -= pages;
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
257
  		spin_lock_irqsave(&info->lock, flags);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
258
259
  		info->alloced -= pages;
  		shmem_recalc_inode(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
260
  		spin_unlock_irqrestore(&info->lock, flags);
71664665c   Hugh Dickins   huge tmpfs: fix C...
261
  		shmem_unacct_blocks(info->flags, pages);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
262
263
264
265
266
267
268
269
270
271
  		return false;
  	}
  	percpu_counter_add(&sbinfo->used_blocks, pages);
  	return true;
  }
  
  void shmem_uncharge(struct inode *inode, long pages)
  {
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
272
  	unsigned long flags;
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
273

4595ef88d   Kirill A. Shutemov   shmem: make shmem...
274
  	spin_lock_irqsave(&info->lock, flags);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
275
276
277
  	info->alloced -= pages;
  	inode->i_blocks -= pages * BLOCKS_PER_PAGE;
  	shmem_recalc_inode(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
278
  	spin_unlock_irqrestore(&info->lock, flags);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
279
280
281
  
  	if (sbinfo->max_blocks)
  		percpu_counter_sub(&sbinfo->used_blocks, pages);
71664665c   Hugh Dickins   huge tmpfs: fix C...
282
  	shmem_unacct_blocks(info->flags, pages);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
283
  }
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
284
285
286
287
288
289
290
  /*
   * Replace item expected in radix tree by a new item, while holding tree lock.
   */
  static int shmem_radix_tree_replace(struct address_space *mapping,
  			pgoff_t index, void *expected, void *replacement)
  {
  	void **slot;
  
  	VM_BUG_ON(!expected);
  	VM_BUG_ON(!replacement);
  
  	/* -ENOENT both when the slot is missing and when it holds something else. */
  	slot = radix_tree_lookup_slot(&mapping->page_tree, index);
  	if (!slot)
  		return -ENOENT;
  	if (radix_tree_deref_slot_protected(slot,
  					    &mapping->tree_lock) != expected)
  		return -ENOENT;
  
  	radix_tree_replace_slot(slot, replacement);
  	return 0;
  }
  
  /*
d18992286   Hugh Dickins   shmem: fix negati...
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
   * Sometimes, before we decide whether to proceed or to fail, we must check
   * that an entry was not already brought back from swap by a racing thread.
   *
   * Checking page is not enough: by the time a SwapCache page is locked, it
   * might be reused, and again be SwapCache, using the same swap as before.
   */
  static bool shmem_confirm_swap(struct address_space *mapping,
  			       pgoff_t index, swp_entry_t swap)
  {
  	void *found;
  	bool still_swap;
  
  	rcu_read_lock();
  	found = radix_tree_lookup(&mapping->page_tree, index);
  	rcu_read_unlock();
  
  	/* True only if the tree still holds this very swap entry at @index. */
  	still_swap = (found == swp_to_radix_entry(swap));
  	return still_swap;
  }
  
  /*
5a6e75f81   Kirill A. Shutemov   shmem: prepare hu...
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
   * Definitions for "huge tmpfs": tmpfs mounted with the huge= option
   *
   * SHMEM_HUGE_NEVER:
   *	disables huge pages for the mount;
   * SHMEM_HUGE_ALWAYS:
   *	enables huge pages for the mount;
   * SHMEM_HUGE_WITHIN_SIZE:
   *	only allocate huge pages if the page will be fully within i_size,
   *	also respect fadvise()/madvise() hints;
   * SHMEM_HUGE_ADVISE:
   *	only allocate huge pages if requested with fadvise()/madvise();
   */
  
  #define SHMEM_HUGE_NEVER	0
  #define SHMEM_HUGE_ALWAYS	1
  #define SHMEM_HUGE_WITHIN_SIZE	2
  #define SHMEM_HUGE_ADVISE	3
  
  /*
   * Special values.
   * Can only be set via /sys/kernel/mm/transparent_hugepage/shmem_enabled:
   *
   * SHMEM_HUGE_DENY:
   *	disables huge on shm_mnt and all mounts, for emergency use;
   * SHMEM_HUGE_FORCE:
   *	enables huge on shm_mnt and all mounts, w/o needing option, for testing;
   *
   */
  #define SHMEM_HUGE_DENY		(-1)
  #define SHMEM_HUGE_FORCE	(-2)
e496cf3d7   Kirill A. Shutemov   thp: introduce CO...
354
  #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
5a6e75f81   Kirill A. Shutemov   shmem: prepare hu...
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
  /* ifdef here to avoid bloating shmem.o when not necessary */
  
  int shmem_huge __read_mostly;
  
  /*
   * Translate a huge-mode keyword into its SHMEM_HUGE_* value.
   * Returns -EINVAL if @str matches none of the known keywords.
   */
  static int shmem_parse_huge(const char *str)
  {
  	static const struct {
  		const char *keyword;
  		int mode;
  	} huge_modes[] = {
  		{ "never",		SHMEM_HUGE_NEVER },
  		{ "always",		SHMEM_HUGE_ALWAYS },
  		{ "within_size",	SHMEM_HUGE_WITHIN_SIZE },
  		{ "advise",		SHMEM_HUGE_ADVISE },
  		{ "deny",		SHMEM_HUGE_DENY },
  		{ "force",		SHMEM_HUGE_FORCE },
  		{ NULL,			0 },	/* end of table */
  	};
  	int i;
  
  	for (i = 0; huge_modes[i].keyword; i++) {
  		if (!strcmp(str, huge_modes[i].keyword))
  			return huge_modes[i].mode;
  	}
  	return -EINVAL;
  }
  
  /*
   * Inverse of shmem_parse_huge(): map a SHMEM_HUGE_* value to its keyword.
   * An unknown value trips VM_BUG_ON() and yields "bad_val".
   */
  static const char *shmem_format_huge(int huge)
  {
  	if (huge == SHMEM_HUGE_NEVER)
  		return "never";
  	if (huge == SHMEM_HUGE_ALWAYS)
  		return "always";
  	if (huge == SHMEM_HUGE_WITHIN_SIZE)
  		return "within_size";
  	if (huge == SHMEM_HUGE_ADVISE)
  		return "advise";
  	if (huge == SHMEM_HUGE_DENY)
  		return "deny";
  	if (huge == SHMEM_HUGE_FORCE)
  		return "force";
  
  	VM_BUG_ON(1);
  	return "bad_val";
  }
779750d20   Kirill A. Shutemov   shmem: split huge...
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
  /*
   * Walk the superblock's shrinklist and try to split the huge page that
   * covers the end of each queued inode.
   *
   * @sbinfo:      superblock info owning the shrinklist
   * @sc:          shrink control; its nr_to_scan bounds how many list entries
   *               are examined (128 is used when @sc is NULL)
   * @nr_to_split: stop splitting after this many successful splits;
   *               0 means no such cap
   *
   * Returns SHRINK_STOP if the shrinklist is empty, otherwise the number of
   * huge pages that were split.
   */
  static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
  		struct shrink_control *sc, unsigned long nr_to_split)
  {
  	LIST_HEAD(list), *pos, *next;
  	struct inode *inode;
  	struct shmem_inode_info *info;
  	struct page *page;
  	unsigned long batch = sc ? sc->nr_to_scan : 128;
  	int removed = 0, split = 0;
  
  	if (list_empty(&sbinfo->shrinklist))
  		return SHRINK_STOP;
  
  	/*
  	 * Pass 1, under shrinklist_lock: pick up to 'batch' entries, drop the
  	 * ones that are not worth keeping, and move the rest (each with an
  	 * inode reference held) onto the private 'list'.
  	 */
  	spin_lock(&sbinfo->shrinklist_lock);
  	list_for_each_safe(pos, next, &sbinfo->shrinklist) {
  		info = list_entry(pos, struct shmem_inode_info, shrinklist);
  
  		/* pin the inode */
  		inode = igrab(&info->vfs_inode);
  
  		/* inode is about to be evicted */
  		if (!inode) {
  			list_del_init(&info->shrinklist);
  			removed++;
  			goto next;
  		}
  
  		/* Check if there's anything to gain */
  		if (round_up(inode->i_size, PAGE_SIZE) ==
  				round_up(inode->i_size, HPAGE_PMD_SIZE)) {
  			list_del_init(&info->shrinklist);
  			removed++;
  			/*
  			 * NOTE(review): iput() is called with shrinklist_lock
  			 * held; if this could ever be the last reference, the
  			 * inode teardown would run under the spinlock -
  			 * confirm that this is safe.
  			 */
  			iput(inode);
  			goto next;
  		}
  
  		list_move(&info->shrinklist, &list);
next:
  		if (!--batch)
  			break;
  	}
  	spin_unlock(&sbinfo->shrinklist_lock);
  
  	/*
  	 * Pass 2, lock dropped: try to split the page found at the
  	 * huge-page-aligned offset of i_size for every inode collected above.
  	 */
  	list_for_each_safe(pos, next, &list) {
  		int ret;
  
  		info = list_entry(pos, struct shmem_inode_info, shrinklist);
  		inode = &info->vfs_inode;
  
  		/* Cap reached: release the pin but keep the entry queued. */
  		if (nr_to_split && split >= nr_to_split) {
  			iput(inode);
  			continue;
  		}
  
  		page = find_lock_page(inode->i_mapping,
  				(inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
  		if (!page)
  			goto drop;
  
  		/* Not a huge page: nothing to split, forget this inode. */
  		if (!PageTransHuge(page)) {
  			unlock_page(page);
  			put_page(page);
  			goto drop;
  		}
  
  		ret = split_huge_page(page);
  		unlock_page(page);
  		put_page(page);
  
  		if (ret) {
  			/* split failed: leave it on the list */
  			iput(inode);
  			continue;
  		}
  
  		split++;
drop:
  		list_del_init(&info->shrinklist);
  		removed++;
  		iput(inode);
  	}
  
  	/*
  	 * Pass 3: whatever is still on 'list' goes back to the tail of the
  	 * shrinklist, and the length is corrected for every entry dropped.
  	 */
  	spin_lock(&sbinfo->shrinklist_lock);
  	list_splice_tail(&list, &sbinfo->shrinklist);
  	sbinfo->shrinklist_len -= removed;
  	spin_unlock(&sbinfo->shrinklist_lock);
  
  	return split;
  }
  
  /*
   * Scan entry point: run shmem_unused_huge_shrink() with no split cap, or
   * return SHRINK_STOP straight away when nothing is queued.
   */
  static long shmem_unused_huge_scan(struct super_block *sb,
  		struct shrink_control *sc)
  {
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
  
  	if (READ_ONCE(sbinfo->shrinklist_len))
  		return shmem_unused_huge_shrink(sbinfo, sc, 0);
  
  	return SHRINK_STOP;
  }
  
  static long shmem_unused_huge_count(struct super_block *sb,
  		struct shrink_control *sc)
  {
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
  	return READ_ONCE(sbinfo->shrinklist_len);
  }
e496cf3d7   Kirill A. Shutemov   thp: introduce CO...
503
  #else /* !CONFIG_TRANSPARENT_HUGE_PAGECACHE */
5a6e75f81   Kirill A. Shutemov   shmem: prepare hu...
504
505
  
  #define shmem_huge SHMEM_HUGE_DENY
779750d20   Kirill A. Shutemov   shmem: split huge...
506
507
508
509
510
  /*
   * Stub used when CONFIG_TRANSPARENT_HUGE_PAGECACHE is not set:
   * ignores its arguments and reports that nothing was split.
   */
  static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
  		struct shrink_control *sc, unsigned long nr_to_split)
  {
  	return 0;
  }
e496cf3d7   Kirill A. Shutemov   thp: introduce CO...
511
  #endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */
5a6e75f81   Kirill A. Shutemov   shmem: prepare hu...
512
513
  
  /*
46f65ec15   Hugh Dickins   tmpfs: convert sh...
514
515
516
517
   * Like add_to_page_cache_locked, but error if expected item has gone.
   */
  static int shmem_add_to_page_cache(struct page *page,
  				   struct address_space *mapping,
fed400a18   Wang Sheng-Hui   mm/shmem.c: remov...
518
  				   pgoff_t index, void *expected)
46f65ec15   Hugh Dickins   tmpfs: convert sh...
519
  {
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
520
  	int error, nr = hpage_nr_pages(page);
46f65ec15   Hugh Dickins   tmpfs: convert sh...
521

800d8c63b   Kirill A. Shutemov   shmem: add huge p...
522
523
  	VM_BUG_ON_PAGE(PageTail(page), page);
  	VM_BUG_ON_PAGE(index != round_down(index, nr), page);
309381fea   Sasha Levin   mm: dump page whe...
524
525
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
526
  	VM_BUG_ON(expected && PageTransHuge(page));
46f65ec15   Hugh Dickins   tmpfs: convert sh...
527

800d8c63b   Kirill A. Shutemov   shmem: add huge p...
528
  	page_ref_add(page, nr);
b065b4321   Hugh Dickins   shmem: cleanup sh...
529
530
531
532
  	page->mapping = mapping;
  	page->index = index;
  
  	spin_lock_irq(&mapping->tree_lock);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
  	if (PageTransHuge(page)) {
  		void __rcu **results;
  		pgoff_t idx;
  		int i;
  
  		error = 0;
  		if (radix_tree_gang_lookup_slot(&mapping->page_tree,
  					&results, &idx, index, 1) &&
  				idx < index + HPAGE_PMD_NR) {
  			error = -EEXIST;
  		}
  
  		if (!error) {
  			for (i = 0; i < HPAGE_PMD_NR; i++) {
  				error = radix_tree_insert(&mapping->page_tree,
  						index + i, page + i);
  				VM_BUG_ON(error);
  			}
  			count_vm_event(THP_FILE_ALLOC);
  		}
  	} else if (!expected) {
b065b4321   Hugh Dickins   shmem: cleanup sh...
554
  		error = radix_tree_insert(&mapping->page_tree, index, page);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
555
  	} else {
b065b4321   Hugh Dickins   shmem: cleanup sh...
556
557
  		error = shmem_radix_tree_replace(mapping, index, expected,
  								 page);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
558
  	}
46f65ec15   Hugh Dickins   tmpfs: convert sh...
559
  	if (!error) {
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
560
561
  		mapping->nrpages += nr;
  		if (PageTransHuge(page))
11fb99898   Mel Gorman   mm: move most fil...
562
563
564
  			__inc_node_page_state(page, NR_SHMEM_THPS);
  		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
  		__mod_node_page_state(page_pgdat(page), NR_SHMEM, nr);
b065b4321   Hugh Dickins   shmem: cleanup sh...
565
566
567
568
  		spin_unlock_irq(&mapping->tree_lock);
  	} else {
  		page->mapping = NULL;
  		spin_unlock_irq(&mapping->tree_lock);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
569
  		page_ref_sub(page, nr);
46f65ec15   Hugh Dickins   tmpfs: convert sh...
570
  	}
46f65ec15   Hugh Dickins   tmpfs: convert sh...
571
572
573
574
  	return error;
  }
  
  /*
6922c0c7a   Hugh Dickins   tmpfs: convert sh...
575
576
577
578
579
580
   * Like delete_from_page_cache, but substitutes swap for page.
   */
  static void shmem_delete_from_page_cache(struct page *page, void *radswap)
  {
  	struct address_space *mapping = page->mapping;
  	int error;
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
581
  	VM_BUG_ON_PAGE(PageCompound(page), page);
6922c0c7a   Hugh Dickins   tmpfs: convert sh...
582
583
584
585
  	spin_lock_irq(&mapping->tree_lock);
  	error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
  	page->mapping = NULL;
  	mapping->nrpages--;
11fb99898   Mel Gorman   mm: move most fil...
586
587
  	__dec_node_page_state(page, NR_FILE_PAGES);
  	__dec_node_page_state(page, NR_SHMEM);
6922c0c7a   Hugh Dickins   tmpfs: convert sh...
588
  	spin_unlock_irq(&mapping->tree_lock);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
589
  	put_page(page);
6922c0c7a   Hugh Dickins   tmpfs: convert sh...
590
591
592
593
  	BUG_ON(error);
  }
  
  /*
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
594
595
596
597
598
   * Remove swap entry from radix tree, free the swap and its page cache.
   */
  static int shmem_free_swap(struct address_space *mapping,
  			   pgoff_t index, void *radswap)
  {
  	void *removed;
  
  	spin_lock_irq(&mapping->tree_lock);
  	removed = radix_tree_delete_item(&mapping->page_tree, index, radswap);
  	spin_unlock_irq(&mapping->tree_lock);
  
  	/* Only free the swap if it was really this entry that got deleted. */
  	if (removed == radswap) {
  		free_swap_and_cache(radix_to_swp_entry(radswap));
  		return 0;
  	}
  	return -ENOENT;
  }
  
  /*
6a15a3709   Vlastimil Babka   mm, proc: reduce ...
611
   * Determine (in bytes) how many of the shmem object's pages mapped by the
48131e03c   Vlastimil Babka   mm, proc: reduce ...
612
   * given offsets are swapped out.
6a15a3709   Vlastimil Babka   mm, proc: reduce ...
613
614
615
616
   *
   * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
   * as long as the inode doesn't go away and racy results are not a problem.
   */
48131e03c   Vlastimil Babka   mm, proc: reduce ...
617
618
  unsigned long shmem_partial_swap_usage(struct address_space *mapping,
  						pgoff_t start, pgoff_t end)
  {
  	struct radix_tree_iter iter;
  	unsigned long nr_swapped = 0;
  	struct page *entry;
  	void **slot;
  
  	rcu_read_lock();
  	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
  		if (iter.index >= end)
  			break;
  
  		entry = radix_tree_deref_slot(slot);
  		if (radix_tree_deref_retry(entry)) {
  			slot = radix_tree_iter_retry(&iter);
  			continue;
  		}
  
  		/* Exceptional entries in a shmem mapping are the swapped pages. */
  		if (radix_tree_exceptional_entry(entry))
  			nr_swapped++;
  
  		if (!need_resched())
  			continue;
  
  		cond_resched_rcu();
  		slot = radix_tree_iter_next(&iter);
  	}
  	rcu_read_unlock();
  
  	return nr_swapped << PAGE_SHIFT;
  }
  
  /*
48131e03c   Vlastimil Babka   mm, proc: reduce ...
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
   * Determine (in bytes) how many of the shmem object's pages mapped by the
   * given vma are swapped out.
   *
   * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
   * as long as the inode doesn't go away and racy results are not a problem.
   */
  unsigned long shmem_swap_usage(struct vm_area_struct *vma)
  {
  	struct inode *inode = file_inode(vma->vm_file);
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	struct address_space *mapping = inode->i_mapping;
  	unsigned long swapped;
  
  	/* Be careful as we don't hold info->lock */
  	swapped = READ_ONCE(info->swapped);
  
  	/*
  	 * The easier cases are when the shmem object has nothing in swap, or
  	 * the vma maps it whole. Then we can simply use the stats that we
  	 * already track.
  	 */
  	if (!swapped)
  		return 0;
  
  	if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size)
  		return swapped << PAGE_SHIFT;
  
  	/* Here comes the more involved part */
  	return shmem_partial_swap_usage(mapping,
  			linear_page_index(vma, vma->vm_start),
  			linear_page_index(vma, vma->vm_end));
  }
  
  /*
245132643   Hugh Dickins   SHM_UNLOCK: fix U...
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
   * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
   */
  void shmem_unlock_mapping(struct address_space *mapping)
  {
  	struct pagevec pvec;
  	pgoff_t indices[PAGEVEC_SIZE];
  	pgoff_t index = 0;
  
  	pagevec_init(&pvec, 0);
  	/*
  	 * Minor point, but we might as well stop if someone else SHM_LOCKs it.
  	 */
  	while (!mapping_unevictable(mapping)) {
  		/*
  		 * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
  		 * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
  		 */
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
702
703
  		pvec.nr = find_get_entries(mapping, index,
  					   PAGEVEC_SIZE, pvec.pages, indices);
245132643   Hugh Dickins   SHM_UNLOCK: fix U...
704
705
706
  		if (!pvec.nr)
  			break;
  		index = indices[pvec.nr - 1] + 1;
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
707
  		pagevec_remove_exceptionals(&pvec);
245132643   Hugh Dickins   SHM_UNLOCK: fix U...
708
709
710
711
  		check_move_unevictable_pages(pvec.pages, pvec.nr);
  		pagevec_release(&pvec);
  		cond_resched();
  	}
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
712
713
714
715
  }
  
  /*
   * Remove range of pages and swap entries from radix tree, and free them.
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
716
   * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
717
   */
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
718
719
  static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
  								 bool unfalloc)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
720
  {
285b2c4fd   Hugh Dickins   tmpfs: demolish o...
721
  	struct address_space *mapping = inode->i_mapping;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
722
  	struct shmem_inode_info *info = SHMEM_I(inode);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
723
724
725
726
  	pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
  	pgoff_t end = (lend + 1) >> PAGE_SHIFT;
  	unsigned int partial_start = lstart & (PAGE_SIZE - 1);
  	unsigned int partial_end = (lend + 1) & (PAGE_SIZE - 1);
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
727
  	struct pagevec pvec;
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
728
729
  	pgoff_t indices[PAGEVEC_SIZE];
  	long nr_swaps_freed = 0;
285b2c4fd   Hugh Dickins   tmpfs: demolish o...
730
  	pgoff_t index;
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
731
  	int i;
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
732
733
  	if (lend == -1)
  		end = -1;	/* unsigned, so actually very big */
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
734
735
736
  
  	pagevec_init(&pvec, 0);
  	index = start;
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
737
  	while (index < end) {
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
738
739
740
  		pvec.nr = find_get_entries(mapping, index,
  			min(end - index, (pgoff_t)PAGEVEC_SIZE),
  			pvec.pages, indices);
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
741
742
  		if (!pvec.nr)
  			break;
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
743
744
  		for (i = 0; i < pagevec_count(&pvec); i++) {
  			struct page *page = pvec.pages[i];
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
745
  			index = indices[i];
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
746
  			if (index >= end)
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
747
  				break;
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
748
  			if (radix_tree_exceptional_entry(page)) {
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
749
750
  				if (unfalloc)
  					continue;
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
751
752
  				nr_swaps_freed += !shmem_free_swap(mapping,
  								index, page);
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
753
  				continue;
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
754
  			}
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
755
  			VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page);
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
756
  			if (!trylock_page(page))
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
757
  				continue;
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
  
  			if (PageTransTail(page)) {
  				/* Middle of THP: zero out the page */
  				clear_highpage(page);
  				unlock_page(page);
  				continue;
  			} else if (PageTransHuge(page)) {
  				if (index == round_down(end, HPAGE_PMD_NR)) {
  					/*
  					 * Range ends in the middle of THP:
  					 * zero out the page
  					 */
  					clear_highpage(page);
  					unlock_page(page);
  					continue;
  				}
  				index += HPAGE_PMD_NR - 1;
  				i += HPAGE_PMD_NR - 1;
  			}
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
777
  			if (!unfalloc || !PageUptodate(page)) {
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
778
779
  				VM_BUG_ON_PAGE(PageTail(page), page);
  				if (page_mapping(page) == mapping) {
309381fea   Sasha Levin   mm: dump page whe...
780
  					VM_BUG_ON_PAGE(PageWriteback(page), page);
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
781
782
  					truncate_inode_page(mapping, page);
  				}
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
783
  			}
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
784
785
  			unlock_page(page);
  		}
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
786
  		pagevec_remove_exceptionals(&pvec);
245132643   Hugh Dickins   SHM_UNLOCK: fix U...
787
  		pagevec_release(&pvec);
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
788
789
790
  		cond_resched();
  		index++;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
791

83e4fa9c1   Hugh Dickins   tmpfs: support fa...
792
  	if (partial_start) {
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
793
  		struct page *page = NULL;
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
794
  		shmem_getpage(inode, start - 1, &page, SGP_READ);
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
795
  		if (page) {
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
796
  			unsigned int top = PAGE_SIZE;
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
797
798
799
800
801
802
803
  			if (start > end) {
  				top = partial_end;
  				partial_end = 0;
  			}
  			zero_user_segment(page, partial_start, top);
  			set_page_dirty(page);
  			unlock_page(page);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
804
  			put_page(page);
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
805
806
807
808
  		}
  	}
  	if (partial_end) {
  		struct page *page = NULL;
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
809
  		shmem_getpage(inode, end, &page, SGP_READ);
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
810
811
  		if (page) {
  			zero_user_segment(page, 0, partial_end);
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
812
813
  			set_page_dirty(page);
  			unlock_page(page);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
814
  			put_page(page);
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
815
816
  		}
  	}
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
817
818
  	if (start >= end)
  		return;
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
819
820
  
  	index = start;
b1a366500   Hugh Dickins   shmem: fix splici...
821
  	while (index < end) {
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
822
  		cond_resched();
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
823
824
  
  		pvec.nr = find_get_entries(mapping, index,
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
825
  				min(end - index, (pgoff_t)PAGEVEC_SIZE),
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
826
  				pvec.pages, indices);
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
827
  		if (!pvec.nr) {
b1a366500   Hugh Dickins   shmem: fix splici...
828
829
  			/* If all gone or hole-punch or unfalloc, we're done */
  			if (index == start || end != -1)
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
830
  				break;
b1a366500   Hugh Dickins   shmem: fix splici...
831
  			/* But if truncating, restart to make sure all gone */
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
832
833
834
  			index = start;
  			continue;
  		}
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
835
836
  		for (i = 0; i < pagevec_count(&pvec); i++) {
  			struct page *page = pvec.pages[i];
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
837
  			index = indices[i];
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
838
  			if (index >= end)
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
839
  				break;
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
840
  			if (radix_tree_exceptional_entry(page)) {
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
841
842
  				if (unfalloc)
  					continue;
b1a366500   Hugh Dickins   shmem: fix splici...
843
844
845
846
847
848
  				if (shmem_free_swap(mapping, index, page)) {
  					/* Swap was replaced by page: retry */
  					index--;
  					break;
  				}
  				nr_swaps_freed++;
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
849
850
  				continue;
  			}
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
851
  			lock_page(page);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
  
  			if (PageTransTail(page)) {
  				/* Middle of THP: zero out the page */
  				clear_highpage(page);
  				unlock_page(page);
  				/*
  				 * Partial thp truncate due 'start' in middle
  				 * of THP: don't need to look on these pages
  				 * again on !pvec.nr restart.
  				 */
  				if (index != round_down(end, HPAGE_PMD_NR))
  					start++;
  				continue;
  			} else if (PageTransHuge(page)) {
  				if (index == round_down(end, HPAGE_PMD_NR)) {
  					/*
  					 * Range ends in the middle of THP:
  					 * zero out the page
  					 */
  					clear_highpage(page);
  					unlock_page(page);
  					continue;
  				}
  				index += HPAGE_PMD_NR - 1;
  				i += HPAGE_PMD_NR - 1;
  			}
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
878
  			if (!unfalloc || !PageUptodate(page)) {
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
879
880
  				VM_BUG_ON_PAGE(PageTail(page), page);
  				if (page_mapping(page) == mapping) {
309381fea   Sasha Levin   mm: dump page whe...
881
  					VM_BUG_ON_PAGE(PageWriteback(page), page);
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
882
  					truncate_inode_page(mapping, page);
b1a366500   Hugh Dickins   shmem: fix splici...
883
884
885
886
887
  				} else {
  					/* Page was replaced by swap: retry */
  					unlock_page(page);
  					index--;
  					break;
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
888
  				}
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
889
  			}
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
890
891
  			unlock_page(page);
  		}
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
892
  		pagevec_remove_exceptionals(&pvec);
245132643   Hugh Dickins   SHM_UNLOCK: fix U...
893
  		pagevec_release(&pvec);
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
894
895
  		index++;
  	}
94c1e62df   Hugh Dickins   tmpfs: take contr...
896

4595ef88d   Kirill A. Shutemov   shmem: make shmem...
897
  	spin_lock_irq(&info->lock);
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
898
  	info->swapped -= nr_swaps_freed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
899
  	shmem_recalc_inode(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
900
  	spin_unlock_irq(&info->lock);
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
901
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
902

1635f6a74   Hugh Dickins   tmpfs: undo fallo...
903
904
905
  /*
   * Truncate or punch a hole over the byte range [lstart, lend] of a shmem
   * inode, then stamp its ctime and mtime with the current time.
   */
  void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
  {
  	/* unfalloc == false: a real removal, not the undo of a fallocate */
  	shmem_undo_range(inode, lstart, lend, false);
  
  	inode->i_mtime = current_time(inode);
  	inode->i_ctime = inode->i_mtime;
  }
94c1e62df   Hugh Dickins   tmpfs: take contr...
908
  EXPORT_SYMBOL_GPL(shmem_truncate_range);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
909

44a30220b   Yu Zhao   shmem: recalculat...
910
911
912
913
914
  static int shmem_getattr(struct vfsmount *mnt, struct dentry *dentry,
  			 struct kstat *stat)
  {
  	struct inode *inode = dentry->d_inode;
  	struct shmem_inode_info *info = SHMEM_I(inode);
d0424c429   Hugh Dickins   tmpfs: avoid a li...
915
  	if (info->alloced - info->swapped != inode->i_mapping->nrpages) {
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
916
  		spin_lock_irq(&info->lock);
d0424c429   Hugh Dickins   tmpfs: avoid a li...
917
  		shmem_recalc_inode(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
918
  		spin_unlock_irq(&info->lock);
d0424c429   Hugh Dickins   tmpfs: avoid a li...
919
  	}
44a30220b   Yu Zhao   shmem: recalculat...
920
  	generic_fillattr(inode, stat);
44a30220b   Yu Zhao   shmem: recalculat...
921
922
  	return 0;
  }
94c1e62df   Hugh Dickins   tmpfs: take contr...
923
  static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
924
  {
75c3cfa85   David Howells   VFS: assorted wei...
925
  	struct inode *inode = d_inode(dentry);
40e041a2c   David Herrmann   shm: add sealing API
926
  	struct shmem_inode_info *info = SHMEM_I(inode);
779750d20   Kirill A. Shutemov   shmem: split huge...
927
  	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
928
  	int error;
31051c85b   Jan Kara   fs: Give dentry t...
929
  	error = setattr_prepare(dentry, attr);
db78b877f   Christoph Hellwig   always call inode...
930
931
  	if (error)
  		return error;
94c1e62df   Hugh Dickins   tmpfs: take contr...
932
933
934
  	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
  		loff_t oldsize = inode->i_size;
  		loff_t newsize = attr->ia_size;
3889e6e76   npiggin@suse.de   tmpfs: convert to...
935

40e041a2c   David Herrmann   shm: add sealing API
936
937
938
939
  		/* protected by i_mutex */
  		if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
  		    (newsize > oldsize && (info->seals & F_SEAL_GROW)))
  			return -EPERM;
94c1e62df   Hugh Dickins   tmpfs: take contr...
940
  		if (newsize != oldsize) {
771425179   Konstantin Khlebnikov   shmem: update mem...
941
942
943
944
  			error = shmem_reacct_size(SHMEM_I(inode)->flags,
  					oldsize, newsize);
  			if (error)
  				return error;
94c1e62df   Hugh Dickins   tmpfs: take contr...
945
  			i_size_write(inode, newsize);
078cd8279   Deepa Dinamani   fs: Replace CURRE...
946
  			inode->i_ctime = inode->i_mtime = current_time(inode);
94c1e62df   Hugh Dickins   tmpfs: take contr...
947
  		}
afa2db2fb   Josef Bacik   tmpfs: truncate p...
948
  		if (newsize <= oldsize) {
94c1e62df   Hugh Dickins   tmpfs: take contr...
949
  			loff_t holebegin = round_up(newsize, PAGE_SIZE);
d0424c429   Hugh Dickins   tmpfs: avoid a li...
950
951
952
953
954
955
  			if (oldsize > holebegin)
  				unmap_mapping_range(inode->i_mapping,
  							holebegin, 0, 1);
  			if (info->alloced)
  				shmem_truncate_range(inode,
  							newsize, (loff_t)-1);
94c1e62df   Hugh Dickins   tmpfs: take contr...
956
  			/* unmap again to remove racily COWed private pages */
d0424c429   Hugh Dickins   tmpfs: avoid a li...
957
958
959
  			if (oldsize > holebegin)
  				unmap_mapping_range(inode->i_mapping,
  							holebegin, 0, 1);
779750d20   Kirill A. Shutemov   shmem: split huge...
960
961
962
963
964
965
966
967
968
969
970
971
972
973
  
  			/*
  			 * Part of the huge page can be beyond i_size: subject
  			 * to shrink under memory pressure.
  			 */
  			if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) {
  				spin_lock(&sbinfo->shrinklist_lock);
  				if (list_empty(&info->shrinklist)) {
  					list_add_tail(&info->shrinklist,
  							&sbinfo->shrinklist);
  					sbinfo->shrinklist_len++;
  				}
  				spin_unlock(&sbinfo->shrinklist_lock);
  			}
94c1e62df   Hugh Dickins   tmpfs: take contr...
974
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
975
  	}
db78b877f   Christoph Hellwig   always call inode...
976
  	setattr_copy(inode, attr);
db78b877f   Christoph Hellwig   always call inode...
977
  	if (attr->ia_valid & ATTR_MODE)
feda821e7   Christoph Hellwig   fs: remove generi...
978
  		error = posix_acl_chmod(inode, inode->i_mode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
979
980
  	return error;
  }
1f895f75d   Al Viro   switch shmem.c to...
981
  static void shmem_evict_inode(struct inode *inode)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
982
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
983
  	struct shmem_inode_info *info = SHMEM_I(inode);
779750d20   Kirill A. Shutemov   shmem: split huge...
984
  	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
985

3889e6e76   npiggin@suse.de   tmpfs: convert to...
986
  	if (inode->i_mapping->a_ops == &shmem_aops) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
987
988
  		shmem_unacct_size(info->flags, inode->i_size);
  		inode->i_size = 0;
3889e6e76   npiggin@suse.de   tmpfs: convert to...
989
  		shmem_truncate_range(inode, 0, (loff_t)-1);
779750d20   Kirill A. Shutemov   shmem: split huge...
990
991
992
993
994
995
996
997
  		if (!list_empty(&info->shrinklist)) {
  			spin_lock(&sbinfo->shrinklist_lock);
  			if (!list_empty(&info->shrinklist)) {
  				list_del_init(&info->shrinklist);
  				sbinfo->shrinklist_len--;
  			}
  			spin_unlock(&sbinfo->shrinklist_lock);
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
998
  		if (!list_empty(&info->swaplist)) {
cb5f7b9a4   Hugh Dickins   tmpfs: make shmem...
999
  			mutex_lock(&shmem_swaplist_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1000
  			list_del_init(&info->swaplist);
cb5f7b9a4   Hugh Dickins   tmpfs: make shmem...
1001
  			mutex_unlock(&shmem_swaplist_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1002
  		}
3ed47db34   Al Viro   make sure that fr...
1003
  	}
b09e0fa4b   Eric Paris   tmpfs: implement ...
1004

38f386574   Aristeu Rozanski   xattr: extract si...
1005
  	simple_xattrs_free(&info->xattrs);
0f3c42f52   Hugh Dickins   tmpfs: change fin...
1006
  	WARN_ON(inode->i_blocks);
5b04c6890   Pavel Emelyanov   shmem: factor out...
1007
  	shmem_free_inode(inode->i_sb);
dbd5768f8   Jan Kara   vfs: Rename end_w...
1008
  	clear_inode(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1009
  }
46f65ec15   Hugh Dickins   tmpfs: convert sh...
1010
1011
1012
  /*
   * If swap found in inode, free it and move page from swapcache to filecache.
   */
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1013
  static int shmem_unuse_inode(struct shmem_inode_info *info,
bde05d1cc   Hugh Dickins   shmem: replace pa...
1014
  			     swp_entry_t swap, struct page **pagep)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1015
  {
285b2c4fd   Hugh Dickins   tmpfs: demolish o...
1016
  	struct address_space *mapping = info->vfs_inode.i_mapping;
46f65ec15   Hugh Dickins   tmpfs: convert sh...
1017
  	void *radswap;
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1018
  	pgoff_t index;
bde05d1cc   Hugh Dickins   shmem: replace pa...
1019
1020
  	gfp_t gfp;
  	int error = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1021

46f65ec15   Hugh Dickins   tmpfs: convert sh...
1022
  	radswap = swp_to_radix_entry(swap);
e504f3fdd   Hugh Dickins   tmpfs radix_tree:...
1023
  	index = radix_tree_locate_item(&mapping->page_tree, radswap);
46f65ec15   Hugh Dickins   tmpfs: convert sh...
1024
  	if (index == -1)
00501b531   Johannes Weiner   mm: memcontrol: r...
1025
  		return -EAGAIN;	/* tell shmem_unuse we found nothing */
2e0e26c76   Hugh Dickins   tmpfs: open a win...
1026

1b1b32f2c   Hugh Dickins   tmpfs: fix shmem_...
1027
1028
  	/*
  	 * Move _head_ to start search for next from here.
1f895f75d   Al Viro   switch shmem.c to...
1029
  	 * But be careful: shmem_evict_inode checks list_empty without taking
1b1b32f2c   Hugh Dickins   tmpfs: fix shmem_...
1030
  	 * mutex, and there's an instant in list_move_tail when info->swaplist
285b2c4fd   Hugh Dickins   tmpfs: demolish o...
1031
  	 * would appear empty, if it were the only one on shmem_swaplist.
1b1b32f2c   Hugh Dickins   tmpfs: fix shmem_...
1032
1033
1034
  	 */
  	if (shmem_swaplist.next != &info->swaplist)
  		list_move_tail(&shmem_swaplist, &info->swaplist);
2e0e26c76   Hugh Dickins   tmpfs: open a win...
1035

bde05d1cc   Hugh Dickins   shmem: replace pa...
1036
1037
1038
1039
1040
1041
1042
  	gfp = mapping_gfp_mask(mapping);
  	if (shmem_should_replace_page(*pagep, gfp)) {
  		mutex_unlock(&shmem_swaplist_mutex);
  		error = shmem_replace_page(pagep, gfp, info, index);
  		mutex_lock(&shmem_swaplist_mutex);
  		/*
  		 * We needed to drop mutex to make that restrictive page
0142ef6cd   Hugh Dickins   shmem: replace_pa...
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
  		 * allocation, but the inode might have been freed while we
  		 * dropped it: although a racing shmem_evict_inode() cannot
  		 * complete without emptying the radix_tree, our page lock
  		 * on this swapcache page is not enough to prevent that -
  		 * free_swap_and_cache() of our swap entry will only
  		 * trylock_page(), removing swap from radix_tree whatever.
  		 *
  		 * We must not proceed to shmem_add_to_page_cache() if the
  		 * inode has been freed, but of course we cannot rely on
  		 * inode or mapping or info to check that.  However, we can
  		 * safely check if our swap entry is still in use (and here
  		 * it can't have got reused for another page): if it's still
  		 * in use, then the inode cannot have been freed yet, and we
  		 * can safely proceed (if it's no longer in use, that tells
  		 * nothing about the inode, but we don't need to unuse swap).
bde05d1cc   Hugh Dickins   shmem: replace pa...
1058
1059
1060
1061
  		 */
  		if (!page_swapcount(*pagep))
  			error = -ENOENT;
  	}
d13d14430   KAMEZAWA Hiroyuki   memcg: handle swa...
1062
  	/*
778dd893a   Hugh Dickins   tmpfs: fix race b...
1063
1064
1065
  	 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
  	 * but also to hold up shmem_evict_inode(): so inode cannot be freed
  	 * beneath us (pagelock doesn't help until the page is in pagecache).
d13d14430   KAMEZAWA Hiroyuki   memcg: handle swa...
1066
  	 */
bde05d1cc   Hugh Dickins   shmem: replace pa...
1067
1068
  	if (!error)
  		error = shmem_add_to_page_cache(*pagep, mapping, index,
fed400a18   Wang Sheng-Hui   mm/shmem.c: remov...
1069
  						radswap);
48f170fb7   Hugh Dickins   tmpfs: simplify u...
1070
  	if (error != -ENOMEM) {
46f65ec15   Hugh Dickins   tmpfs: convert sh...
1071
1072
1073
1074
  		/*
  		 * Truncation and eviction use free_swap_and_cache(), which
  		 * only does trylock page: if we raced, best clean up here.
  		 */
bde05d1cc   Hugh Dickins   shmem: replace pa...
1075
1076
  		delete_from_swap_cache(*pagep);
  		set_page_dirty(*pagep);
46f65ec15   Hugh Dickins   tmpfs: convert sh...
1077
  		if (!error) {
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1078
  			spin_lock_irq(&info->lock);
46f65ec15   Hugh Dickins   tmpfs: convert sh...
1079
  			info->swapped--;
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1080
  			spin_unlock_irq(&info->lock);
46f65ec15   Hugh Dickins   tmpfs: convert sh...
1081
1082
  			swap_free(swap);
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1083
  	}
2e0e26c76   Hugh Dickins   tmpfs: open a win...
1084
  	return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1085
1086
1087
  }
  
  /*
46f65ec15   Hugh Dickins   tmpfs: convert sh...
1088
   * Search through swapped inodes to find and replace swap by page.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1089
   */
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1090
  int shmem_unuse(swp_entry_t swap, struct page *page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1091
  {
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1092
  	struct list_head *this, *next;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1093
  	struct shmem_inode_info *info;
00501b531   Johannes Weiner   mm: memcontrol: r...
1094
  	struct mem_cgroup *memcg;
bde05d1cc   Hugh Dickins   shmem: replace pa...
1095
1096
1097
1098
  	int error = 0;
  
  	/*
  	 * There's a faint possibility that swap page was replaced before
0142ef6cd   Hugh Dickins   shmem: replace_pa...
1099
  	 * caller locked it: caller will come back later with the right page.
bde05d1cc   Hugh Dickins   shmem: replace pa...
1100
  	 */
0142ef6cd   Hugh Dickins   shmem: replace_pa...
1101
  	if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
bde05d1cc   Hugh Dickins   shmem: replace pa...
1102
  		goto out;
778dd893a   Hugh Dickins   tmpfs: fix race b...
1103
1104
1105
1106
1107
  
  	/*
  	 * Charge page using GFP_KERNEL while we can wait, before taking
  	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
  	 * Charged back to the user (not to caller) when swap account is used.
778dd893a   Hugh Dickins   tmpfs: fix race b...
1108
  	 */
f627c2f53   Kirill A. Shutemov   memcg: adjust to ...
1109
1110
  	error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg,
  			false);
778dd893a   Hugh Dickins   tmpfs: fix race b...
1111
1112
  	if (error)
  		goto out;
46f65ec15   Hugh Dickins   tmpfs: convert sh...
1113
  	/* No radix_tree_preload: swap entry keeps a place for page in tree */
00501b531   Johannes Weiner   mm: memcontrol: r...
1114
  	error = -EAGAIN;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1115

cb5f7b9a4   Hugh Dickins   tmpfs: make shmem...
1116
  	mutex_lock(&shmem_swaplist_mutex);
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1117
1118
  	list_for_each_safe(this, next, &shmem_swaplist) {
  		info = list_entry(this, struct shmem_inode_info, swaplist);
285b2c4fd   Hugh Dickins   tmpfs: demolish o...
1119
  		if (info->swapped)
00501b531   Johannes Weiner   mm: memcontrol: r...
1120
  			error = shmem_unuse_inode(info, swap, &page);
6922c0c7a   Hugh Dickins   tmpfs: convert sh...
1121
1122
  		else
  			list_del_init(&info->swaplist);
cb5f7b9a4   Hugh Dickins   tmpfs: make shmem...
1123
  		cond_resched();
00501b531   Johannes Weiner   mm: memcontrol: r...
1124
  		if (error != -EAGAIN)
778dd893a   Hugh Dickins   tmpfs: fix race b...
1125
  			break;
00501b531   Johannes Weiner   mm: memcontrol: r...
1126
  		/* found nothing in this: move on to search the next */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1127
  	}
cb5f7b9a4   Hugh Dickins   tmpfs: make shmem...
1128
  	mutex_unlock(&shmem_swaplist_mutex);
778dd893a   Hugh Dickins   tmpfs: fix race b...
1129

00501b531   Johannes Weiner   mm: memcontrol: r...
1130
1131
1132
  	if (error) {
  		if (error != -ENOMEM)
  			error = 0;
f627c2f53   Kirill A. Shutemov   memcg: adjust to ...
1133
  		mem_cgroup_cancel_charge(page, memcg, false);
00501b531   Johannes Weiner   mm: memcontrol: r...
1134
  	} else
f627c2f53   Kirill A. Shutemov   memcg: adjust to ...
1135
  		mem_cgroup_commit_charge(page, memcg, true, false);
778dd893a   Hugh Dickins   tmpfs: fix race b...
1136
  out:
aaa468653   Hugh Dickins   swap_info: note S...
1137
  	unlock_page(page);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1138
  	put_page(page);
778dd893a   Hugh Dickins   tmpfs: fix race b...
1139
  	return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1140
1141
1142
1143
1144
1145
1146
1147
  }
  
  /*
   * Move the page from the page cache to the swap cache.
   */
  static int shmem_writepage(struct page *page, struct writeback_control *wbc)
  {
  	struct shmem_inode_info *info;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1148
  	struct address_space *mapping;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1149
  	struct inode *inode;
6922c0c7a   Hugh Dickins   tmpfs: convert sh...
1150
1151
  	swp_entry_t swap;
  	pgoff_t index;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1152

800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1153
  	VM_BUG_ON_PAGE(PageCompound(page), page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1154
  	BUG_ON(!PageLocked(page));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1155
1156
1157
1158
1159
1160
  	mapping = page->mapping;
  	index = page->index;
  	inode = mapping->host;
  	info = SHMEM_I(inode);
  	if (info->flags & VM_LOCKED)
  		goto redirty;
d9fe526a8   Hugh Dickins   tmpfs: allow file...
1161
  	if (!total_swap_pages)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1162
  		goto redirty;
d9fe526a8   Hugh Dickins   tmpfs: allow file...
1163
  	/*
97b713ba3   Christoph Hellwig   fs: kill BDI_CAP_...
1164
1165
1166
1167
1168
  	 * Our capabilities prevent regular writeback or sync from ever calling
  	 * shmem_writepage; but a stacking filesystem might use ->writepage of
  	 * its underlying filesystem, in which case tmpfs should write out to
  	 * swap only in response to memory pressure, and not for the writeback
  	 * threads or sync.
d9fe526a8   Hugh Dickins   tmpfs: allow file...
1169
  	 */
48f170fb7   Hugh Dickins   tmpfs: simplify u...
1170
1171
1172
1173
  	if (!wbc->for_reclaim) {
  		WARN_ON_ONCE(1);	/* Still happens? Tell us about it! */
  		goto redirty;
  	}
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1174
1175
1176
1177
1178
  
  	/*
  	 * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
  	 * value into swapfile.c, the only way we can correctly account for a
  	 * fallocated page arriving here is now to initialize it and write it.
1aac14003   Hugh Dickins   tmpfs: quit when ...
1179
1180
1181
1182
1183
1184
  	 *
  	 * That's okay for a page already fallocated earlier, but if we have
  	 * not yet completed the fallocation, then (a) we want to keep track
  	 * of this page in case we have to undo it, and (b) it may not be a
  	 * good idea to continue anyway, once we're pushing into swap.  So
  	 * reactivate the page, and let shmem_fallocate() quit when too many.
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1185
1186
  	 */
  	if (!PageUptodate(page)) {
1aac14003   Hugh Dickins   tmpfs: quit when ...
1187
1188
1189
1190
1191
  		if (inode->i_private) {
  			struct shmem_falloc *shmem_falloc;
  			spin_lock(&inode->i_lock);
  			shmem_falloc = inode->i_private;
  			if (shmem_falloc &&
8e205f779   Hugh Dickins   shmem: fix faulti...
1192
  			    !shmem_falloc->waitq &&
1aac14003   Hugh Dickins   tmpfs: quit when ...
1193
1194
1195
1196
1197
1198
1199
1200
1201
  			    index >= shmem_falloc->start &&
  			    index < shmem_falloc->next)
  				shmem_falloc->nr_unswapped++;
  			else
  				shmem_falloc = NULL;
  			spin_unlock(&inode->i_lock);
  			if (shmem_falloc)
  				goto redirty;
  		}
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1202
1203
1204
1205
  		clear_highpage(page);
  		flush_dcache_page(page);
  		SetPageUptodate(page);
  	}
48f170fb7   Hugh Dickins   tmpfs: simplify u...
1206
1207
1208
  	swap = get_swap_page();
  	if (!swap.val)
  		goto redirty;
d9fe526a8   Hugh Dickins   tmpfs: allow file...
1209

37e843511   Vladimir Davydov   mm: memcontrol: c...
1210
1211
  	if (mem_cgroup_try_charge_swap(page, swap))
  		goto free_swap;
b1dea800a   Hugh Dickins   tmpfs: fix race b...
1212
1213
  	/*
  	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
6922c0c7a   Hugh Dickins   tmpfs: convert sh...
1214
1215
  	 * if it's not already there.  Do it now before the page is
  	 * moved to swap cache, when its pagelock no longer protects
b1dea800a   Hugh Dickins   tmpfs: fix race b...
1216
  	 * the inode from eviction.  But don't unlock the mutex until
6922c0c7a   Hugh Dickins   tmpfs: convert sh...
1217
1218
  	 * we've incremented swapped, because shmem_unuse_inode() will
  	 * prune a !swapped inode from the swaplist under this mutex.
b1dea800a   Hugh Dickins   tmpfs: fix race b...
1219
  	 */
48f170fb7   Hugh Dickins   tmpfs: simplify u...
1220
1221
1222
  	mutex_lock(&shmem_swaplist_mutex);
  	if (list_empty(&info->swaplist))
  		list_add_tail(&info->swaplist, &shmem_swaplist);
b1dea800a   Hugh Dickins   tmpfs: fix race b...
1223

48f170fb7   Hugh Dickins   tmpfs: simplify u...
1224
  	if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1225
  		spin_lock_irq(&info->lock);
6922c0c7a   Hugh Dickins   tmpfs: convert sh...
1226
  		shmem_recalc_inode(inode);
267a4c76b   Hugh Dickins   tmpfs: fix shmem_...
1227
  		info->swapped++;
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1228
  		spin_unlock_irq(&info->lock);
6922c0c7a   Hugh Dickins   tmpfs: convert sh...
1229

267a4c76b   Hugh Dickins   tmpfs: fix shmem_...
1230
1231
  		swap_shmem_alloc(swap);
  		shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
6922c0c7a   Hugh Dickins   tmpfs: convert sh...
1232
  		mutex_unlock(&shmem_swaplist_mutex);
d9fe526a8   Hugh Dickins   tmpfs: allow file...
1233
  		BUG_ON(page_mapped(page));
9fab5619b   Hugh Dickins   shmem: writepage ...
1234
  		swap_writepage(page, wbc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1235
1236
  		return 0;
  	}
6922c0c7a   Hugh Dickins   tmpfs: convert sh...
1237
  	mutex_unlock(&shmem_swaplist_mutex);
37e843511   Vladimir Davydov   mm: memcontrol: c...
1238
  free_swap:
0a31bc97c   Johannes Weiner   mm: memcontrol: r...
1239
  	swapcache_free(swap);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1240
1241
  redirty:
  	set_page_dirty(page);
d9fe526a8   Hugh Dickins   tmpfs: allow file...
1242
1243
1244
1245
  	if (wbc->for_reclaim)
  		return AOP_WRITEPAGE_ACTIVATE;	/* Return with page locked */
  	unlock_page(page);
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1246
  }
75edd345e   Hugh Dickins   tmpfs: preliminar...
1247
  #if defined(CONFIG_NUMA) && defined(CONFIG_TMPFS)
71fe804b6   Lee Schermerhorn   mempolicy: use st...
1248
  static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
680d794ba   akpm@linux-foundation.org   mount options: fi...
1249
  {
095f1fc4e   Lee Schermerhorn   mempolicy: rework...
1250
  	char buffer[64];
680d794ba   akpm@linux-foundation.org   mount options: fi...
1251

71fe804b6   Lee Schermerhorn   mempolicy: use st...
1252
  	if (!mpol || mpol->mode == MPOL_DEFAULT)
095f1fc4e   Lee Schermerhorn   mempolicy: rework...
1253
  		return;		/* show nothing */
680d794ba   akpm@linux-foundation.org   mount options: fi...
1254

a7a88b237   Hugh Dickins   mempolicy: remove...
1255
  	mpol_to_str(buffer, sizeof(buffer), mpol);
095f1fc4e   Lee Schermerhorn   mempolicy: rework...
1256
1257
  
  	seq_printf(seq, ",mpol=%s", buffer);
680d794ba   akpm@linux-foundation.org   mount options: fi...
1258
  }
71fe804b6   Lee Schermerhorn   mempolicy: use st...
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
  
  /*
   * Return sbinfo->mpol with a reference taken via mpol_get(), or NULL
   * when no policy is set.  The pointer is re-read and the reference is
   * taken under sbinfo->stat_lock to prevent replace/use races.
   */
  static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
  {
  	struct mempolicy *policy;

  	/* Unlocked peek: skip the lock entirely when there is no policy. */
  	if (!sbinfo->mpol)
  		return NULL;

  	spin_lock(&sbinfo->stat_lock);
  	policy = sbinfo->mpol;
  	mpol_get(policy);
  	spin_unlock(&sbinfo->stat_lock);

  	return policy;
  }
75edd345e   Hugh Dickins   tmpfs: preliminar...
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
  #else /* !CONFIG_NUMA || !CONFIG_TMPFS */
  /*
   * Stub for builds without both CONFIG_NUMA and CONFIG_TMPFS:
   * there is no mempolicy mount option to show, so do nothing.
   */
  static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
  {
  }
  /*
   * Stub for builds without both CONFIG_NUMA and CONFIG_TMPFS:
   * always reports "no policy" by returning NULL.
   */
  static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
  {
  	return NULL;
  }
  #endif /* CONFIG_NUMA && CONFIG_TMPFS */
  #ifndef CONFIG_NUMA
  #define vm_policy vm_private_data
  #endif
680d794ba   akpm@linux-foundation.org   mount options: fi...
1283

800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
  /*
   * Fill in a caller-provided pseudo vma that just carries the memory
   * policy for @index of @info.
   *
   * Only vm_start, vm_pgoff, vm_ops and vm_policy are written here; every
   * other field of *vma is left exactly as the caller provided it.
   * Pair with shmem_pseudo_vma_destroy(), which drops the reference taken
   * by mpol_shared_policy_lookup().
   */
  static void shmem_pseudo_vma_init(struct vm_area_struct *vma,
  		struct shmem_inode_info *info, pgoff_t index)
  {
  	unsigned long ino = info->vfs_inode.i_ino;

  	vma->vm_ops = NULL;
  	vma->vm_start = 0;
  	/* Bias interleave by inode number to distribute better across nodes */
  	vma->vm_pgoff = index + ino;
  	vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index);
  }
  
  /*
   * Tear down a pseudo vma set up by shmem_pseudo_vma_init(): release the
   * policy reference that mpol_shared_policy_lookup() took.
   */
  static void shmem_pseudo_vma_destroy(struct vm_area_struct *vma)
  {
  	struct mempolicy *policy = vma->vm_policy;

  	mpol_cond_put(policy);
  }
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1300
1301
  /*
   * Read the page for swap entry @swap back in via swapin_readahead(),
   * using a temporary pseudo vma so that the mempolicy looked up for
   * @index of @info is applied.
   *
   * Returns whatever swapin_readahead() returns; the caller in this file
   * treats NULL as -ENOMEM.
   */
  static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
  			struct shmem_inode_info *info, pgoff_t index)
  {
  	struct vm_area_struct pvma;
  	struct page *page;

  	shmem_pseudo_vma_init(&pvma, info, index);
  	page = swapin_readahead(swap, gfp, &pvma, 0);
  	shmem_pseudo_vma_destroy(&pvma);

  	return page;
  }
  
  /*
   * Try to allocate a PMD-sized compound page covering @index.
   *
   * Returns NULL when CONFIG_TRANSPARENT_HUGE_PAGECACHE is disabled, when
   * the page cache already holds an entry anywhere in the huge-page-aligned
   * range containing @index, or when the allocation itself fails.
   * On success the page has been passed through prep_transhuge_page().
   */
  static struct page *shmem_alloc_hugepage(gfp_t gfp,
  		struct shmem_inode_info *info, pgoff_t index)
  {
  	struct vm_area_struct pvma;
  	struct inode *inode = &info->vfs_inode;
  	struct address_space *mapping = inode->i_mapping;
  	pgoff_t idx, hindex;
  	void __rcu **results;
  	struct page *page;

  	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
  		return NULL;

  	hindex = round_down(index, HPAGE_PMD_NR);

  	/*
  	 * Look for the first populated slot at or after hindex: if it lies
  	 * inside [hindex, hindex + HPAGE_PMD_NR) the range is not empty and
  	 * a huge page cannot be used here.
  	 */
  	rcu_read_lock();
  	if (radix_tree_gang_lookup_slot(&mapping->page_tree, &results, &idx,
  				hindex, 1) && idx < hindex + HPAGE_PMD_NR) {
  		rcu_read_unlock();
  		return NULL;
  	}
  	rcu_read_unlock();

  	shmem_pseudo_vma_init(&pvma, info, hindex);
  	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
  			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
  	shmem_pseudo_vma_destroy(&pvma);
  	if (page)
  		prep_transhuge_page(page);
  	return page;
  }
02098feaa   Hugh Dickins   swapin needs gfp_...
1341
  /*
   * Allocate a single page for @index of @info with alloc_page_vma(),
   * using a temporary pseudo vma so that the mempolicy looked up for
   * @index is applied.  Returns the page, or NULL on allocation failure.
   */
  static struct page *shmem_alloc_page(gfp_t gfp,
  			struct shmem_inode_info *info, pgoff_t index)
  {
  	struct vm_area_struct pvma;
  	struct page *page;

  	shmem_pseudo_vma_init(&pvma, info, index);
  	page = alloc_page_vma(gfp, &pvma, 0);
  	shmem_pseudo_vma_destroy(&pvma);

  	return page;
  }
  
  /*
   * Account for and allocate one small page, or one huge page when @huge
   * is set and CONFIG_TRANSPARENT_HUGE_PAGECACHE is enabled.
   *
   * Accounting is done first: shmem_acct_block() for the inode flags, then
   * (when sbinfo->max_blocks is non-zero) the used_blocks counter.
   * Whatever was accounted is undone if a later step fails.
   *
   * Returns the new page, locked and marked SwapBacked, or:
   *   ERR_PTR(-ENOSPC) if accounting failed or the block limit is reached,
   *   ERR_PTR(-ENOMEM) if the page allocation itself failed.
   */
  static struct page *shmem_alloc_and_acct_page(gfp_t gfp,
  		struct shmem_inode_info *info, struct shmem_sb_info *sbinfo,
  		pgoff_t index, bool huge)
  {
  	struct page *page;
  	int nr;
  	int err = -ENOSPC;

  	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
  		huge = false;
  	nr = huge ? HPAGE_PMD_NR : 1;

  	if (shmem_acct_block(info->flags, nr))
  		goto failed;
  	if (sbinfo->max_blocks) {
  		/*
  		 * Subtract in signed 64-bit.  NOTE(review): max_blocks is
  		 * presumably an unsigned long - confirm in shmem_fs.h.  If
  		 * so, "max_blocks - nr" computed in that type wraps to a
  		 * large positive value when max_blocks < nr on a 32-bit
  		 * build, and the limit check would then wrongly pass.
  		 */
  		if (percpu_counter_compare(&sbinfo->used_blocks,
  					(s64)sbinfo->max_blocks - nr) > 0)
  			goto unacct;
  		percpu_counter_add(&sbinfo->used_blocks, nr);
  	}

  	if (huge)
  		page = shmem_alloc_hugepage(gfp, info, index);
  	else
  		page = shmem_alloc_page(gfp, info, index);
  	if (page) {
  		__SetPageLocked(page);
  		__SetPageSwapBacked(page);
  		return page;
  	}

  	/* Allocation failed: give back what was accounted above. */
  	err = -ENOMEM;
  	if (sbinfo->max_blocks)
  		percpu_counter_add(&sbinfo->used_blocks, -nr);
  unacct:
  	shmem_unacct_blocks(info->flags, nr);
  failed:
  	return ERR_PTR(err);
  }
71fe804b6   Lee Schermerhorn   mempolicy: use st...
1393

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1394
  /*
   * When a page is moved from swapcache to shmem filecache (either by the
   * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of
   * shmem_unuse_inode()), it may have been read in earlier from swap, in
   * ignorance of the mapping it belongs to.  If that mapping has special
   * constraints (like the gma500 GEM driver, which requires RAM below 4GB),
   * we may need to copy to a suitable page before moving to filecache.
   *
   * In a future release, this may well be extended to respect cpuset and
   * NUMA mempolicy, and applied also to anonymous pages in do_swap_page();
   * but for now it is a simple matter of zone: the page must be replaced
   * when its zone number is higher than the highest zone @gfp allows.
   */
  static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
  {
  	if (page_zonenum(page) > gfp_zone(gfp))
  		return true;

  	return false;
  }
  
  /*
   * Replace the swapcache page *@pagep by a freshly allocated copy that
   * satisfies @gfp (minus cpuset/node constraints).
   *
   * @pagep: in: the locked swapcache page to replace;
   *         out: on success, the new page (locked, SwapCache, Uptodate).
   * @gfp:   allocation mask the replacement must satisfy.
   * @info:  shmem inode info, used for the mempolicy lookup.
   * @index: file index, used for the mempolicy lookup.
   *
   * Returns 0 on success, -ENOMEM if no replacement page could be
   * allocated, or the error from shmem_radix_tree_replace().  On the
   * radix-tree error *@pagep is left unchanged and the new page is
   * released instead of the old one.
   */
  static int shmem_replace_page(struct page **pagep, gfp_t gfp,
  				struct shmem_inode_info *info, pgoff_t index)
  {
  	struct page *oldpage, *newpage;
  	struct address_space *swap_mapping;
  	pgoff_t swap_index;
  	int error;

  	oldpage = *pagep;
  	swap_index = page_private(oldpage);
  	swap_mapping = page_mapping(oldpage);

  	/*
  	 * We have arrived here because our zones are constrained, so don't
  	 * limit chance of success by further cpuset and node constraints.
  	 */
  	gfp &= ~GFP_CONSTRAINT_MASK;
  	newpage = shmem_alloc_page(gfp, info, index);
  	if (!newpage)
  		return -ENOMEM;

  	/* Second reference, so the two put_page() calls below balance. */
  	get_page(newpage);
  	copy_highpage(newpage, oldpage);
  	flush_dcache_page(newpage);

  	/* Make newpage look like the swapcache page it is replacing. */
  	__SetPageLocked(newpage);
  	__SetPageSwapBacked(newpage);
  	SetPageUptodate(newpage);
  	set_page_private(newpage, swap_index);
  	SetPageSwapCache(newpage);

  	/*
  	 * Our caller will very soon move newpage out of swapcache, but it's
  	 * a nice clean interface for us to replace oldpage by newpage there.
  	 */
  	spin_lock_irq(&swap_mapping->tree_lock);
  	error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
  								   newpage);
  	if (!error) {
  		__inc_node_page_state(newpage, NR_FILE_PAGES);
  		__dec_node_page_state(oldpage, NR_FILE_PAGES);
  	}
  	spin_unlock_irq(&swap_mapping->tree_lock);

  	if (unlikely(error)) {
  		/*
  		 * Is this possible?  I think not, now that our callers check
  		 * both PageSwapCache and page_private after getting page lock;
  		 * but be defensive.  Reverse old to newpage for clear and free.
  		 */
  		oldpage = newpage;
  	} else {
  		mem_cgroup_migrate(oldpage, newpage);
  		lru_cache_add_anon(newpage);
  		*pagep = newpage;
  	}

  	/* From here on "oldpage" is whichever page is being discarded. */
  	ClearPageSwapCache(oldpage);
  	set_page_private(oldpage, 0);

  	unlock_page(oldpage);
  	/*
  	 * Two references are dropped.  For the failed-replace case these are
  	 * the allocation reference and the get_page() above; for the real
  	 * old page, presumably the swapcache's and the caller's - confirm.
  	 */
  	put_page(oldpage);
  	put_page(oldpage);
  	return error;
  }
  
  /*
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
1478
   * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1479
1480
1481
   *
   * If we allocate a new one we do not mark it dirty. That's up to the
   * vm. If we swap it in we mark it dirty since we also free the swap
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
1482
1483
1484
1485
   * entry since a page cannot live in both the swap and page cache.
   *
   * fault_mm and fault_type are only supplied by shmem_fault:
   * otherwise they are NULL.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1486
   */
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1487
  static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
1488
1489
  	struct page **pagep, enum sgp_type sgp, gfp_t gfp,
  	struct mm_struct *fault_mm, int *fault_type)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1490
1491
  {
  	struct address_space *mapping = inode->i_mapping;
54af60421   Hugh Dickins   tmpfs: convert sh...
1492
  	struct shmem_inode_info *info;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1493
  	struct shmem_sb_info *sbinfo;
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
1494
  	struct mm_struct *charge_mm;
00501b531   Johannes Weiner   mm: memcontrol: r...
1495
  	struct mem_cgroup *memcg;
27ab70062   Hugh Dickins   tmpfs: simplify f...
1496
  	struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1497
  	swp_entry_t swap;
657e3038c   Kirill A. Shutemov   shmem, thp: respe...
1498
  	enum sgp_type sgp_huge = sgp;
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1499
  	pgoff_t hindex = index;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1500
  	int error;
54af60421   Hugh Dickins   tmpfs: convert sh...
1501
  	int once = 0;
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1502
  	int alloced = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1503

09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1504
  	if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1505
  		return -EFBIG;
657e3038c   Kirill A. Shutemov   shmem, thp: respe...
1506
1507
  	if (sgp == SGP_NOHUGE || sgp == SGP_HUGE)
  		sgp = SGP_CACHE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1508
  repeat:
54af60421   Hugh Dickins   tmpfs: convert sh...
1509
  	swap.val = 0;
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
1510
  	page = find_lock_entry(mapping, index);
54af60421   Hugh Dickins   tmpfs: convert sh...
1511
1512
1513
1514
  	if (radix_tree_exceptional_entry(page)) {
  		swap = radix_to_swp_entry(page);
  		page = NULL;
  	}
75edd345e   Hugh Dickins   tmpfs: preliminar...
1515
  	if (sgp <= SGP_CACHE &&
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1516
  	    ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
54af60421   Hugh Dickins   tmpfs: convert sh...
1517
  		error = -EINVAL;
267a4c76b   Hugh Dickins   tmpfs: fix shmem_...
1518
  		goto unlock;
54af60421   Hugh Dickins   tmpfs: convert sh...
1519
  	}
66d2f4d28   Hugh Dickins   shmem: fix init_p...
1520
1521
  	if (page && sgp == SGP_WRITE)
  		mark_page_accessed(page);
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1522
1523
1524
1525
1526
  	/* fallocated page? */
  	if (page && !PageUptodate(page)) {
  		if (sgp != SGP_READ)
  			goto clear;
  		unlock_page(page);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1527
  		put_page(page);
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1528
1529
  		page = NULL;
  	}
54af60421   Hugh Dickins   tmpfs: convert sh...
1530
  	if (page || (sgp == SGP_READ && !swap.val)) {
54af60421   Hugh Dickins   tmpfs: convert sh...
1531
1532
  		*pagep = page;
  		return 0;
27ab70062   Hugh Dickins   tmpfs: simplify f...
1533
1534
1535
  	}
  
  	/*
54af60421   Hugh Dickins   tmpfs: convert sh...
1536
1537
  	 * Fast cache lookup did not find it:
  	 * bring it back from swap or allocate.
27ab70062   Hugh Dickins   tmpfs: simplify f...
1538
  	 */
54af60421   Hugh Dickins   tmpfs: convert sh...
1539
1540
  	info = SHMEM_I(inode);
  	sbinfo = SHMEM_SB(inode->i_sb);
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
1541
  	charge_mm = fault_mm ? : current->mm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1542

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1543
1544
  	if (swap.val) {
  		/* Look it up and read it in.. */
27ab70062   Hugh Dickins   tmpfs: simplify f...
1545
1546
  		page = lookup_swap_cache(swap);
  		if (!page) {
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
1547
1548
  			/* Or update major stats only when swapin succeeds?? */
  			if (fault_type) {
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
1549
  				*fault_type |= VM_FAULT_MAJOR;
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
1550
1551
1552
1553
  				count_vm_event(PGMAJFAULT);
  				mem_cgroup_count_vm_event(fault_mm, PGMAJFAULT);
  			}
  			/* Here we actually start the io */
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1554
  			page = shmem_swapin(swap, gfp, info, index);
27ab70062   Hugh Dickins   tmpfs: simplify f...
1555
  			if (!page) {
54af60421   Hugh Dickins   tmpfs: convert sh...
1556
1557
  				error = -ENOMEM;
  				goto failed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1558
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1559
1560
1561
  		}
  
  		/* We have to do this with page locked to prevent races */
54af60421   Hugh Dickins   tmpfs: convert sh...
1562
  		lock_page(page);
0142ef6cd   Hugh Dickins   shmem: replace_pa...
1563
  		if (!PageSwapCache(page) || page_private(page) != swap.val ||
d18992286   Hugh Dickins   shmem: fix negati...
1564
  		    !shmem_confirm_swap(mapping, index, swap)) {
bde05d1cc   Hugh Dickins   shmem: replace pa...
1565
  			error = -EEXIST;	/* try again */
d18992286   Hugh Dickins   shmem: fix negati...
1566
  			goto unlock;
bde05d1cc   Hugh Dickins   shmem: replace pa...
1567
  		}
27ab70062   Hugh Dickins   tmpfs: simplify f...
1568
  		if (!PageUptodate(page)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1569
  			error = -EIO;
54af60421   Hugh Dickins   tmpfs: convert sh...
1570
  			goto failed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1571
  		}
54af60421   Hugh Dickins   tmpfs: convert sh...
1572
  		wait_on_page_writeback(page);
bde05d1cc   Hugh Dickins   shmem: replace pa...
1573
1574
1575
1576
  		if (shmem_should_replace_page(page, gfp)) {
  			error = shmem_replace_page(&page, gfp, info, index);
  			if (error)
  				goto failed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1577
  		}
27ab70062   Hugh Dickins   tmpfs: simplify f...
1578

9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
1579
  		error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
f627c2f53   Kirill A. Shutemov   memcg: adjust to ...
1580
  				false);
d18992286   Hugh Dickins   shmem: fix negati...
1581
  		if (!error) {
aa3b18955   Hugh Dickins   tmpfs: convert me...
1582
  			error = shmem_add_to_page_cache(page, mapping, index,
fed400a18   Wang Sheng-Hui   mm/shmem.c: remov...
1583
  						swp_to_radix_entry(swap));
215c02bc3   Hugh Dickins   tmpfs: fix shmem_...
1584
1585
1586
1587
1588
1589
1590
1591
  			/*
  			 * We already confirmed swap under page lock, and make
  			 * no memory allocation here, so usually no possibility
  			 * of error; but free_swap_and_cache() only trylocks a
  			 * page, so it is just possible that the entry has been
  			 * truncated or holepunched since swap was confirmed.
  			 * shmem_undo_range() will have done some of the
  			 * unaccounting, now delete_from_swap_cache() will do
93aa7d952   Vladimir Davydov   swap: remove unus...
1592
  			 * the rest.
215c02bc3   Hugh Dickins   tmpfs: fix shmem_...
1593
1594
1595
  			 * Reset swap.val? No, leave it so "failed" goes back to
  			 * "repeat": reading a hole and writing should succeed.
  			 */
00501b531   Johannes Weiner   mm: memcontrol: r...
1596
  			if (error) {
f627c2f53   Kirill A. Shutemov   memcg: adjust to ...
1597
  				mem_cgroup_cancel_charge(page, memcg, false);
215c02bc3   Hugh Dickins   tmpfs: fix shmem_...
1598
  				delete_from_swap_cache(page);
00501b531   Johannes Weiner   mm: memcontrol: r...
1599
  			}
d18992286   Hugh Dickins   shmem: fix negati...
1600
  		}
54af60421   Hugh Dickins   tmpfs: convert sh...
1601
1602
  		if (error)
  			goto failed;
f627c2f53   Kirill A. Shutemov   memcg: adjust to ...
1603
  		mem_cgroup_commit_charge(page, memcg, true, false);
00501b531   Johannes Weiner   mm: memcontrol: r...
1604

4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1605
  		spin_lock_irq(&info->lock);
285b2c4fd   Hugh Dickins   tmpfs: demolish o...
1606
  		info->swapped--;
54af60421   Hugh Dickins   tmpfs: convert sh...
1607
  		shmem_recalc_inode(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1608
  		spin_unlock_irq(&info->lock);
54af60421   Hugh Dickins   tmpfs: convert sh...
1609

66d2f4d28   Hugh Dickins   shmem: fix init_p...
1610
1611
  		if (sgp == SGP_WRITE)
  			mark_page_accessed(page);
54af60421   Hugh Dickins   tmpfs: convert sh...
1612
  		delete_from_swap_cache(page);
27ab70062   Hugh Dickins   tmpfs: simplify f...
1613
1614
  		set_page_dirty(page);
  		swap_free(swap);
54af60421   Hugh Dickins   tmpfs: convert sh...
1615
  	} else {
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1616
1617
1618
  		/* shmem_symlink() */
  		if (mapping->a_ops != &shmem_aops)
  			goto alloc_nohuge;
657e3038c   Kirill A. Shutemov   shmem, thp: respe...
1619
  		if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE)
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
  			goto alloc_nohuge;
  		if (shmem_huge == SHMEM_HUGE_FORCE)
  			goto alloc_huge;
  		switch (sbinfo->huge) {
  			loff_t i_size;
  			pgoff_t off;
  		case SHMEM_HUGE_NEVER:
  			goto alloc_nohuge;
  		case SHMEM_HUGE_WITHIN_SIZE:
  			off = round_up(index, HPAGE_PMD_NR);
  			i_size = round_up(i_size_read(inode), PAGE_SIZE);
  			if (i_size >= HPAGE_PMD_SIZE &&
  					i_size >> PAGE_SHIFT >= off)
  				goto alloc_huge;
  			/* fallthrough */
  		case SHMEM_HUGE_ADVISE:
657e3038c   Kirill A. Shutemov   shmem, thp: respe...
1636
1637
1638
  			if (sgp_huge == SGP_HUGE)
  				goto alloc_huge;
  			/* TODO: implement fadvise() hints */
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1639
  			goto alloc_nohuge;
54af60421   Hugh Dickins   tmpfs: convert sh...
1640
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1641

800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1642
1643
1644
1645
1646
1647
  alloc_huge:
  		page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
  				index, true);
  		if (IS_ERR(page)) {
  alloc_nohuge:		page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
  					index, false);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1648
  		}
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1649
  		if (IS_ERR(page)) {
779750d20   Kirill A. Shutemov   shmem: split huge...
1650
  			int retry = 5;
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1651
1652
  			error = PTR_ERR(page);
  			page = NULL;
779750d20   Kirill A. Shutemov   shmem: split huge...
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
  			if (error != -ENOSPC)
  				goto failed;
  			/*
  			 * Try to reclaim some spece by splitting a huge page
  			 * beyond i_size on the filesystem.
  			 */
  			while (retry--) {
  				int ret;
  				ret = shmem_unused_huge_shrink(sbinfo, NULL, 1);
  				if (ret == SHRINK_STOP)
  					break;
  				if (ret)
  					goto alloc_nohuge;
  			}
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1667
1668
1669
1670
1671
1672
1673
  			goto failed;
  		}
  
  		if (PageTransHuge(page))
  			hindex = round_down(index, HPAGE_PMD_NR);
  		else
  			hindex = index;
66d2f4d28   Hugh Dickins   shmem: fix init_p...
1674
  		if (sgp == SGP_WRITE)
eb39d618f   Hugh Dickins   mm: replace init_...
1675
  			__SetPageReferenced(page);
66d2f4d28   Hugh Dickins   shmem: fix init_p...
1676

9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
1677
  		error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1678
  				PageTransHuge(page));
54af60421   Hugh Dickins   tmpfs: convert sh...
1679
  		if (error)
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1680
1681
1682
  			goto unacct;
  		error = radix_tree_maybe_preload_order(gfp & GFP_RECLAIM_MASK,
  				compound_order(page));
b065b4321   Hugh Dickins   shmem: cleanup sh...
1683
  		if (!error) {
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1684
  			error = shmem_add_to_page_cache(page, mapping, hindex,
fed400a18   Wang Sheng-Hui   mm/shmem.c: remov...
1685
  							NULL);
b065b4321   Hugh Dickins   shmem: cleanup sh...
1686
1687
1688
  			radix_tree_preload_end();
  		}
  		if (error) {
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1689
1690
1691
  			mem_cgroup_cancel_charge(page, memcg,
  					PageTransHuge(page));
  			goto unacct;
b065b4321   Hugh Dickins   shmem: cleanup sh...
1692
  		}
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1693
1694
  		mem_cgroup_commit_charge(page, memcg, false,
  				PageTransHuge(page));
54af60421   Hugh Dickins   tmpfs: convert sh...
1695
  		lru_cache_add_anon(page);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1696
  		spin_lock_irq(&info->lock);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1697
1698
  		info->alloced += 1 << compound_order(page);
  		inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page);
54af60421   Hugh Dickins   tmpfs: convert sh...
1699
  		shmem_recalc_inode(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1700
  		spin_unlock_irq(&info->lock);
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1701
  		alloced = true;
54af60421   Hugh Dickins   tmpfs: convert sh...
1702

779750d20   Kirill A. Shutemov   shmem: split huge...
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
  		if (PageTransHuge(page) &&
  				DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
  				hindex + HPAGE_PMD_NR - 1) {
  			/*
  			 * Part of the huge page is beyond i_size: subject
  			 * to shrink under memory pressure.
  			 */
  			spin_lock(&sbinfo->shrinklist_lock);
  			if (list_empty(&info->shrinklist)) {
  				list_add_tail(&info->shrinklist,
  						&sbinfo->shrinklist);
  				sbinfo->shrinklist_len++;
  			}
  			spin_unlock(&sbinfo->shrinklist_lock);
  		}
ec9516fbc   Hugh Dickins   tmpfs: optimize c...
1718
  		/*
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1719
1720
1721
1722
1723
1724
1725
1726
1727
  		 * Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
  		 */
  		if (sgp == SGP_FALLOC)
  			sgp = SGP_WRITE;
  clear:
  		/*
  		 * Let SGP_WRITE caller clear ends if write does not fill page;
  		 * but SGP_FALLOC on a page fallocated earlier must initialize
  		 * it now, lest undo on failure cancel our earlier guarantee.
ec9516fbc   Hugh Dickins   tmpfs: optimize c...
1728
  		 */
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1729
1730
1731
1732
1733
1734
1735
1736
1737
  		if (sgp != SGP_WRITE && !PageUptodate(page)) {
  			struct page *head = compound_head(page);
  			int i;
  
  			for (i = 0; i < (1 << compound_order(head)); i++) {
  				clear_highpage(head + i);
  				flush_dcache_page(head + i);
  			}
  			SetPageUptodate(head);
ec9516fbc   Hugh Dickins   tmpfs: optimize c...
1738
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1739
  	}
bde05d1cc   Hugh Dickins   shmem: replace pa...
1740

54af60421   Hugh Dickins   tmpfs: convert sh...
1741
  	/* Perhaps the file has been truncated since we checked */
75edd345e   Hugh Dickins   tmpfs: preliminar...
1742
  	if (sgp <= SGP_CACHE &&
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1743
  	    ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
267a4c76b   Hugh Dickins   tmpfs: fix shmem_...
1744
1745
1746
  		if (alloced) {
  			ClearPageDirty(page);
  			delete_from_page_cache(page);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1747
  			spin_lock_irq(&info->lock);
267a4c76b   Hugh Dickins   tmpfs: fix shmem_...
1748
  			shmem_recalc_inode(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1749
  			spin_unlock_irq(&info->lock);
267a4c76b   Hugh Dickins   tmpfs: fix shmem_...
1750
  		}
54af60421   Hugh Dickins   tmpfs: convert sh...
1751
  		error = -EINVAL;
267a4c76b   Hugh Dickins   tmpfs: fix shmem_...
1752
  		goto unlock;
e83c32e8f   Hugh Dickins   tmpfs: simplify p...
1753
  	}
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1754
  	*pagep = page + index - hindex;
54af60421   Hugh Dickins   tmpfs: convert sh...
1755
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1756

59a16ead5   Hugh Dickins   tmpfs: fix spurio...
1757
  	/*
54af60421   Hugh Dickins   tmpfs: convert sh...
1758
  	 * Error recovery.
59a16ead5   Hugh Dickins   tmpfs: fix spurio...
1759
  	 */
54af60421   Hugh Dickins   tmpfs: convert sh...
1760
  unacct:
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
  	if (sbinfo->max_blocks)
  		percpu_counter_sub(&sbinfo->used_blocks,
  				1 << compound_order(page));
  	shmem_unacct_blocks(info->flags, 1 << compound_order(page));
  
  	if (PageTransHuge(page)) {
  		unlock_page(page);
  		put_page(page);
  		goto alloc_nohuge;
  	}
54af60421   Hugh Dickins   tmpfs: convert sh...
1771
  failed:
267a4c76b   Hugh Dickins   tmpfs: fix shmem_...
1772
  	if (swap.val && !shmem_confirm_swap(mapping, index, swap))
d18992286   Hugh Dickins   shmem: fix negati...
1773
1774
  		error = -EEXIST;
  unlock:
27ab70062   Hugh Dickins   tmpfs: simplify f...
1775
  	if (page) {
54af60421   Hugh Dickins   tmpfs: convert sh...
1776
  		unlock_page(page);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1777
  		put_page(page);
54af60421   Hugh Dickins   tmpfs: convert sh...
1778
1779
1780
  	}
  	if (error == -ENOSPC && !once++) {
  		info = SHMEM_I(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1781
  		spin_lock_irq(&info->lock);
54af60421   Hugh Dickins   tmpfs: convert sh...
1782
  		shmem_recalc_inode(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1783
  		spin_unlock_irq(&info->lock);
27ab70062   Hugh Dickins   tmpfs: simplify f...
1784
  		goto repeat;
ff36b8016   Shaohua Li   shmem: reduce pag...
1785
  	}
d18992286   Hugh Dickins   shmem: fix negati...
1786
  	if (error == -EEXIST)	/* from above or from radix_tree_insert */
54af60421   Hugh Dickins   tmpfs: convert sh...
1787
1788
  		goto repeat;
  	return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1789
  }
10d20bd25   Linus Torvalds   shmem: fix shm fa...
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
  /*
   * Wake callback used for the on-stack wait entry in shmem_fault().
   *
   * Works like autoremove_wake_function, with one difference: the wait queue
   * entry is unlinked unconditionally - even if something else had already
   * woken the target.
   *
   * Returns the result of default_wake_function().
   */
  static int synchronous_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
  {
  	int woken;

  	woken = default_wake_function(wait, mode, sync, key);
  	/* Unlink regardless of what default_wake_function() reported. */
  	list_del_init(&wait->task_list);

  	return woken;
  }
d0217ac04   Nick Piggin   mm: fault feedbac...
1801
  static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1802
  {
496ad9aa8   Al Viro   new helper: file_...
1803
  	struct inode *inode = file_inode(vma->vm_file);
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
1804
  	gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
657e3038c   Kirill A. Shutemov   shmem, thp: respe...
1805
  	enum sgp_type sgp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1806
  	int error;
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
1807
  	int ret = VM_FAULT_LOCKED;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1808

f00cdc6df   Hugh Dickins   shmem: fix faulti...
1809
1810
1811
1812
  	/*
  	 * Trinity finds that probing a hole which tmpfs is punching can
  	 * prevent the hole-punch from ever completing: which in turn
  	 * locks writers out with its hold on i_mutex.  So refrain from
8e205f779   Hugh Dickins   shmem: fix faulti...
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
  	 * faulting pages into the hole while it's being punched.  Although
  	 * shmem_undo_range() does remove the additions, it may be unable to
  	 * keep up, as each new page needs its own unmap_mapping_range() call,
  	 * and the i_mmap tree grows ever slower to scan if new vmas are added.
  	 *
  	 * It does not matter if we sometimes reach this check just before the
  	 * hole-punch begins, so that one fault then races with the punch:
  	 * we just need to make racing faults a rare case.
  	 *
  	 * The implementation below would be much simpler if we just used a
  	 * standard mutex or completion: but we cannot take i_mutex in fault,
  	 * and bloating every shmem inode for this unlikely case would be sad.
f00cdc6df   Hugh Dickins   shmem: fix faulti...
1825
1826
1827
1828
1829
1830
  	 */
  	if (unlikely(inode->i_private)) {
  		struct shmem_falloc *shmem_falloc;
  
  		spin_lock(&inode->i_lock);
  		shmem_falloc = inode->i_private;
8e205f779   Hugh Dickins   shmem: fix faulti...
1831
1832
1833
1834
1835
  		if (shmem_falloc &&
  		    shmem_falloc->waitq &&
  		    vmf->pgoff >= shmem_falloc->start &&
  		    vmf->pgoff < shmem_falloc->next) {
  			wait_queue_head_t *shmem_falloc_waitq;
10d20bd25   Linus Torvalds   shmem: fix shm fa...
1836
  			DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function);
8e205f779   Hugh Dickins   shmem: fix faulti...
1837
1838
  
  			ret = VM_FAULT_NOPAGE;
f00cdc6df   Hugh Dickins   shmem: fix faulti...
1839
1840
  			if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
  			   !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
8e205f779   Hugh Dickins   shmem: fix faulti...
1841
  				/* It's polite to up mmap_sem if we can */
f00cdc6df   Hugh Dickins   shmem: fix faulti...
1842
  				up_read(&vma->vm_mm->mmap_sem);
8e205f779   Hugh Dickins   shmem: fix faulti...
1843
  				ret = VM_FAULT_RETRY;
f00cdc6df   Hugh Dickins   shmem: fix faulti...
1844
  			}
8e205f779   Hugh Dickins   shmem: fix faulti...
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
  
  			shmem_falloc_waitq = shmem_falloc->waitq;
  			prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
  					TASK_UNINTERRUPTIBLE);
  			spin_unlock(&inode->i_lock);
  			schedule();
  
  			/*
  			 * shmem_falloc_waitq points into the shmem_fallocate()
  			 * stack of the hole-punching task: shmem_falloc_waitq
  			 * is usually invalid by the time we reach here, but
  			 * finish_wait() does not dereference it in that case;
  			 * though i_lock needed lest racing with wake_up_all().
  			 */
  			spin_lock(&inode->i_lock);
  			finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
  			spin_unlock(&inode->i_lock);
  			return ret;
f00cdc6df   Hugh Dickins   shmem: fix faulti...
1863
  		}
8e205f779   Hugh Dickins   shmem: fix faulti...
1864
  		spin_unlock(&inode->i_lock);
f00cdc6df   Hugh Dickins   shmem: fix faulti...
1865
  	}
657e3038c   Kirill A. Shutemov   shmem, thp: respe...
1866
1867
1868
1869
1870
1871
1872
  	sgp = SGP_CACHE;
  	if (vma->vm_flags & VM_HUGEPAGE)
  		sgp = SGP_HUGE;
  	else if (vma->vm_flags & VM_NOHUGEPAGE)
  		sgp = SGP_NOHUGE;
  
  	error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp,
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
1873
  				  gfp, vma->vm_mm, &ret);
d0217ac04   Nick Piggin   mm: fault feedbac...
1874
1875
  	if (error)
  		return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
1876
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1877
  }
c01d5b300   Hugh Dickins   shmem: get_unmapp...
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
  unsigned long shmem_get_unmapped_area(struct file *file,
  				      unsigned long uaddr, unsigned long len,
  				      unsigned long pgoff, unsigned long flags)
  {
  	unsigned long (*get_area)(struct file *,
  		unsigned long, unsigned long, unsigned long, unsigned long);
  	unsigned long addr;
  	unsigned long offset;
  	unsigned long inflated_len;
  	unsigned long inflated_addr;
  	unsigned long inflated_offset;
  
  	if (len > TASK_SIZE)
  		return -ENOMEM;
  
  	get_area = current->mm->get_unmapped_area;
  	addr = get_area(file, uaddr, len, pgoff, flags);
e496cf3d7   Kirill A. Shutemov   thp: introduce CO...
1895
  	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
c01d5b300   Hugh Dickins   shmem: get_unmapp...
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
  		return addr;
  	if (IS_ERR_VALUE(addr))
  		return addr;
  	if (addr & ~PAGE_MASK)
  		return addr;
  	if (addr > TASK_SIZE - len)
  		return addr;
  
  	if (shmem_huge == SHMEM_HUGE_DENY)
  		return addr;
  	if (len < HPAGE_PMD_SIZE)
  		return addr;
  	if (flags & MAP_FIXED)
  		return addr;
  	/*
  	 * Our priority is to support MAP_SHARED mapped hugely;
  	 * and support MAP_PRIVATE mapped hugely too, until it is COWed.
  	 * But if caller specified an address hint, respect that as before.
  	 */
  	if (uaddr)
  		return addr;
  
  	if (shmem_huge != SHMEM_HUGE_FORCE) {
  		struct super_block *sb;
  
  		if (file) {
  			VM_BUG_ON(file->f_op != &shmem_file_operations);
  			sb = file_inode(file)->i_sb;
  		} else {
  			/*
  			 * Called directly from mm/mmap.c, or drivers/char/mem.c
  			 * for "/dev/zero", to create a shared anonymous object.
  			 */
  			if (IS_ERR(shm_mnt))
  				return addr;
  			sb = shm_mnt->mnt_sb;
  		}
3089bf614   Toshi Kani   shmem: fix tmpfs ...
1933
  		if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER)
c01d5b300   Hugh Dickins   shmem: get_unmapp...
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
  			return addr;
  	}
  
  	offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE-1);
  	if (offset && offset + len < 2 * HPAGE_PMD_SIZE)
  		return addr;
  	if ((addr & (HPAGE_PMD_SIZE-1)) == offset)
  		return addr;
  
  	inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE;
  	if (inflated_len > TASK_SIZE)
  		return addr;
  	if (inflated_len < len)
  		return addr;
  
  	inflated_addr = get_area(NULL, 0, inflated_len, 0, flags);
  	if (IS_ERR_VALUE(inflated_addr))
  		return addr;
  	if (inflated_addr & ~PAGE_MASK)
  		return addr;
  
  	inflated_offset = inflated_addr & (HPAGE_PMD_SIZE-1);
  	inflated_addr += offset - inflated_offset;
  	if (inflated_offset > offset)
  		inflated_addr += HPAGE_PMD_SIZE;
  
  	if (inflated_addr > TASK_SIZE - len)
  		return addr;
  	return inflated_addr;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1964
  #ifdef CONFIG_NUMA
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1965
  static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1966
  {
496ad9aa8   Al Viro   new helper: file_...
1967
  	struct inode *inode = file_inode(vma->vm_file);
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1968
  	return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1969
  }
d8dc74f21   Adrian Bunk   mm/shmem.c: make ...
1970
1971
  static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
  					  unsigned long addr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1972
  {
496ad9aa8   Al Viro   new helper: file_...
1973
  	struct inode *inode = file_inode(vma->vm_file);
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1974
  	pgoff_t index;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1975

41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1976
1977
  	index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
  	return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1978
1979
1980
1981
1982
  }
  #endif
  
  int shmem_lock(struct file *file, int lock, struct user_struct *user)
  {
496ad9aa8   Al Viro   new helper: file_...
1983
  	struct inode *inode = file_inode(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1984
1985
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	int retval = -ENOMEM;
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1986
  	spin_lock_irq(&info->lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1987
1988
1989
1990
  	if (lock && !(info->flags & VM_LOCKED)) {
  		if (!user_shm_lock(inode->i_size, user))
  			goto out_nomem;
  		info->flags |= VM_LOCKED;
89e004ea5   Lee Schermerhorn   SHM_LOCKED pages ...
1991
  		mapping_set_unevictable(file->f_mapping);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1992
1993
1994
1995
  	}
  	if (!lock && (info->flags & VM_LOCKED) && user) {
  		user_shm_unlock(inode->i_size, user);
  		info->flags &= ~VM_LOCKED;
89e004ea5   Lee Schermerhorn   SHM_LOCKED pages ...
1996
  		mapping_clear_unevictable(file->f_mapping);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1997
1998
  	}
  	retval = 0;
89e004ea5   Lee Schermerhorn   SHM_LOCKED pages ...
1999

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2000
  out_nomem:
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
2001
  	spin_unlock_irq(&info->lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2002
2003
  	return retval;
  }
9b83a6a85   Adrian Bunk   [PATCH] mm/{,tiny...
2004
  static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2005
2006
2007
  {
  	file_accessed(file);
  	vma->vm_ops = &shmem_vm_ops;
e496cf3d7   Kirill A. Shutemov   thp: introduce CO...
2008
  	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) &&
f3f0e1d21   Kirill A. Shutemov   khugepaged: add s...
2009
2010
2011
2012
  			((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
  			(vma->vm_end & HPAGE_PMD_MASK)) {
  		khugepaged_enter(vma, vma->vm_flags);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2013
2014
  	return 0;
  }
454abafe9   Dmitry Monakhov   ramfs: replace in...
2015
  static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
09208d150   Al Viro   shmem, ramfs: pro...
2016
  				     umode_t mode, dev_t dev, unsigned long flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2017
2018
2019
2020
  {
  	struct inode *inode;
  	struct shmem_inode_info *info;
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
5b04c6890   Pavel Emelyanov   shmem: factor out...
2021
2022
  	if (shmem_reserve_inode(sb))
  		return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2023
2024
2025
  
  	inode = new_inode(sb);
  	if (inode) {
85fe4025c   Christoph Hellwig   fs: do not assign...
2026
  		inode->i_ino = get_next_ino();
454abafe9   Dmitry Monakhov   ramfs: replace in...
2027
  		inode_init_owner(inode, dir, mode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2028
  		inode->i_blocks = 0;
078cd8279   Deepa Dinamani   fs: Replace CURRE...
2029
  		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
91828a405   David M. Grimes   [PATCH] knfsd: ad...
2030
  		inode->i_generation = get_seconds();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2031
2032
2033
  		info = SHMEM_I(inode);
  		memset(info, 0, (char *)inode - (char *)info);
  		spin_lock_init(&info->lock);
40e041a2c   David Herrmann   shm: add sealing API
2034
  		info->seals = F_SEAL_SEAL;
0b0a0806b   Hugh Dickins   shmem: fix shared...
2035
  		info->flags = flags & VM_NORESERVE;
779750d20   Kirill A. Shutemov   shmem: split huge...
2036
  		INIT_LIST_HEAD(&info->shrinklist);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2037
  		INIT_LIST_HEAD(&info->swaplist);
38f386574   Aristeu Rozanski   xattr: extract si...
2038
  		simple_xattrs_init(&info->xattrs);
72c04902d   Al Viro   Get "no acls for ...
2039
  		cache_no_acl(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2040
2041
2042
  
  		switch (mode & S_IFMT) {
  		default:
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
2043
  			inode->i_op = &shmem_special_inode_operations;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2044
2045
2046
  			init_special_inode(inode, mode, dev);
  			break;
  		case S_IFREG:
14fcc23fd   Hugh Dickins   tmpfs: fix kernel...
2047
  			inode->i_mapping->a_ops = &shmem_aops;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2048
2049
  			inode->i_op = &shmem_inode_operations;
  			inode->i_fop = &shmem_file_operations;
71fe804b6   Lee Schermerhorn   mempolicy: use st...
2050
2051
  			mpol_shared_policy_init(&info->policy,
  						 shmem_get_sbmpol(sbinfo));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2052
2053
  			break;
  		case S_IFDIR:
d8c76e6f4   Dave Hansen   [PATCH] r/o bind ...
2054
  			inc_nlink(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
  			/* Some things misbehave if size == 0 on a directory */
  			inode->i_size = 2 * BOGO_DIRENT_SIZE;
  			inode->i_op = &shmem_dir_inode_operations;
  			inode->i_fop = &simple_dir_operations;
  			break;
  		case S_IFLNK:
  			/*
  			 * Must not load anything in the rbtree,
  			 * mpol_free_shared_policy will not be called.
  			 */
71fe804b6   Lee Schermerhorn   mempolicy: use st...
2065
  			mpol_shared_policy_init(&info->policy, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2066
2067
  			break;
  		}
5b04c6890   Pavel Emelyanov   shmem: factor out...
2068
2069
  	} else
  		shmem_free_inode(sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2070
2071
  	return inode;
  }
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
2072
2073
  bool shmem_mapping(struct address_space *mapping)
  {
f0774d884   Sasha Levin   mm: shmem: check ...
2074
2075
  	if (!mapping->host)
  		return false;
97b713ba3   Christoph Hellwig   fs: kill BDI_CAP_...
2076
  	return mapping->host->i_sb->s_op == &shmem_ops;
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
2077
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2078
  #ifdef CONFIG_TMPFS
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
2079
  static const struct inode_operations shmem_symlink_inode_operations;
69f07ec93   Hugh Dickins   tmpfs: use kmemdu...
2080
  static const struct inode_operations shmem_short_symlink_operations;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2081

6d9d88d07   Jarkko Sakkinen   tmpfs: security x...
2082
2083
2084
2085
2086
  #ifdef CONFIG_TMPFS_XATTR
  static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
  #else
  #define shmem_initxattrs NULL
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2087
  static int
800d15a53   Nick Piggin   implement simple ...
2088
2089
2090
  shmem_write_begin(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned flags,
  			struct page **pagep, void **fsdata)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2091
  {
800d15a53   Nick Piggin   implement simple ...
2092
  	struct inode *inode = mapping->host;
40e041a2c   David Herrmann   shm: add sealing API
2093
  	struct shmem_inode_info *info = SHMEM_I(inode);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2094
  	pgoff_t index = pos >> PAGE_SHIFT;
40e041a2c   David Herrmann   shm: add sealing API
2095
2096
2097
2098
2099
2100
2101
2102
  
  	/* i_mutex is held by caller */
  	if (unlikely(info->seals)) {
  		if (info->seals & F_SEAL_WRITE)
  			return -EPERM;
  		if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
  			return -EPERM;
  	}
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
2103
  	return shmem_getpage(inode, index, pagep, SGP_WRITE);
800d15a53   Nick Piggin   implement simple ...
2104
2105
2106
2107
2108
2109
2110
2111
  }
  
  /*
   * write_end: finish a write of @copied bytes at @pos into @page.
   *
   * Extends i_size if the write went past it.  If the page was not yet
   * uptodate, the parts that were not written are zeroed first (for a
   * transparent compound page, every other subpage is cleared as well) and
   * the head page is then marked uptodate.  Finally the page is dirtied,
   * unlocked and released.  Returns @copied.
   */
  static int
  shmem_write_end(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned copied,
  			struct page *page, void *fsdata)
  {
  	struct inode *inode = mapping->host;

  	if (pos + copied > inode->i_size)
  		i_size_write(inode, pos + copied);

  	if (!PageUptodate(page)) {
  		struct page *head = compound_head(page);

  		if (PageTransCompound(page)) {
  			int n;

  			/* Clear every subpage except the one just written. */
  			for (n = 0; n < HPAGE_PMD_NR; n++) {
  				struct page *sub = head + n;

  				if (sub != page) {
  					clear_highpage(sub);
  					flush_dcache_page(sub);
  				}
  			}
  		}
  		if (copied < PAGE_SIZE) {
  			unsigned from = pos & (PAGE_SIZE - 1);
  			unsigned to = from + copied;

  			/* Zero what lies before and after the copied bytes. */
  			zero_user_segments(page, 0, from, to, PAGE_SIZE);
  		}
  		SetPageUptodate(head);
  	}

  	set_page_dirty(page);
  	unlock_page(page);
  	put_page(page);

  	return copied;
  }
2ba5bbed0   Al Viro   shmem: switch to ...
2139
  static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2140
  {
6e58e79db   Al Viro   introduce copy_pa...
2141
2142
  	struct file *file = iocb->ki_filp;
  	struct inode *inode = file_inode(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2143
  	struct address_space *mapping = inode->i_mapping;
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
2144
2145
  	pgoff_t index;
  	unsigned long offset;
a0ee5ec52   Hugh Dickins   tmpfs: allocate o...
2146
  	enum sgp_type sgp = SGP_READ;
f7c1d0742   Geert Uytterhoeven   mm: Initialize er...
2147
  	int error = 0;
cb66a7a1f   Al Viro   kill generic_segm...
2148
  	ssize_t retval = 0;
6e58e79db   Al Viro   introduce copy_pa...
2149
  	loff_t *ppos = &iocb->ki_pos;
a0ee5ec52   Hugh Dickins   tmpfs: allocate o...
2150
2151
2152
2153
2154
2155
  
  	/*
  	 * Might this read be for a stacking filesystem?  Then when reading
  	 * holes of a sparse file, we actually need to allocate those pages,
  	 * and even mark them dirty, so it cannot exceed the max_blocks limit.
  	 */
777eda2c5   Al Viro   new helper: iter_...
2156
  	if (!iter_is_iovec(to))
75edd345e   Hugh Dickins   tmpfs: preliminar...
2157
  		sgp = SGP_CACHE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2158

09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2159
2160
  	index = *ppos >> PAGE_SHIFT;
  	offset = *ppos & ~PAGE_MASK;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2161
2162
2163
  
  	for (;;) {
  		struct page *page = NULL;
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
2164
2165
  		pgoff_t end_index;
  		unsigned long nr, ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2166
  		loff_t i_size = i_size_read(inode);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2167
  		end_index = i_size >> PAGE_SHIFT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2168
2169
2170
  		if (index > end_index)
  			break;
  		if (index == end_index) {
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2171
  			nr = i_size & ~PAGE_MASK;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2172
2173
2174
  			if (nr <= offset)
  				break;
  		}
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
2175
  		error = shmem_getpage(inode, index, &page, sgp);
6e58e79db   Al Viro   introduce copy_pa...
2176
2177
2178
  		if (error) {
  			if (error == -EINVAL)
  				error = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2179
2180
  			break;
  		}
75edd345e   Hugh Dickins   tmpfs: preliminar...
2181
2182
2183
  		if (page) {
  			if (sgp == SGP_CACHE)
  				set_page_dirty(page);
d3602444e   Hugh Dickins   shmem_getpage ret...
2184
  			unlock_page(page);
75edd345e   Hugh Dickins   tmpfs: preliminar...
2185
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2186
2187
2188
  
  		/*
  		 * We must evaluate after, since reads (unlike writes)
1b1dcc1b5   Jes Sorensen   [PATCH] mutex sub...
2189
  		 * are called without i_mutex protection against truncate
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2190
  		 */
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2191
  		nr = PAGE_SIZE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2192
  		i_size = i_size_read(inode);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2193
  		end_index = i_size >> PAGE_SHIFT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2194
  		if (index == end_index) {
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2195
  			nr = i_size & ~PAGE_MASK;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2196
2197
  			if (nr <= offset) {
  				if (page)
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2198
  					put_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
  				break;
  			}
  		}
  		nr -= offset;
  
  		if (page) {
  			/*
  			 * If users can be writing to this page using arbitrary
  			 * virtual addresses, take care about potential aliasing
  			 * before reading the page on the kernel side.
  			 */
  			if (mapping_writably_mapped(mapping))
  				flush_dcache_page(page);
  			/*
  			 * Mark the page accessed if we read the beginning.
  			 */
  			if (!offset)
  				mark_page_accessed(page);
b5810039a   Nick Piggin   [PATCH] core remo...
2217
  		} else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2218
  			page = ZERO_PAGE(0);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2219
  			get_page(page);
b5810039a   Nick Piggin   [PATCH] core remo...
2220
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2221
2222
2223
2224
  
  		/*
  		 * Ok, we have the page, and it's up-to-date, so
  		 * now we can copy it to user space...
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2225
  		 */
2ba5bbed0   Al Viro   shmem: switch to ...
2226
  		ret = copy_page_to_iter(page, offset, nr, to);
6e58e79db   Al Viro   introduce copy_pa...
2227
  		retval += ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2228
  		offset += ret;
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2229
2230
  		index += offset >> PAGE_SHIFT;
  		offset &= ~PAGE_MASK;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2231

09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2232
  		put_page(page);
2ba5bbed0   Al Viro   shmem: switch to ...
2233
  		if (!iov_iter_count(to))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2234
  			break;
6e58e79db   Al Viro   introduce copy_pa...
2235
2236
2237
2238
  		if (ret < nr) {
  			error = -EFAULT;
  			break;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2239
2240
  		cond_resched();
  	}
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2241
  	*ppos = ((loff_t) index << PAGE_SHIFT) + offset;
6e58e79db   Al Viro   introduce copy_pa...
2242
2243
  	file_accessed(file);
  	return retval ? retval : error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2244
  }
220f2ac91   Hugh Dickins   tmpfs: support SE...
2245
2246
2247
2248
  /*
   * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
   *
   * Walks the entries of @mapping from @index onwards and returns the page
   * index at which the search stopped: the first data (for SEEK_DATA) or
   * hole (for SEEK_HOLE) found, or an index >= @end.  A page that is present
   * but not uptodate counts as a hole.  If no entries are left, SEEK_DATA
   * yields @end and SEEK_HOLE the current index.
   */
  static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
  				    pgoff_t index, pgoff_t end, int whence)
  {
  	pgoff_t indices[PAGEVEC_SIZE];
  	struct pagevec pvec;
  	struct page *page;
  	bool done = false;
  	int i;

  	pagevec_init(&pvec, 0);
  	pvec.nr = 1;		/* start small: we may be there already */
  	do {
  		pvec.nr = find_get_entries(mapping, index,
  					pvec.nr, pvec.pages, indices);
  		if (!pvec.nr) {
  			/* No entries left at or beyond index. */
  			if (whence == SEEK_DATA)
  				index = end;
  			break;
  		}
  		for (i = 0; i < pvec.nr; i++, index++) {
  			bool wanted;

  			if (index < indices[i]) {
  				/* A gap lies before the next entry. */
  				if (whence == SEEK_HOLE) {
  					done = true;
  					break;
  				}
  				index = indices[i];
  			}

  			page = pvec.pages[i];
  			if (page && !radix_tree_exceptional_entry(page) &&
  			    !PageUptodate(page))
  				page = NULL;

  			wanted = page ? whence == SEEK_DATA
  				      : whence == SEEK_HOLE;
  			if (index >= end || wanted) {
  				done = true;
  				break;
  			}
  		}
  		pagevec_remove_exceptionals(&pvec);
  		pagevec_release(&pvec);
  		pvec.nr = PAGEVEC_SIZE;
  		cond_resched();
  	} while (!done);

  	return index;
  }
965c8e59c   Andrew Morton   lseek: the "whenc...
2294
  /*
   * llseek for shmem files.
   *
   * Everything except SEEK_DATA and SEEK_HOLE is passed to
   * generic_file_llseek_size().  For those two, the page cache is searched
   * under the inode lock and the file position is set to the result.
   *
   * Returns the new offset, -EINVAL for a negative @offset, or -ENXIO when
   * @offset is at or beyond i_size or SEEK_DATA finds no data before i_size.
   */
  static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
  {
  	struct address_space *mapping = file->f_mapping;
  	struct inode *inode = mapping->host;
  	loff_t result;

  	if (whence != SEEK_DATA && whence != SEEK_HOLE)
  		return generic_file_llseek_size(file, offset, whence,
  					MAX_LFS_FILESIZE, i_size_read(inode));

  	inode_lock(inode);
  	/* We're holding i_mutex so we can access i_size directly */

  	if (offset < 0) {
  		result = -EINVAL;
  	} else if (offset >= inode->i_size) {
  		result = -ENXIO;
  	} else {
  		pgoff_t first = offset >> PAGE_SHIFT;
  		pgoff_t limit = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
  		loff_t found;

  		found = shmem_seek_hole_data(mapping, first, limit, whence);
  		found <<= PAGE_SHIFT;

  		/* Only move forward; otherwise stay at the requested offset. */
  		result = offset;
  		if (found > offset) {
  			if (found < inode->i_size)
  				result = found;
  			else if (whence == SEEK_DATA)
  				result = -ENXIO;
  			else
  				result = inode->i_size;
  		}
  	}

  	if (result >= 0)
  		result = vfs_setpos(file, result, MAX_LFS_FILESIZE);
  	inode_unlock(inode);

  	return result;
  }
05f65b5c7   David Herrmann   shm: wait for pin...
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
  /*
   * We need a tag: a new tag would expand every radix_tree_node by 8 bytes,
   * so reuse a tag which we firmly believe is never set or cleared on shmem.
   */
  #define SHMEM_TAG_PINNED        PAGECACHE_TAG_TOWRITE
  #define LAST_SCAN               4       /* about 150ms max */
  
  static void shmem_tag_pins(struct address_space *mapping)
  {
  	struct radix_tree_iter iter;
  	void **slot;
  	pgoff_t start;
  	struct page *page;
  
  	lru_add_drain();
  	start = 0;
  	rcu_read_lock();
05f65b5c7   David Herrmann   shm: wait for pin...
2346
2347
2348
  	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
  		page = radix_tree_deref_slot(slot);
  		if (!page || radix_tree_exception(page)) {
2cf938aae   Matthew Wilcox   mm: use radix_tre...
2349
2350
2351
2352
  			if (radix_tree_deref_retry(page)) {
  				slot = radix_tree_iter_retry(&iter);
  				continue;
  			}
05f65b5c7   David Herrmann   shm: wait for pin...
2353
2354
2355
2356
2357
2358
2359
2360
2361
  		} else if (page_count(page) - page_mapcount(page) > 1) {
  			spin_lock_irq(&mapping->tree_lock);
  			radix_tree_tag_set(&mapping->page_tree, iter.index,
  					   SHMEM_TAG_PINNED);
  			spin_unlock_irq(&mapping->tree_lock);
  		}
  
  		if (need_resched()) {
  			cond_resched_rcu();
7165092fe   Matthew Wilcox   radix-tree,shmem:...
2362
  			slot = radix_tree_iter_next(&iter);
05f65b5c7   David Herrmann   shm: wait for pin...
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
  		}
  	}
  	rcu_read_unlock();
  }
  
  /*
   * Setting SEAL_WRITE requires us to verify there's no pending writer. However,
   * via get_user_pages(), drivers might have some pending I/O without any active
   * user-space mappings (e.g., direct-IO, AIO). Therefore, we look at all pages
   * and see whether any of them has an elevated ref-count. If so, we tag those
   * pages and wait for the extra references to be dropped.
   * The caller must guarantee that no new user will acquire writable references
   * to those pages to avoid races.
   */
40e041a2c   David Herrmann   shm: add sealing API
2377
2378
  static int shmem_wait_for_pins(struct address_space *mapping)
  {
05f65b5c7   David Herrmann   shm: wait for pin...
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
  	struct radix_tree_iter iter;
  	void **slot;
  	pgoff_t start;
  	struct page *page;
  	int error, scan;
  
  	shmem_tag_pins(mapping);
  
  	error = 0;
  	for (scan = 0; scan <= LAST_SCAN; scan++) {
  		if (!radix_tree_tagged(&mapping->page_tree, SHMEM_TAG_PINNED))
  			break;
  
  		if (!scan)
  			lru_add_drain_all();
  		else if (schedule_timeout_killable((HZ << scan) / 200))
  			scan = LAST_SCAN;
  
  		start = 0;
  		rcu_read_lock();
05f65b5c7   David Herrmann   shm: wait for pin...
2399
2400
2401
2402
2403
  		radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter,
  					   start, SHMEM_TAG_PINNED) {
  
  			page = radix_tree_deref_slot(slot);
  			if (radix_tree_exception(page)) {
2cf938aae   Matthew Wilcox   mm: use radix_tre...
2404
2405
2406
2407
  				if (radix_tree_deref_retry(page)) {
  					slot = radix_tree_iter_retry(&iter);
  					continue;
  				}
05f65b5c7   David Herrmann   shm: wait for pin...
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
  
  				page = NULL;
  			}
  
  			if (page &&
  			    page_count(page) - page_mapcount(page) != 1) {
  				if (scan < LAST_SCAN)
  					goto continue_resched;
  
  				/*
  				 * On the last scan, we clean up all those tags
  				 * we inserted; but make a note that we still
  				 * found pages pinned.
  				 */
  				error = -EBUSY;
  			}
  
  			spin_lock_irq(&mapping->tree_lock);
  			radix_tree_tag_clear(&mapping->page_tree,
  					     iter.index, SHMEM_TAG_PINNED);
  			spin_unlock_irq(&mapping->tree_lock);
  continue_resched:
  			if (need_resched()) {
  				cond_resched_rcu();
7165092fe   Matthew Wilcox   radix-tree,shmem:...
2432
  				slot = radix_tree_iter_next(&iter);
05f65b5c7   David Herrmann   shm: wait for pin...
2433
2434
2435
2436
2437
2438
  			}
  		}
  		rcu_read_unlock();
  	}
  
  	return error;
40e041a2c   David Herrmann   shm: add sealing API
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
  }
  
  /* Mask of every seal bit accepted by shmem_add_seals(); other bits are rejected. */
  #define F_ALL_SEALS (F_SEAL_SEAL | \
  		     F_SEAL_SHRINK | \
  		     F_SEAL_GROW | \
  		     F_SEAL_WRITE)
  
  int shmem_add_seals(struct file *file, unsigned int seals)
  {
  	struct inode *inode = file_inode(file);
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	int error;
  
  	/*
  	 * SEALING
  	 * Sealing allows multiple parties to share a shmem-file but restrict
  	 * access to a specific subset of file operations. Seals can only be
  	 * added, but never removed. This way, mutually untrusted parties can
  	 * share common memory regions with a well-defined policy. A malicious
  	 * peer can thus never perform unwanted operations on a shared object.
  	 *
  	 * Seals are only supported on special shmem-files and always affect
  	 * the whole underlying inode. Once a seal is set, it may prevent some
  	 * kinds of access to the file. Currently, the following seals are
  	 * defined:
  	 *   SEAL_SEAL: Prevent further seals from being set on this file
  	 *   SEAL_SHRINK: Prevent the file from shrinking
  	 *   SEAL_GROW: Prevent the file from growing
  	 *   SEAL_WRITE: Prevent write access to the file
  	 *
  	 * As we don't require any trust relationship between two parties, we
  	 * must prevent seals from being removed. Therefore, sealing a file
  	 * only adds a given set of seals to the file, it never touches
  	 * existing seals. Furthermore, the "setting seals"-operation can be
  	 * sealed itself, which basically prevents any further seal from being
  	 * added.
  	 *
  	 * Semantics of sealing are only defined on volatile files. Only
  	 * anonymous shmem files support sealing. More importantly, seals are
  	 * never written to disk. Therefore, there's no plan to support it on
  	 * other file types.
  	 */
  
  	if (file->f_op != &shmem_file_operations)
  		return -EINVAL;
  	if (!(file->f_mode & FMODE_WRITE))
  		return -EPERM;
  	if (seals & ~(unsigned int)F_ALL_SEALS)
  		return -EINVAL;
5955102c9   Al Viro   wrappers for ->i_...
2488
  	inode_lock(inode);
40e041a2c   David Herrmann   shm: add sealing API
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
  
  	if (info->seals & F_SEAL_SEAL) {
  		error = -EPERM;
  		goto unlock;
  	}
  
  	if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
  		error = mapping_deny_writable(file->f_mapping);
  		if (error)
  			goto unlock;
  
  		error = shmem_wait_for_pins(file->f_mapping);
  		if (error) {
  			mapping_allow_writable(file->f_mapping);
  			goto unlock;
  		}
  	}
  
  	info->seals |= seals;
  	error = 0;
  
  unlock:
5955102c9   Al Viro   wrappers for ->i_...
2511
  	inode_unlock(inode);
40e041a2c   David Herrmann   shm: add sealing API
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
  	return error;
  }
  EXPORT_SYMBOL_GPL(shmem_add_seals);
  
  int shmem_get_seals(struct file *file)
  {
  	if (file->f_op != &shmem_file_operations)
  		return -EINVAL;
  
  	return SHMEM_I(file_inode(file))->seals;
  }
  EXPORT_SYMBOL_GPL(shmem_get_seals);
  
  /*
   * shmem_fcntl - dispatch the sealing fcntl commands
   *
   * F_GET_SEALS returns the current seals, F_ADD_SEALS adds the seals given
   * in @arg (which must fit in 32 bits); any other @cmd yields -EINVAL.
   */
  long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
  {
  	if (cmd == F_GET_SEALS)
  		return shmem_get_seals(file);

  	if (cmd != F_ADD_SEALS)
  		return -EINVAL;

  	/* disallow upper 32bit */
  	if (arg > UINT_MAX)
  		return -EINVAL;

  	return shmem_add_seals(file, arg);
  }
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
2547
2548
2549
  /*
   * shmem_fallocate - preallocate pages for, or punch a hole in, a file
   *
   * Only FALLOC_FL_KEEP_SIZE and FALLOC_FL_PUNCH_HOLE are accepted in @mode;
   * anything else returns -EOPNOTSUPP.  The whole operation runs with the
   * inode lock held.  While it is in progress inode->i_private points at an
   * on-stack struct shmem_falloc describing the range being worked on.
   *
   * Returns 0 on success or a negative errno.
   */
  static long shmem_fallocate(struct file *file, int mode, loff_t offset,
  							 loff_t len)
  {
  	struct inode *inode = file_inode(file);
  	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	struct shmem_falloc falloc;
  	pgoff_t first, idx, limit;
  	int error;

  	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
  		return -EOPNOTSUPP;

  	inode_lock(inode);

  	if (mode & FALLOC_FL_PUNCH_HOLE) {
  		struct address_space *mapping = file->f_mapping;
  		loff_t unmap_start = round_up(offset, PAGE_SIZE);
  		loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
  		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq);

  		/* protected by i_mutex */
  		if (info->seals & F_SEAL_WRITE) {
  			error = -EPERM;
  			goto out;
  		}

  		/* Publish the range being punched, together with a waitqueue. */
  		falloc.waitq = &waitq;
  		falloc.start = unmap_start >> PAGE_SHIFT;
  		falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
  		spin_lock(&inode->i_lock);
  		inode->i_private = &falloc;
  		spin_unlock(&inode->i_lock);

  		if ((u64)unmap_end > (u64)unmap_start)
  			unmap_mapping_range(mapping, unmap_start,
  					    1 + unmap_end - unmap_start, 0);
  		shmem_truncate_range(inode, offset, offset + len - 1);
  		/* No need to unmap again: hole-punching leaves COWed pages */

  		/* Retract the published range and release anyone waiting. */
  		spin_lock(&inode->i_lock);
  		inode->i_private = NULL;
  		wake_up_all(&waitq);
  		WARN_ON_ONCE(!list_empty(&waitq.task_list));
  		spin_unlock(&inode->i_lock);
  		error = 0;
  		goto out;
  	}

  	/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
  	error = inode_newsize_ok(inode, offset + len);
  	if (error)
  		goto out;

  	if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
  		error = -EPERM;
  		goto out;
  	}

  	first = offset >> PAGE_SHIFT;
  	limit = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
  	/* Try to avoid a swapstorm if len is impossible to satisfy */
  	if (sbinfo->max_blocks && limit - first > sbinfo->max_blocks) {
  		error = -ENOSPC;
  		goto out;
  	}

  	falloc.waitq = NULL;
  	falloc.start = first;
  	falloc.next  = first;
  	falloc.nr_falloced = 0;
  	falloc.nr_unswapped = 0;
  	spin_lock(&inode->i_lock);
  	inode->i_private = &falloc;
  	spin_unlock(&inode->i_lock);

  	idx = first;
  	while (idx < limit) {
  		struct page *page;

  		/*
  		 * Good, the fallocate(2) manpage permits EINTR: we may have
  		 * been interrupted because we are using up too much memory.
  		 */
  		if (signal_pending(current))
  			error = -EINTR;
  		else if (falloc.nr_unswapped > falloc.nr_falloced)
  			error = -ENOMEM;
  		else
  			error = shmem_getpage(inode, idx, &page, SGP_FALLOC);

  		if (error) {
  			/* Remove the !PageUptodate pages we added */
  			if (idx > first) {
  				shmem_undo_range(inode,
  				    (loff_t)first << PAGE_SHIFT,
  				    ((loff_t)idx << PAGE_SHIFT) - 1, true);
  			}
  			goto undone;
  		}

  		/*
  		 * Inform shmem_writepage() how far we have reached.
  		 * No need for lock or barrier: we have the page lock.
  		 */
  		falloc.next++;
  		if (!PageUptodate(page))
  			falloc.nr_falloced++;

  		/*
  		 * If !PageUptodate, leave it that way so that freeable pages
  		 * can be recognized if we need to rollback on error later.
  		 * But set_page_dirty so that memory pressure will swap rather
  		 * than free the pages we are allocating (and SGP_CACHE pages
  		 * might still be clean: we now need to mark those dirty too).
  		 */
  		set_page_dirty(page);
  		unlock_page(page);
  		put_page(page);
  		cond_resched();

  		idx++;
  	}

  	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
  		i_size_write(inode, offset + len);
  	inode->i_ctime = current_time(inode);
  undone:
  	spin_lock(&inode->i_lock);
  	inode->i_private = NULL;
  	spin_unlock(&inode->i_lock);
  out:
  	inode_unlock(inode);
  	return error;
  }
726c33422   David Howells   [PATCH] VFS: Perm...
2670
  static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2671
  {
726c33422   David Howells   [PATCH] VFS: Perm...
2672
  	struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2673
2674
  
  	buf->f_type = TMPFS_MAGIC;
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2675
  	buf->f_bsize = PAGE_SIZE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2676
  	buf->f_namelen = NAME_MAX;
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2677
  	if (sbinfo->max_blocks) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2678
  		buf->f_blocks = sbinfo->max_blocks;
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
2679
2680
2681
  		buf->f_bavail =
  		buf->f_bfree  = sbinfo->max_blocks -
  				percpu_counter_sum(&sbinfo->used_blocks);
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2682
2683
  	}
  	if (sbinfo->max_inodes) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2684
2685
  		buf->f_files = sbinfo->max_inodes;
  		buf->f_ffree = sbinfo->free_inodes;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2686
2687
2688
2689
2690
2691
2692
2693
2694
  	}
  	/* else leave those fields 0 like simple_statfs */
  	return 0;
  }
  
  /*
   * File creation. Allocate an inode, and we're done..
   */
  static int
1a67aafb5   Al Viro   switch ->mknod() ...
2695
  shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2696
  {
0b0a0806b   Hugh Dickins   shmem: fix shared...
2697
  	struct inode *inode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2698
  	int error = -ENOSPC;
454abafe9   Dmitry Monakhov   ramfs: replace in...
2699
  	inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2700
  	if (inode) {
feda821e7   Christoph Hellwig   fs: remove generi...
2701
2702
2703
  		error = simple_acl_create(dir, inode);
  		if (error)
  			goto out_iput;
2a7dba391   Eric Paris   fs/vfs/security: ...
2704
  		error = security_inode_init_security(inode, dir,
9d8f13ba3   Mimi Zohar   security: new sec...
2705
  						     &dentry->d_name,
6d9d88d07   Jarkko Sakkinen   tmpfs: security x...
2706
  						     shmem_initxattrs, NULL);
feda821e7   Christoph Hellwig   fs: remove generi...
2707
2708
  		if (error && error != -EOPNOTSUPP)
  			goto out_iput;
37ec43cdc   Mimi Zohar   evm: calculate HM...
2709

718deb6b6   Al Viro   Fix breakage in s...
2710
  		error = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2711
  		dir->i_size += BOGO_DIRENT_SIZE;
078cd8279   Deepa Dinamani   fs: Replace CURRE...
2712
  		dir->i_ctime = dir->i_mtime = current_time(dir);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2713
2714
  		d_instantiate(dentry, inode);
  		dget(dentry); /* Extra count - pin the dentry in core */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2715
2716
  	}
  	return error;
feda821e7   Christoph Hellwig   fs: remove generi...
2717
2718
2719
  out_iput:
  	iput(inode);
  	return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2720
  }
60545d0d4   Al Viro   [O_TMPFILE] it's ...
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
  /*
   * Create an unnamed temporary file in @dir and attach it to @dentry via
   * d_tmpfile().  Returns 0 on success, -ENOSPC if no inode is available,
   * or the error from security / ACL initialisation.
   */
  static int
  shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
  {
  	struct inode *inode;
  	int err;

  	inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE);
  	if (!inode)
  		return -ENOSPC;

  	err = security_inode_init_security(inode, dir, NULL,
  					   shmem_initxattrs, NULL);
  	/* -EOPNOTSUPP from the security hook is not a failure. */
  	if (!err || err == -EOPNOTSUPP)
  		err = simple_acl_create(dir, inode);
  	if (err) {
  		iput(inode);
  		return err;
  	}

  	d_tmpfile(dentry, inode);
  	return 0;
  }
18bb1db3e   Al Viro   switch vfs_mkdir(...
2744
  /*
   * Create a directory: make the node via shmem_mknod(), then bump the
   * parent's link count.  Returns 0 or the error from shmem_mknod().
   */
  static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
  {
  	int err = shmem_mknod(dir, dentry, mode | S_IFDIR, 0);

  	if (err)
  		return err;

  	inc_nlink(dir);
  	return 0;
  }
4acdaf27e   Al Viro   switch ->create()...
2753
  static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
ebfc3b49a   Al Viro   don't pass nameid...
2754
  		bool excl)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2755
2756
2757
2758
2759
2760
2761
2762
2763
  {
  	return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
  }
  
  /*
   * Link a file..
   */
  static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
  {
75c3cfa85   David Howells   VFS: assorted wei...
2764
  	struct inode *inode = d_inode(old_dentry);
5b04c6890   Pavel Emelyanov   shmem: factor out...
2765
  	int ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2766
2767
2768
2769
2770
2771
  
  	/*
  	 * No ordinary (disk based) filesystem counts links as inodes;
  	 * but each new link needs a new dentry, pinning lowmem, and
  	 * tmpfs dentries cannot be pruned until they are unlinked.
  	 */
5b04c6890   Pavel Emelyanov   shmem: factor out...
2772
2773
2774
  	ret = shmem_reserve_inode(inode->i_sb);
  	if (ret)
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2775
2776
  
  	dir->i_size += BOGO_DIRENT_SIZE;
078cd8279   Deepa Dinamani   fs: Replace CURRE...
2777
  	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
d8c76e6f4   Dave Hansen   [PATCH] r/o bind ...
2778
  	inc_nlink(inode);
7de9c6ee3   Al Viro   new helper: ihold()
2779
  	ihold(inode);	/* New dentry reference */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2780
2781
  	dget(dentry);		/* Extra pinning count for the created dentry */
  	d_instantiate(dentry, inode);
5b04c6890   Pavel Emelyanov   shmem: factor out...
2782
2783
  out:
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2784
2785
2786
2787
  }
  
  static int shmem_unlink(struct inode *dir, struct dentry *dentry)
  {
75c3cfa85   David Howells   VFS: assorted wei...
2788
  	struct inode *inode = d_inode(dentry);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2789

5b04c6890   Pavel Emelyanov   shmem: factor out...
2790
2791
  	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
  		shmem_free_inode(inode->i_sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2792
2793
  
  	dir->i_size -= BOGO_DIRENT_SIZE;
078cd8279   Deepa Dinamani   fs: Replace CURRE...
2794
  	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
9a53c3a78   Dave Hansen   [PATCH] r/o bind ...
2795
  	drop_nlink(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2796
2797
2798
2799
2800
2801
2802
2803
  	dput(dentry);	/* Undo the count from "create" - this does all the work */
  	return 0;
  }
  
  /*
   * Remove the empty directory @dentry from @dir.  Returns -ENOTEMPTY if
   * the directory still has entries, otherwise the result of shmem_unlink().
   */
  static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
  {
  	struct inode *victim;

  	if (!simple_empty(dentry))
  		return -ENOTEMPTY;

  	victim = d_inode(dentry);
  	drop_nlink(victim);
  	drop_nlink(dir);
  	return shmem_unlink(dir, dentry);
  }
37456771c   Miklos Szeredi   shmem: support RE...
2808
2809
  static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
  {
e36cb0b89   David Howells   VFS: (Scripted) C...
2810
2811
  	bool old_is_dir = d_is_dir(old_dentry);
  	bool new_is_dir = d_is_dir(new_dentry);
37456771c   Miklos Szeredi   shmem: support RE...
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
  
  	if (old_dir != new_dir && old_is_dir != new_is_dir) {
  		if (old_is_dir) {
  			drop_nlink(old_dir);
  			inc_nlink(new_dir);
  		} else {
  			drop_nlink(new_dir);
  			inc_nlink(old_dir);
  		}
  	}
  	old_dir->i_ctime = old_dir->i_mtime =
  	new_dir->i_ctime = new_dir->i_mtime =
75c3cfa85   David Howells   VFS: assorted wei...
2824
  	d_inode(old_dentry)->i_ctime =
078cd8279   Deepa Dinamani   fs: Replace CURRE...
2825
  	d_inode(new_dentry)->i_ctime = current_time(old_dir);
37456771c   Miklos Szeredi   shmem: support RE...
2826
2827
2828
  
  	return 0;
  }
46fdb794e   Miklos Szeredi   shmem: support RE...
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
  /*
   * Create a whiteout (character device with WHITEOUT_MODE / WHITEOUT_DEV)
   * in @old_dir under the same name as @old_dentry.
   *
   * Returns 0 on success, -ENOMEM if the dentry cannot be allocated, or the
   * error from shmem_mknod().
   */
  static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
  {
  	struct dentry *wh;
  	int err;

  	wh = d_alloc(old_dentry->d_parent, &old_dentry->d_name);
  	if (!wh)
  		return -ENOMEM;

  	err = shmem_mknod(old_dir, wh, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
  	dput(wh);

  	/*
  	 * Cheat and hash the whiteout while the old dentry is still in
  	 * place, instead of playing games with FS_RENAME_DOES_D_MOVE.
  	 *
  	 * d_lookup() will consistently find one of them at this point,
  	 * not sure which one, but that isn't even important.
  	 */
  	if (!err)
  		d_rehash(wh);

  	return err;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2854
2855
2856
2857
2858
2859
  /*
   * The VFS layer already does all the dentry stuff for rename,
   * we just have to decrement the usage count for the target if
   * it exists so that the VFS layer correctly free's it when it
   * gets overwritten.
   */
3b69ff51d   Miklos Szeredi   shmem: support RE...
2860
  static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2861
  {
75c3cfa85   David Howells   VFS: assorted wei...
2862
  	struct inode *inode = d_inode(old_dentry);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2863
  	int they_are_dirs = S_ISDIR(inode->i_mode);
46fdb794e   Miklos Szeredi   shmem: support RE...
2864
  	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
3b69ff51d   Miklos Szeredi   shmem: support RE...
2865
  		return -EINVAL;
37456771c   Miklos Szeredi   shmem: support RE...
2866
2867
  	if (flags & RENAME_EXCHANGE)
  		return shmem_exchange(old_dir, old_dentry, new_dir, new_dentry);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2868
2869
  	if (!simple_empty(new_dentry))
  		return -ENOTEMPTY;
46fdb794e   Miklos Szeredi   shmem: support RE...
2870
2871
2872
2873
2874
2875
2876
  	if (flags & RENAME_WHITEOUT) {
  		int error;
  
  		error = shmem_whiteout(old_dir, old_dentry);
  		if (error)
  			return error;
  	}
75c3cfa85   David Howells   VFS: assorted wei...
2877
  	if (d_really_is_positive(new_dentry)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2878
  		(void) shmem_unlink(new_dir, new_dentry);
b928095b0   Miklos Szeredi   shmem: fix nlink ...
2879
  		if (they_are_dirs) {
75c3cfa85   David Howells   VFS: assorted wei...
2880
  			drop_nlink(d_inode(new_dentry));
9a53c3a78   Dave Hansen   [PATCH] r/o bind ...
2881
  			drop_nlink(old_dir);
b928095b0   Miklos Szeredi   shmem: fix nlink ...
2882
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2883
  	} else if (they_are_dirs) {
9a53c3a78   Dave Hansen   [PATCH] r/o bind ...
2884
  		drop_nlink(old_dir);
d8c76e6f4   Dave Hansen   [PATCH] r/o bind ...
2885
  		inc_nlink(new_dir);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2886
2887
2888
2889
2890
2891
  	}
  
  	old_dir->i_size -= BOGO_DIRENT_SIZE;
  	new_dir->i_size += BOGO_DIRENT_SIZE;
  	old_dir->i_ctime = old_dir->i_mtime =
  	new_dir->i_ctime = new_dir->i_mtime =
078cd8279   Deepa Dinamani   fs: Replace CURRE...
2892
  	inode->i_ctime = current_time(old_dir);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2893
2894
2895
2896
2897
2898
2899
2900
  	return 0;
  }
  
  static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
  {
  	int error;
  	int len;
  	struct inode *inode;
9276aad6c   Hugh Dickins   tmpfs: remove_shm...
2901
  	struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2902
2903
2904
  	struct shmem_inode_info *info;
  
  	len = strlen(symname) + 1;
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2905
  	if (len > PAGE_SIZE)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2906
  		return -ENAMETOOLONG;
454abafe9   Dmitry Monakhov   ramfs: replace in...
2907
  	inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2908
2909
  	if (!inode)
  		return -ENOSPC;
9d8f13ba3   Mimi Zohar   security: new sec...
2910
  	error = security_inode_init_security(inode, dir, &dentry->d_name,
6d9d88d07   Jarkko Sakkinen   tmpfs: security x...
2911
  					     shmem_initxattrs, NULL);
570bc1c2e   Stephen Smalley   [PATCH] tmpfs: En...
2912
2913
2914
2915
2916
2917
2918
  	if (error) {
  		if (error != -EOPNOTSUPP) {
  			iput(inode);
  			return error;
  		}
  		error = 0;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2919
2920
  	info = SHMEM_I(inode);
  	inode->i_size = len-1;
69f07ec93   Hugh Dickins   tmpfs: use kmemdu...
2921
  	if (len <= SHORT_SYMLINK_LEN) {
3ed47db34   Al Viro   make sure that fr...
2922
2923
  		inode->i_link = kmemdup(symname, len, GFP_KERNEL);
  		if (!inode->i_link) {
69f07ec93   Hugh Dickins   tmpfs: use kmemdu...
2924
2925
2926
2927
  			iput(inode);
  			return -ENOMEM;
  		}
  		inode->i_op = &shmem_short_symlink_operations;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2928
  	} else {
e8ecde25f   Al Viro   Make sure that hi...
2929
  		inode_nohighmem(inode);
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
2930
  		error = shmem_getpage(inode, 0, &page, SGP_WRITE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2931
2932
2933
2934
  		if (error) {
  			iput(inode);
  			return error;
  		}
14fcc23fd   Hugh Dickins   tmpfs: fix kernel...
2935
  		inode->i_mapping->a_ops = &shmem_aops;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2936
  		inode->i_op = &shmem_symlink_inode_operations;
21fc61c73   Al Viro   don't put symlink...
2937
  		memcpy(page_address(page), symname, len);
ec9516fbc   Hugh Dickins   tmpfs: optimize c...
2938
  		SetPageUptodate(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2939
  		set_page_dirty(page);
6746aff74   Wu Fengguang   HWPOISON: shmem: ...
2940
  		unlock_page(page);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2941
  		put_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2942
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2943
  	dir->i_size += BOGO_DIRENT_SIZE;
078cd8279   Deepa Dinamani   fs: Replace CURRE...
2944
  	dir->i_ctime = dir->i_mtime = current_time(dir);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2945
2946
2947
2948
  	d_instantiate(dentry, inode);
  	dget(dentry);
  	return 0;
  }
fceef393a   Al Viro   switch ->get_link...
2949
  /*
   * Delayed-call callback registered by shmem_get_link(): mark the symlink
   * page passed in @arg as accessed and drop the reference held on it.
   */
  static void shmem_put_link(void *arg)
  {
  	struct page *page = arg;

  	mark_page_accessed(page);
  	put_page(page);
  }
6b2553918   Al Viro   replace ->follow_...
2954
  static const char *shmem_get_link(struct dentry *dentry,
fceef393a   Al Viro   switch ->get_link...
2955
2956
  				  struct inode *inode,
  				  struct delayed_call *done)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2957
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2958
  	struct page *page = NULL;
6b2553918   Al Viro   replace ->follow_...
2959
  	int error;
6a6c99049   Al Viro   teach shmem_get_l...
2960
2961
2962
2963
2964
2965
2966
2967
2968
  	if (!dentry) {
  		page = find_get_page(inode->i_mapping, 0);
  		if (!page)
  			return ERR_PTR(-ECHILD);
  		if (!PageUptodate(page)) {
  			put_page(page);
  			return ERR_PTR(-ECHILD);
  		}
  	} else {
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
2969
  		error = shmem_getpage(inode, 0, &page, SGP_READ);
6a6c99049   Al Viro   teach shmem_get_l...
2970
2971
2972
2973
  		if (error)
  			return ERR_PTR(error);
  		unlock_page(page);
  	}
fceef393a   Al Viro   switch ->get_link...
2974
  	set_delayed_call(done, shmem_put_link, page);
21fc61c73   Al Viro   don't put symlink...
2975
  	return page_address(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2976
  }
b09e0fa4b   Eric Paris   tmpfs: implement ...
2977
  #ifdef CONFIG_TMPFS_XATTR
467118102   Randy Dunlap   mm/shmem and tiny...
2978
  /*
b09e0fa4b   Eric Paris   tmpfs: implement ...
2979
2980
   * Superblocks without xattr inode operations may get some security.* xattr
   * support from the LSM "for free". As soon as we have any other xattrs
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
2981
2982
2983
   * like ACLs, we also need to implement the security.* handlers at
   * filesystem level, though.
   */
6d9d88d07   Jarkko Sakkinen   tmpfs: security x...
2984
  /*
6d9d88d07   Jarkko Sakkinen   tmpfs: security x...
2985
2986
2987
2988
2989
2990
2991
2992
   * Callback for security_inode_init_security() for acquiring xattrs.
   */
  static int shmem_initxattrs(struct inode *inode,
  			    const struct xattr *xattr_array,
  			    void *fs_info)
  {
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	const struct xattr *xattr;
38f386574   Aristeu Rozanski   xattr: extract si...
2993
  	struct simple_xattr *new_xattr;
6d9d88d07   Jarkko Sakkinen   tmpfs: security x...
2994
2995
2996
  	size_t len;
  
  	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
38f386574   Aristeu Rozanski   xattr: extract si...
2997
  		new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
6d9d88d07   Jarkko Sakkinen   tmpfs: security x...
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
  		if (!new_xattr)
  			return -ENOMEM;
  
  		len = strlen(xattr->name) + 1;
  		new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
  					  GFP_KERNEL);
  		if (!new_xattr->name) {
  			kfree(new_xattr);
  			return -ENOMEM;
  		}
  
  		memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
  		       XATTR_SECURITY_PREFIX_LEN);
  		memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
  		       xattr->name, len);
38f386574   Aristeu Rozanski   xattr: extract si...
3013
  		simple_xattr_list_add(&info->xattrs, new_xattr);
6d9d88d07   Jarkko Sakkinen   tmpfs: security x...
3014
3015
3016
3017
  	}
  
  	return 0;
  }
aa7c5241c   Andreas Gruenbacher   tmpfs: Use xattr ...
3018
  /*
   * xattr_handler ->get: look up the attribute in the inode's simple xattr
   * list, using the full name that xattr_full_name() derives from @handler
   * and @name.  The dentry argument is not used.
   */
  static int shmem_xattr_handler_get(const struct xattr_handler *handler,
  				   struct dentry *unused, struct inode *inode,
  				   const char *name, void *buffer, size_t size)
  {
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	const char *full_name = xattr_full_name(handler, name);

  	return simple_xattr_get(&info->xattrs, full_name, buffer, size);
  }
aa7c5241c   Andreas Gruenbacher   tmpfs: Use xattr ...
3027
  /*
   * ->set method shared by the tmpfs xattr handlers below.
   *
   * The name is passed through xattr_full_name() and the value is stored in
   * the inode's simple_xattr list; the result of simple_xattr_set() is
   * returned unchanged.  The dentry argument is not used.
   */
  static int shmem_xattr_handler_set(const struct xattr_handler *handler,
  				   struct dentry *unused, struct inode *inode,
  				   const char *name, const void *value,
  				   size_t size, int flags)
  {
  	const char *full_name = xattr_full_name(handler, name);

  	return simple_xattr_set(&SHMEM_I(inode)->xattrs, full_name,
  				value, size, flags);
  }
aa7c5241c   Andreas Gruenbacher   tmpfs: Use xattr ...
3037
3038
3039
3040
3041
  /* Handler for attributes whose names start with XATTR_SECURITY_PREFIX. */
  static const struct xattr_handler shmem_security_xattr_handler = {
  	.get	= shmem_xattr_handler_get,
  	.set	= shmem_xattr_handler_set,
  	.prefix	= XATTR_SECURITY_PREFIX,
  };
b09e0fa4b   Eric Paris   tmpfs: implement ...
3042

aa7c5241c   Andreas Gruenbacher   tmpfs: Use xattr ...
3043
3044
3045
3046
3047
  /* Handler for attributes whose names start with XATTR_TRUSTED_PREFIX. */
  static const struct xattr_handler shmem_trusted_xattr_handler = {
  	.get	= shmem_xattr_handler_get,
  	.set	= shmem_xattr_handler_set,
  	.prefix	= XATTR_TRUSTED_PREFIX,
  };
b09e0fa4b   Eric Paris   tmpfs: implement ...
3048

aa7c5241c   Andreas Gruenbacher   tmpfs: Use xattr ...
3049
3050
3051
3052
3053
3054
3055
3056
3057
  /*
   * NULL-terminated table of the xattr handlers tmpfs provides; it is
   * installed as sb->s_xattr by shmem_fill_super().  The two POSIX ACL
   * handlers are only present when CONFIG_TMPFS_POSIX_ACL is enabled.
   */
  static const struct xattr_handler *shmem_xattr_handlers[] = {
  #ifdef CONFIG_TMPFS_POSIX_ACL
  	&posix_acl_access_xattr_handler,
  	&posix_acl_default_xattr_handler,
  #endif
  	&shmem_security_xattr_handler,
  	&shmem_trusted_xattr_handler,
  	NULL
  };
b09e0fa4b   Eric Paris   tmpfs: implement ...
3058
3059
3060
  
  /*
   * ->listxattr for tmpfs inodes: hand the inode and its simple_xattr list
   * to simple_xattr_list() and return whatever it returns.
   */
  static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
  {
  	struct inode *inode = d_inode(dentry);

  	return simple_xattr_list(inode, &SHMEM_I(inode)->xattrs, buffer, size);
  }
  #endif /* CONFIG_TMPFS_XATTR */
69f07ec93   Hugh Dickins   tmpfs: use kmemdu...
3065
  static const struct inode_operations shmem_short_symlink_operations = {
b09e0fa4b   Eric Paris   tmpfs: implement ...
3066
  	.readlink	= generic_readlink,
6b2553918   Al Viro   replace ->follow_...
3067
  	.get_link	= simple_get_link,
b09e0fa4b   Eric Paris   tmpfs: implement ...
3068
  #ifdef CONFIG_TMPFS_XATTR
b09e0fa4b   Eric Paris   tmpfs: implement ...
3069
  	.listxattr	= shmem_listxattr,
b09e0fa4b   Eric Paris   tmpfs: implement ...
3070
3071
3072
3073
3074
  #endif
  };
  
  static const struct inode_operations shmem_symlink_inode_operations = {
  	.readlink	= generic_readlink,
6b2553918   Al Viro   replace ->follow_...
3075
  	.get_link	= shmem_get_link,
b09e0fa4b   Eric Paris   tmpfs: implement ...
3076
  #ifdef CONFIG_TMPFS_XATTR
b09e0fa4b   Eric Paris   tmpfs: implement ...
3077
  	.listxattr	= shmem_listxattr,
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
3078
  #endif
b09e0fa4b   Eric Paris   tmpfs: implement ...
3079
  };
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
3080

91828a405   David M. Grimes   [PATCH] knfsd: ad...
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
  /*
   * export_operations ->get_parent: tmpfs never resolves a parent here and
   * always answers with ERR_PTR(-ESTALE), whatever @child is.
   */
  static struct dentry *shmem_get_parent(struct dentry *child)
  {
  	struct dentry *no_parent = ERR_PTR(-ESTALE);

  	return no_parent;
  }
  
  static int shmem_match(struct inode *ino, void *vfh)
  {
  	__u32 *fh = vfh;
  	__u64 inum = fh[2];
  	inum = (inum << 32) | fh[1];
  	return ino->i_ino == inum && fh[0] == ino->i_generation;
  }
480b116c9   Christoph Hellwig   shmem: new export...
3093
3094
  static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
  		struct fid *fid, int fh_len, int fh_type)
91828a405   David M. Grimes   [PATCH] knfsd: ad...
3095
  {
91828a405   David M. Grimes   [PATCH] knfsd: ad...
3096
  	struct inode *inode;
480b116c9   Christoph Hellwig   shmem: new export...
3097
  	struct dentry *dentry = NULL;
35c2a7f49   Hugh Dickins   tmpfs,ceph,gfs2,i...
3098
  	u64 inum;
480b116c9   Christoph Hellwig   shmem: new export...
3099
3100
3101
  
  	if (fh_len < 3)
  		return NULL;
91828a405   David M. Grimes   [PATCH] knfsd: ad...
3102

35c2a7f49   Hugh Dickins   tmpfs,ceph,gfs2,i...
3103
3104
  	inum = fid->raw[2];
  	inum = (inum << 32) | fid->raw[1];
480b116c9   Christoph Hellwig   shmem: new export...
3105
3106
  	inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
  			shmem_match, fid->raw);
91828a405   David M. Grimes   [PATCH] knfsd: ad...
3107
  	if (inode) {
480b116c9   Christoph Hellwig   shmem: new export...
3108
  		dentry = d_find_alias(inode);
91828a405   David M. Grimes   [PATCH] knfsd: ad...
3109
3110
  		iput(inode);
  	}
480b116c9   Christoph Hellwig   shmem: new export...
3111
  	return dentry;
91828a405   David M. Grimes   [PATCH] knfsd: ad...
3112
  }
b0b0382bb   Al Viro   ->encode_fh() API...
3113
3114
  /*
   * export_operations ->encode_fh: write a three-word handle for @inode.
   *
   * If the caller's buffer (*len words) is too small, *len is set to the
   * required size (3) and FILEID_INVALID is returned.  Otherwise fh[0]
   * receives the generation, fh[1]/fh[2] the low/high halves of the inode
   * number, *len is set to 3 and 1 is returned.  @parent is not used.
   */
  static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len,
  				struct inode *parent)
  {
  	/*
  	 * Unfortunately insert_inode_hash is not idempotent, so as we hash
  	 * inodes here rather than at creation time, we need a lock to
  	 * ensure we only try to do it once.
  	 */
  	static DEFINE_SPINLOCK(lock);
  	unsigned long ino;

  	if (*len < 3) {
  		*len = 3;
  		return FILEID_INVALID;
  	}

  	if (inode_unhashed(inode)) {
  		spin_lock(&lock);
  		/* re-check under the lock: another encoder may have won */
  		if (inode_unhashed(inode))
  			__insert_inode_hash(inode,
  					    inode->i_ino + inode->i_generation);
  		spin_unlock(&lock);
  	}

  	ino = inode->i_ino;
  	fh[0] = inode->i_generation;
  	fh[1] = ino;
  	fh[2] = ((__u64)ino) >> 32;

  	*len = 3;
  	return 1;
  }
396551644   Christoph Hellwig   exportfs: make st...
3142
  static const struct export_operations shmem_export_ops = {
91828a405   David M. Grimes   [PATCH] knfsd: ad...
3143
  	.get_parent     = shmem_get_parent,
91828a405   David M. Grimes   [PATCH] knfsd: ad...
3144
  	.encode_fh      = shmem_encode_fh,
480b116c9   Christoph Hellwig   shmem: new export...
3145
  	.fh_to_dentry	= shmem_fh_to_dentry,
91828a405   David M. Grimes   [PATCH] knfsd: ad...
3146
  };
680d794ba   akpm@linux-foundation.org   mount options: fi...
3147
3148
  /*
   * Parse a tmpfs mount option string into @sbinfo.
   *
   * @options: comma-separated "name=value" list; it is modified in place
   *           (separators are overwritten with NUL).  May be NULL.
   * @sbinfo:  receives max_blocks, max_inodes, mode, uid, gid, huge and mpol.
   * @remount: when true, "mode", "uid" and "gid" are skipped without being
   *           looked at.
   *
   * Recognised options: size (bytes, or a percentage of totalram_pages when
   * suffixed with '%'), nr_blocks, nr_inodes, mode (octal), uid, gid, huge
   * (CONFIG_TRANSPARENT_HUGE_PAGECACHE only) and mpol (CONFIG_NUMA only).
   *
   * Returns 0 on success, in which case sbinfo->mpol is set to the parsed
   * policy (NULL if no mpol option was given).  Returns 1 after logging an
   * error for an option without a value, an unknown option, or a bad value;
   * any policy parsed so far is released and sbinfo->mpol is left untouched,
   * but other fields of @sbinfo may already have been updated.
   */
  static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
  			       bool remount)
  {
  	char *this_char, *value, *rest;
  	struct mempolicy *mpol = NULL;
  	uid_t uid;
  	gid_t gid;

  	while (options != NULL) {
  		this_char = options;
  		for (;;) {
  			/*
  			 * NUL-terminate this option: unfortunately,
  			 * mount options form a comma-separated list,
  			 * but mpol's nodelist may also contain commas.
  			 */
  			options = strchr(options, ',');
  			if (options == NULL)
  				break;
  			options++;
  			/* a comma followed by a digit stays part of the value */
  			if (!isdigit(*options)) {
  				options[-1] = '\0';
  				break;
  			}
  		}
  		/* tolerate empty entries such as ",," */
  		if (!*this_char)
  			continue;
  		if ((value = strchr(this_char,'=')) != NULL) {
  			*value++ = 0;
  		} else {
  			pr_err("tmpfs: No value for mount option '%s'\n",
  			       this_char);
  			goto error;
  		}

  		if (!strcmp(this_char,"size")) {
  			unsigned long long size;
  			size = memparse(value,&rest);
  			if (*rest == '%') {
  				/* percentage of total RAM, expressed in bytes */
  				size <<= PAGE_SHIFT;
  				size *= totalram_pages;
  				do_div(size, 100);
  				rest++;
  			}
  			if (*rest)
  				goto bad_val;
  			sbinfo->max_blocks =
  				DIV_ROUND_UP(size, PAGE_SIZE);
  		} else if (!strcmp(this_char,"nr_blocks")) {
  			sbinfo->max_blocks = memparse(value, &rest);
  			if (*rest)
  				goto bad_val;
  		} else if (!strcmp(this_char,"nr_inodes")) {
  			sbinfo->max_inodes = memparse(value, &rest);
  			if (*rest)
  				goto bad_val;
  		} else if (!strcmp(this_char,"mode")) {
  			if (remount)
  				continue;
  			sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777;
  			if (*rest)
  				goto bad_val;
  		} else if (!strcmp(this_char,"uid")) {
  			if (remount)
  				continue;
  			uid = simple_strtoul(value, &rest, 0);
  			if (*rest)
  				goto bad_val;
  			sbinfo->uid = make_kuid(current_user_ns(), uid);
  			if (!uid_valid(sbinfo->uid))
  				goto bad_val;
  		} else if (!strcmp(this_char,"gid")) {
  			if (remount)
  				continue;
  			gid = simple_strtoul(value, &rest, 0);
  			if (*rest)
  				goto bad_val;
  			sbinfo->gid = make_kgid(current_user_ns(), gid);
  			if (!gid_valid(sbinfo->gid))
  				goto bad_val;
  #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
  		} else if (!strcmp(this_char, "huge")) {
  			int huge;
  			huge = shmem_parse_huge(value);
  			if (huge < 0)
  				goto bad_val;
  			if (!has_transparent_hugepage() &&
  					huge != SHMEM_HUGE_NEVER)
  				goto bad_val;
  			sbinfo->huge = huge;
  #endif
  #ifdef CONFIG_NUMA
  		} else if (!strcmp(this_char,"mpol")) {
  			/* a later mpol= replaces an earlier one */
  			mpol_put(mpol);
  			mpol = NULL;
  			if (mpol_parse_str(value, &mpol))
  				goto bad_val;
  #endif
  		} else {
  			pr_err("tmpfs: Bad mount option %s\n", this_char);
  			goto error;
  		}
  	}
  	sbinfo->mpol = mpol;
  	return 0;

  bad_val:
  	pr_err("tmpfs: Bad value '%s' for mount option '%s'\n",
  	       value, this_char);
  error:
  	mpol_put(mpol);
  	return 1;

  }
  
  static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
  {
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
680d794ba   akpm@linux-foundation.org   mount options: fi...
3268
  	struct shmem_sb_info config = *sbinfo;
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
3269
3270
  	unsigned long inodes;
  	int error = -EINVAL;
5f00110f7   Greg Thelen   tmpfs: fix use-af...
3271
  	config.mpol = NULL;
680d794ba   akpm@linux-foundation.org   mount options: fi...
3272
  	if (shmem_parse_options(data, &config, true))
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
3273
  		return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3274

0edd73b33   Hugh Dickins   [PATCH] shmem: re...
3275
  	spin_lock(&sbinfo->stat_lock);
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
3276
  	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
7e496299d   Tim Chen   tmpfs: make tmpfs...
3277
  	if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0)
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
3278
  		goto out;
680d794ba   akpm@linux-foundation.org   mount options: fi...
3279
  	if (config.max_inodes < inodes)
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
3280
3281
  		goto out;
  	/*
54af60421   Hugh Dickins   tmpfs: convert sh...
3282
  	 * Those tests disallow limited->unlimited while any are in use;
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
3283
3284
3285
  	 * but we must separately disallow unlimited->limited, because
  	 * in that case we have no record of how much is already in use.
  	 */
680d794ba   akpm@linux-foundation.org   mount options: fi...
3286
  	if (config.max_blocks && !sbinfo->max_blocks)
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
3287
  		goto out;
680d794ba   akpm@linux-foundation.org   mount options: fi...
3288
  	if (config.max_inodes && !sbinfo->max_inodes)
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
3289
3290
3291
  		goto out;
  
  	error = 0;
5a6e75f81   Kirill A. Shutemov   shmem: prepare hu...
3292
  	sbinfo->huge = config.huge;
680d794ba   akpm@linux-foundation.org   mount options: fi...
3293
  	sbinfo->max_blocks  = config.max_blocks;
680d794ba   akpm@linux-foundation.org   mount options: fi...
3294
3295
  	sbinfo->max_inodes  = config.max_inodes;
  	sbinfo->free_inodes = config.max_inodes - inodes;
71fe804b6   Lee Schermerhorn   mempolicy: use st...
3296

5f00110f7   Greg Thelen   tmpfs: fix use-af...
3297
3298
3299
3300
3301
3302
3303
  	/*
  	 * Preserve previous mempolicy unless mpol remount option was specified.
  	 */
  	if (config.mpol) {
  		mpol_put(sbinfo->mpol);
  		sbinfo->mpol = config.mpol;	/* transfers initial ref */
  	}
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
3304
3305
3306
  out:
  	spin_unlock(&sbinfo->stat_lock);
  	return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3307
  }
680d794ba   akpm@linux-foundation.org   mount options: fi...
3308

34c80b1d9   Al Viro   vfs: switch ->sho...
3309
  static int shmem_show_options(struct seq_file *seq, struct dentry *root)
680d794ba   akpm@linux-foundation.org   mount options: fi...
3310
  {
34c80b1d9   Al Viro   vfs: switch ->sho...
3311
  	struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb);
680d794ba   akpm@linux-foundation.org   mount options: fi...
3312
3313
3314
  
  	if (sbinfo->max_blocks != shmem_default_max_blocks())
  		seq_printf(seq, ",size=%luk",
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
3315
  			sbinfo->max_blocks << (PAGE_SHIFT - 10));
680d794ba   akpm@linux-foundation.org   mount options: fi...
3316
3317
3318
  	if (sbinfo->max_inodes != shmem_default_max_inodes())
  		seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
  	if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
09208d150   Al Viro   shmem, ramfs: pro...
3319
  		seq_printf(seq, ",mode=%03ho", sbinfo->mode);
8751e0395   Eric W. Biederman   userns: Convert t...
3320
3321
3322
3323
3324
3325
  	if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
  		seq_printf(seq, ",uid=%u",
  				from_kuid_munged(&init_user_ns, sbinfo->uid));
  	if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
  		seq_printf(seq, ",gid=%u",
  				from_kgid_munged(&init_user_ns, sbinfo->gid));
e496cf3d7   Kirill A. Shutemov   thp: introduce CO...
3326
  #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
5a6e75f81   Kirill A. Shutemov   shmem: prepare hu...
3327
3328
3329
3330
  	/* Rightly or wrongly, show huge mount option unmasked by shmem_huge */
  	if (sbinfo->huge)
  		seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge));
  #endif
71fe804b6   Lee Schermerhorn   mempolicy: use st...
3331
  	shmem_show_mpol(seq, sbinfo->mpol);
680d794ba   akpm@linux-foundation.org   mount options: fi...
3332
3333
  	return 0;
  }
9183df25f   David Herrmann   shm: add memfd_cr...
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
  
  /* Every memfd file name is the user-supplied name behind this prefix. */
  #define MFD_NAME_PREFIX "memfd:"
  #define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
  /* Longest user-supplied name so that prefix + name fits in NAME_MAX. */
  #define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)

  /* The only flag bits memfd_create() accepts. */
  #define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING)

  /*
   * memfd_create(uname, flags): create a shmem file named
   * "memfd:<uname>" and return a new file descriptor for it.
   *
   * Errors: -EINVAL for unknown flag bits or a name longer than
   * MFD_NAME_MAX_LEN; -EFAULT if the name cannot be read from user space or
   * lost its terminator while being copied; -ENOMEM if the name buffer
   * cannot be allocated; otherwise the error from get_unused_fd_flags() or
   * shmem_file_setup().
   */
  SYSCALL_DEFINE2(memfd_create,
  		const char __user *, uname,
  		unsigned int, flags)
  {
  	struct file *file;
  	char *name;
  	long len;
  	int fd, error;

  	if (flags & ~(unsigned int)MFD_ALL_FLAGS)
  		return -EINVAL;

  	/* length includes terminating zero */
  	len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
  	if (len <= 0)
  		return -EFAULT;
  	if (len > MFD_NAME_MAX_LEN + 1)
  		return -EINVAL;

  	name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY);
  	if (!name)
  		return -ENOMEM;

  	strcpy(name, MFD_NAME_PREFIX);
  	error = -EFAULT;
  	if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len))
  		goto err_name;

  	/* terminating-zero may have changed after strnlen_user() returned */
  	if (name[len + MFD_NAME_PREFIX_LEN - 1])
  		goto err_name;

  	fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
  	if (fd < 0) {
  		error = fd;
  		goto err_name;
  	}

  	file = shmem_file_setup(name, 0, VM_NORESERVE);
  	if (IS_ERR(file)) {
  		error = PTR_ERR(file);
  		goto err_fd;
  	}

  	file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
  	file->f_flags |= O_RDWR | O_LARGEFILE;
  	/* sealing is only possible once F_SEAL_SEAL has been cleared */
  	if (flags & MFD_ALLOW_SEALING)
  		SHMEM_I(file_inode(file))->seals &= ~F_SEAL_SEAL;

  	fd_install(fd, file);
  	kfree(name);
  	return fd;

  err_fd:
  	put_unused_fd(fd);
  err_name:
  	kfree(name);
  	return error;
  }
680d794ba   akpm@linux-foundation.org   mount options: fi...
3404
  #endif /* CONFIG_TMPFS */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3405
3406
3407
  
  static void shmem_put_super(struct super_block *sb)
  {
602586a83   Hugh Dickins   shmem: put_super ...
3408
3409
3410
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
  
  	percpu_counter_destroy(&sbinfo->used_blocks);
49cd0a5c2   Greg Thelen   tmpfs: fix mempol...
3411
  	mpol_put(sbinfo->mpol);
602586a83   Hugh Dickins   shmem: put_super ...
3412
  	kfree(sbinfo);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3413
3414
  	sb->s_fs_info = NULL;
  }
2b2af54a5   Kay Sievers   Driver Core: devt...
3415
  int shmem_fill_super(struct super_block *sb, void *data, int silent)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3416
3417
  {
  	struct inode *inode;
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
3418
  	struct shmem_sb_info *sbinfo;
680d794ba   akpm@linux-foundation.org   mount options: fi...
3419
3420
3421
  	int err = -ENOMEM;
  
  	/* Round up to L1_CACHE_BYTES to resist false sharing */
425fbf047   Pekka Enberg   shmem: initialize...
3422
  	sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
680d794ba   akpm@linux-foundation.org   mount options: fi...
3423
3424
3425
  				L1_CACHE_BYTES), GFP_KERNEL);
  	if (!sbinfo)
  		return -ENOMEM;
680d794ba   akpm@linux-foundation.org   mount options: fi...
3426
  	sbinfo->mode = S_IRWXUGO | S_ISVTX;
76aac0e9a   David Howells   CRED: Wrap task c...
3427
3428
  	sbinfo->uid = current_fsuid();
  	sbinfo->gid = current_fsgid();
680d794ba   akpm@linux-foundation.org   mount options: fi...
3429
  	sb->s_fs_info = sbinfo;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3430

0edd73b33   Hugh Dickins   [PATCH] shmem: re...
3431
  #ifdef CONFIG_TMPFS
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3432
3433
3434
3435
3436
  	/*
  	 * Per default we only allow half of the physical ram per
  	 * tmpfs instance, limiting inodes to one per page of lowmem;
  	 * but the internal instance is left unlimited.
  	 */
ca4e05195   Al Viro   shm_mnt is as lon...
3437
  	if (!(sb->s_flags & MS_KERNMOUNT)) {
680d794ba   akpm@linux-foundation.org   mount options: fi...
3438
3439
3440
3441
3442
3443
  		sbinfo->max_blocks = shmem_default_max_blocks();
  		sbinfo->max_inodes = shmem_default_max_inodes();
  		if (shmem_parse_options(data, sbinfo, false)) {
  			err = -EINVAL;
  			goto failed;
  		}
ca4e05195   Al Viro   shm_mnt is as lon...
3444
3445
  	} else {
  		sb->s_flags |= MS_NOUSER;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3446
  	}
91828a405   David M. Grimes   [PATCH] knfsd: ad...
3447
  	sb->s_export_op = &shmem_export_ops;
2f6e38f3c   Hugh Dickins   tmpfs: enable NOS...
3448
  	sb->s_flags |= MS_NOSEC;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3449
3450
3451
  #else
  	sb->s_flags |= MS_NOUSER;
  #endif
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
3452
  	spin_lock_init(&sbinfo->stat_lock);
908c7f194   Tejun Heo   percpu_counter: a...
3453
  	if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
602586a83   Hugh Dickins   shmem: put_super ...
3454
  		goto failed;
680d794ba   akpm@linux-foundation.org   mount options: fi...
3455
  	sbinfo->free_inodes = sbinfo->max_inodes;
779750d20   Kirill A. Shutemov   shmem: split huge...
3456
3457
  	spin_lock_init(&sbinfo->shrinklist_lock);
  	INIT_LIST_HEAD(&sbinfo->shrinklist);
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
3458

285b2c4fd   Hugh Dickins   tmpfs: demolish o...
3459
  	sb->s_maxbytes = MAX_LFS_FILESIZE;
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
3460
3461
  	sb->s_blocksize = PAGE_SIZE;
  	sb->s_blocksize_bits = PAGE_SHIFT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3462
3463
  	sb->s_magic = TMPFS_MAGIC;
  	sb->s_op = &shmem_ops;
cfd95a9cf   Robin H. Johnson   [PATCH] tmpfs: ti...
3464
  	sb->s_time_gran = 1;
b09e0fa4b   Eric Paris   tmpfs: implement ...
3465
  #ifdef CONFIG_TMPFS_XATTR
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
3466
  	sb->s_xattr = shmem_xattr_handlers;
b09e0fa4b   Eric Paris   tmpfs: implement ...
3467
3468
  #endif
  #ifdef CONFIG_TMPFS_POSIX_ACL
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
3469
3470
  	sb->s_flags |= MS_POSIXACL;
  #endif
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
3471

454abafe9   Dmitry Monakhov   ramfs: replace in...
3472
  	inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3473
3474
  	if (!inode)
  		goto failed;
680d794ba   akpm@linux-foundation.org   mount options: fi...
3475
3476
  	inode->i_uid = sbinfo->uid;
  	inode->i_gid = sbinfo->gid;
318ceed08   Al Viro   tidy up after d_m...
3477
3478
  	sb->s_root = d_make_root(inode);
  	if (!sb->s_root)
48fde701a   Al Viro   switch open-coded...
3479
  		goto failed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3480
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3481
3482
3483
3484
  failed:
  	shmem_put_super(sb);
  	return err;
  }
fcc234f88   Pekka Enberg   [PATCH] mm: kill ...
3485
  /*
   * Slab cache holding struct shmem_inode_info objects; created by
   * shmem_init_inodecache() and destroyed by shmem_destroy_inodecache().
   */
  static struct kmem_cache *shmem_inode_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3486
3487
3488
  
  static struct inode *shmem_alloc_inode(struct super_block *sb)
  {
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3489
3490
3491
  	struct shmem_inode_info *info;
  	info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
  	if (!info)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3492
  		return NULL;
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3493
  	return &info->vfs_inode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3494
  }
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3495
  static void shmem_destroy_callback(struct rcu_head *head)
fa0d7e3de   Nick Piggin   fs: icache RCU fr...
3496
3497
  {
  	struct inode *inode = container_of(head, struct inode, i_rcu);
84e710da2   Al Viro   parallel lookups ...
3498
3499
  	if (S_ISLNK(inode->i_mode))
  		kfree(inode->i_link);
fa0d7e3de   Nick Piggin   fs: icache RCU fr...
3500
3501
  	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3502
3503
  static void shmem_destroy_inode(struct inode *inode)
  {
09208d150   Al Viro   shmem, ramfs: pro...
3504
  	if (S_ISREG(inode->i_mode))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3505
  		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3506
  	call_rcu(&inode->i_rcu, shmem_destroy_callback);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3507
  }
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3508
  static void shmem_init_inode(void *foo)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3509
  {
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3510
3511
  	struct shmem_inode_info *info = foo;
  	inode_init_once(&info->vfs_inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3512
  }
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3513
  static int shmem_init_inodecache(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3514
3515
3516
  {
  	shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
  				sizeof(struct shmem_inode_info),
5d097056c   Vladimir Davydov   kmemcg: account c...
3517
  				0, SLAB_PANIC|SLAB_ACCOUNT, shmem_init_inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3518
3519
  	return 0;
  }
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3520
  static void shmem_destroy_inodecache(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3521
  {
1a1d92c10   Alexey Dobriyan   [PATCH] Really ig...
3522
  	kmem_cache_destroy(shmem_inode_cachep);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3523
  }
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
3524
  static const struct address_space_operations shmem_aops = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3525
  	.writepage	= shmem_writepage,
767193253   Ken Chen   [PATCH] simplify ...
3526
  	.set_page_dirty	= __set_page_dirty_no_writeback,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3527
  #ifdef CONFIG_TMPFS
800d15a53   Nick Piggin   implement simple ...
3528
3529
  	.write_begin	= shmem_write_begin,
  	.write_end	= shmem_write_end,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3530
  #endif
1c93923cc   Andrew Morton   include/linux/mig...
3531
  #ifdef CONFIG_MIGRATION
304dbdb7a   Lee Schermerhorn   [PATCH] add migra...
3532
  	.migratepage	= migrate_page,
1c93923cc   Andrew Morton   include/linux/mig...
3533
  #endif
aa261f549   Andi Kleen   HWPOISON: Enable ...
3534
  	.error_remove_page = generic_error_remove_page,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3535
  };
15ad7cdcf   Helge Deller   [PATCH] struct se...
3536
  static const struct file_operations shmem_file_operations = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3537
  	.mmap		= shmem_mmap,
c01d5b300   Hugh Dickins   shmem: get_unmapp...
3538
  	.get_unmapped_area = shmem_get_unmapped_area,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3539
  #ifdef CONFIG_TMPFS
220f2ac91   Hugh Dickins   tmpfs: support SE...
3540
  	.llseek		= shmem_file_llseek,
2ba5bbed0   Al Viro   shmem: switch to ...
3541
  	.read_iter	= shmem_file_read_iter,
8174202b3   Al Viro   write_iter varian...
3542
  	.write_iter	= generic_file_write_iter,
1b061d924   Christoph Hellwig   rename the generi...
3543
  	.fsync		= noop_fsync,
82c156f85   Al Viro   switch generic_fi...
3544
  	.splice_read	= generic_file_splice_read,
f6cb85d00   Al Viro   shmem: switch to ...
3545
  	.splice_write	= iter_file_splice_write,
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
3546
  	.fallocate	= shmem_fallocate,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3547
3548
  #endif
  };
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
3549
  static const struct inode_operations shmem_inode_operations = {
44a30220b   Yu Zhao   shmem: recalculat...
3550
  	.getattr	= shmem_getattr,
94c1e62df   Hugh Dickins   tmpfs: take contr...
3551
  	.setattr	= shmem_setattr,
b09e0fa4b   Eric Paris   tmpfs: implement ...
3552
  #ifdef CONFIG_TMPFS_XATTR
b09e0fa4b   Eric Paris   tmpfs: implement ...
3553
  	.listxattr	= shmem_listxattr,
feda821e7   Christoph Hellwig   fs: remove generi...
3554
  	.set_acl	= simple_set_acl,
b09e0fa4b   Eric Paris   tmpfs: implement ...
3555
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3556
  };
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
3557
  /*
   * Inode operations for tmpfs directories.  Name-space operations need
   * CONFIG_TMPFS, xattr listing needs CONFIG_TMPFS_XATTR, and setattr and
   * set_acl are only provided with CONFIG_TMPFS_POSIX_ACL.
   */
  static const struct inode_operations shmem_dir_inode_operations = {
  #ifdef CONFIG_TMPFS
  	.lookup		= simple_lookup,
  	.create		= shmem_create,
  	.mknod		= shmem_mknod,
  	.mkdir		= shmem_mkdir,
  	.rmdir		= shmem_rmdir,
  	.link		= shmem_link,
  	.unlink		= shmem_unlink,
  	.symlink	= shmem_symlink,
  	.rename		= shmem_rename2,
  	.tmpfile	= shmem_tmpfile,
  #endif
  #ifdef CONFIG_TMPFS_XATTR
  	.listxattr	= shmem_listxattr,
  #endif
  #ifdef CONFIG_TMPFS_POSIX_ACL
  	.set_acl	= simple_set_acl,
  	.setattr	= shmem_setattr,
  #endif
  };
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
3578
  /*
   * Inode operations table providing only xattr listing
   * (CONFIG_TMPFS_XATTR) and setattr/set_acl (CONFIG_TMPFS_POSIX_ACL); it
   * is empty when neither option is enabled.
   */
  static const struct inode_operations shmem_special_inode_operations = {
  #ifdef CONFIG_TMPFS_XATTR
  	.listxattr	= shmem_listxattr,
  #endif
  #ifdef CONFIG_TMPFS_POSIX_ACL
  	.set_acl	= simple_set_acl,
  	.setattr	= shmem_setattr,
  #endif
  };
759b9775c   Hugh Dickins   [PATCH] shmem and...
3587
  static const struct super_operations shmem_ops = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3588
3589
3590
3591
3592
  	.alloc_inode	= shmem_alloc_inode,
  	.destroy_inode	= shmem_destroy_inode,
  #ifdef CONFIG_TMPFS
  	.statfs		= shmem_statfs,
  	.remount_fs	= shmem_remount_fs,
680d794ba   akpm@linux-foundation.org   mount options: fi...
3593
  	.show_options	= shmem_show_options,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3594
  #endif
1f895f75d   Al Viro   switch shmem.c to...
3595
  	.evict_inode	= shmem_evict_inode,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3596
3597
  	.drop_inode	= generic_delete_inode,
  	.put_super	= shmem_put_super,
779750d20   Kirill A. Shutemov   shmem: split huge...
3598
3599
3600
3601
  #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
  	.nr_cached_objects	= shmem_unused_huge_count,
  	.free_cached_objects	= shmem_unused_huge_scan,
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3602
  };
f0f37e2f7   Alexey Dobriyan   const: mark struc...
3603
  static const struct vm_operations_struct shmem_vm_ops = {
54cb8821d   Nick Piggin   mm: merge populat...
3604
  	.fault		= shmem_fault,
d7c175517   Ning Qu   mm: implement ->m...
3605
  	.map_pages	= filemap_map_pages,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3606
3607
3608
3609
3610
  #ifdef CONFIG_NUMA
  	.set_policy     = shmem_set_policy,
  	.get_policy     = shmem_get_policy,
  #endif
  };
3c26ff6e4   Al Viro   convert get_sb_no...
3611
3612
  static struct dentry *shmem_mount(struct file_system_type *fs_type,
  	int flags, const char *dev_name, void *data)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3613
  {
3c26ff6e4   Al Viro   convert get_sb_no...
3614
  	return mount_nodev(fs_type, flags, data, shmem_fill_super);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3615
  }
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3616
  static struct file_system_type shmem_fs_type = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3617
3618
  	.owner		= THIS_MODULE,
  	.name		= "tmpfs",
3c26ff6e4   Al Viro   convert get_sb_no...
3619
  	.mount		= shmem_mount,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3620
  	.kill_sb	= kill_litter_super,
2b8576cb0   Eric W. Biederman   userns: Allow the...
3621
  	.fs_flags	= FS_USERNS_MOUNT,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3622
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3623

41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3624
  /*
   * shmem_init - set up the inode cache, register the "tmpfs" filesystem
   * type and create the kernel-internal mount stored in shm_mnt.
   *
   * Returns 0 on success (or when the inode cache already exists, i.e. an
   * earlier call already did the work).  On failure the steps completed so
   * far are undone in reverse order, shm_mnt is set to ERR_PTR(error) and
   * the negative error is returned.
   */
  int __init shmem_init(void)
  {
  	int error;

  	/* If rootfs called this, don't re-init */
  	if (shmem_inode_cachep)
  		return 0;

  	error = shmem_init_inodecache();
  	if (error)
  		goto out3;

  	error = register_filesystem(&shmem_fs_type);
  	if (error) {
  		pr_err("Could not register tmpfs\n");
  		goto out2;
  	}

  	shm_mnt = kern_mount(&shmem_fs_type);
  	if (IS_ERR(shm_mnt)) {
  		error = PTR_ERR(shm_mnt);
  		pr_err("Could not kern_mount tmpfs\n");
  		goto out1;
  	}

  #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
  	/*
  	 * Only the regular policies (never/always/within_size/advise) may be
  	 * copied into the superblock: shmem_huge_enabled() handles DENY and
  	 * FORCE as global overrides and VM_BUG_ONs on any other sbinfo->huge.
  	 * The regular policies compare greater than SHMEM_HUGE_DENY
  	 * (NOTE(review): relies on DENY/FORCE being the negative special
  	 * values defined earlier in this file), so the test must be '>'.
  	 * The former '<' test only ever matched FORCE.
  	 */
  	if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY)
  		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
  	else
  		shmem_huge = 0; /* just in case it was patched */
  #endif
  	return 0;

  out1:
  	unregister_filesystem(&shmem_fs_type);
  out2:
  	shmem_destroy_inodecache();
  out3:
  	shm_mnt = ERR_PTR(error);
  	return error;
  }
853ac43ab   Matt Mackall   shmem: unify regu...
3664

e496cf3d7   Kirill A. Shutemov   thp: introduce CO...
3665
  #if defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE) && defined(CONFIG_SYSFS)
5a6e75f81   Kirill A. Shutemov   shmem: prepare hu...
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
  /*
   * sysfs ->show for the shmem_enabled attribute.
   *
   * Writes the name of every huge-page policy into @buf, separated by
   * spaces, with the one currently held in shmem_huge wrapped in square
   * brackets.  The trailing space is replaced by a newline.
   *
   * Returns the number of bytes written to @buf.
   */
  static ssize_t shmem_enabled_show(struct kobject *kobj,
  		struct kobj_attribute *attr, char *buf)
  {
  	/* Order here is the order in which the names are printed. */
  	static const int values[] = {
  		SHMEM_HUGE_ALWAYS,
  		SHMEM_HUGE_WITHIN_SIZE,
  		SHMEM_HUGE_ADVISE,
  		SHMEM_HUGE_NEVER,
  		SHMEM_HUGE_DENY,
  		SHMEM_HUGE_FORCE,
  	};
  	int i, count;

  	for (i = 0, count = 0; i < ARRAY_SIZE(values); i++) {
  		const char *fmt = shmem_huge == values[i] ? "[%s] " : "%s ";

  		count += sprintf(buf + count, fmt,
  				shmem_format_huge(values[i]));
  	}
  	/* Every entry ends in a space; turn the last one into the newline. */
  	buf[count - 1] = '\n';
  	return count;
  }
  
  /*
   * sysfs ->store for the shmem_enabled attribute.
   *
   * Copies the written text into a small local buffer, strips one trailing
   * newline, and parses it with shmem_parse_huge().  The value becomes the
   * new global shmem_huge; regular policies are additionally propagated to
   * the superblock of the internal shm_mnt mount.
   *
   * Returns @count on success, or -EINVAL when the input does not fit the
   * buffer, is not a recognised policy name, or asks for anything other
   * than "never"/"deny" on a system without transparent hugepage support.
   */
  static ssize_t shmem_enabled_store(struct kobject *kobj,
  		struct kobj_attribute *attr, const char *buf, size_t count)
  {
  	char tmp[16];
  	int huge;

  	/* Need room for the input plus the terminating NUL. */
  	if (count + 1 > sizeof(tmp))
  		return -EINVAL;
  	memcpy(tmp, buf, count);
  	tmp[count] = '\0';
  	if (count && tmp[count - 1] == '\n')
  		tmp[count - 1] = '\0';

  	huge = shmem_parse_huge(tmp);
  	if (huge == -EINVAL)
  		return -EINVAL;
  	if (!has_transparent_hugepage() &&
  			huge != SHMEM_HUGE_NEVER && huge != SHMEM_HUGE_DENY)
  		return -EINVAL;

  	shmem_huge = huge;
  	/*
  	 * DENY and FORCE are global overrides and must not be stored in the
  	 * superblock (shmem_huge_enabled() VM_BUG_ONs on unknown sbinfo->huge
  	 * values).  Regular policies compare greater than SHMEM_HUGE_DENY
  	 * (NOTE(review): relies on DENY/FORCE being the negative special
  	 * values defined earlier in this file); the former '<' test only
  	 * ever matched FORCE, so regular policies were silently ignored.
  	 */
  	if (shmem_huge > SHMEM_HUGE_DENY)
  		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
  	return count;
  }
  
  /*
   * Attribute "shmem_enabled" (mode 0644), read via shmem_enabled_show()
   * and written via shmem_enabled_store().
   */
  struct kobj_attribute shmem_enabled_attr =
  	__ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
3b33719c9   Arnd Bergmann   thp: move shmem_h...
3719
  #endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */
f3f0e1d21   Kirill A. Shutemov   khugepaged: add s...
3720

3b33719c9   Arnd Bergmann   thp: move shmem_h...
3721
  #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
f3f0e1d21   Kirill A. Shutemov   khugepaged: add s...
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
  /*
   * shmem_huge_enabled - report whether huge pages are enabled for @vma.
   *
   * The global shmem_huge setting wins when it is FORCE (always true) or
   * DENY (always false).  Otherwise the decision follows the policy stored
   * in the superblock of the file backing @vma.
   */
  bool shmem_huge_enabled(struct vm_area_struct *vma)
  {
  	struct inode *inode = file_inode(vma->vm_file);
  	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
  	pgoff_t first_huge;
  	loff_t size;

  	/* Global overrides come first. */
  	if (shmem_huge == SHMEM_HUGE_FORCE)
  		return true;
  	if (shmem_huge == SHMEM_HUGE_DENY)
  		return false;

  	switch (sbinfo->huge) {
  	case SHMEM_HUGE_NEVER:
  		return false;
  	case SHMEM_HUGE_ALWAYS:
  		return true;
  	case SHMEM_HUGE_WITHIN_SIZE:
  		/*
  		 * Enabled when the page-rounded file size is at least one
  		 * huge page and reaches the mapping's offset rounded up to
  		 * a huge-page boundary.
  		 */
  		first_huge = round_up(vma->vm_pgoff, HPAGE_PMD_NR);
  		size = round_up(i_size_read(inode), PAGE_SIZE);
  		if (size >= HPAGE_PMD_SIZE &&
  				size >> PAGE_SHIFT >= first_huge)
  			return true;
  		/* fall through: otherwise behave like "advise" */
  	case SHMEM_HUGE_ADVISE:
  		/* TODO: implement fadvise() hints */
  		return (vma->vm_flags & VM_HUGEPAGE);
  	default:
  		VM_BUG_ON(1);
  		return false;
  	}
  }
3b33719c9   Arnd Bergmann   thp: move shmem_h...
3752
  #endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */
5a6e75f81   Kirill A. Shutemov   shmem: prepare hu...
3753

853ac43ab   Matt Mackall   shmem: unify regu...
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
  #else /* !CONFIG_SHMEM */
  
  /*
   * tiny-shmem: simple shmemfs and tmpfs using ramfs code
   *
   * This is intended for small systems where the benefits of the full
   * shmem code (swap-backed and resource-limited) are outweighed by
   * its complexity. On systems without swap this code should be
   * effectively equivalent, but much lighter weight.
   */
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3764
  static struct file_system_type shmem_fs_type = {
853ac43ab   Matt Mackall   shmem: unify regu...
3765
  	.name		= "tmpfs",
3c26ff6e4   Al Viro   convert get_sb_no...
3766
  	.mount		= ramfs_mount,
853ac43ab   Matt Mackall   shmem: unify regu...
3767
  	.kill_sb	= kill_litter_super,
2b8576cb0   Eric W. Biederman   userns: Allow the...
3768
  	.fs_flags	= FS_USERNS_MOUNT,
853ac43ab   Matt Mackall   shmem: unify regu...
3769
  };
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3770
  /*
   * !CONFIG_SHMEM variant of shmem_init(): register the filesystem type and
   * create the internal mount.  Either step failing trips BUG_ON(), so the
   * function only ever returns 0.
   */
  int __init shmem_init(void)
  {
  	int err = register_filesystem(&shmem_fs_type);

  	BUG_ON(err != 0);

  	shm_mnt = kern_mount(&shmem_fs_type);
  	BUG_ON(IS_ERR(shm_mnt));

  	return 0;
  }
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3779
  /* !CONFIG_SHMEM stub: ignores both arguments and always returns 0. */
  int shmem_unuse(swp_entry_t swap, struct page *page)
  {
  	return 0;
  }
3f96b79ad   Hugh Dickins   tmpfs: depend on ...
3783
3784
3785
3786
  /* !CONFIG_SHMEM stub: ignores its arguments and always returns 0. */
  int shmem_lock(struct file *file, int lock, struct user_struct *user)
  {
  	return 0;
  }
245132643   Hugh Dickins   SHM_UNLOCK: fix U...
3787
3788
3789
  /* !CONFIG_SHMEM stub: intentionally does nothing. */
  void shmem_unlock_mapping(struct address_space *mapping)
  {
  }
c01d5b300   Hugh Dickins   shmem: get_unmapp...
3790
3791
3792
3793
3794
3795
3796
3797
  #ifdef CONFIG_MMU
  /*
   * !CONFIG_SHMEM variant: forward unchanged to the get_unmapped_area
   * method of the current task's mm.
   */
  unsigned long shmem_get_unmapped_area(struct file *file,
  				      unsigned long addr, unsigned long len,
  				      unsigned long pgoff, unsigned long flags)
  {
  	struct mm_struct *mm = current->mm;

  	return mm->get_unmapped_area(file, addr, len, pgoff, flags);
  }
  #endif
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3798
  /*
   * !CONFIG_SHMEM variant: truncate the page cache of @inode's mapping over
   * [@lstart, @lend] using the generic helper.
   */
  void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
  {
  	struct address_space *mapping = inode->i_mapping;

  	truncate_inode_pages_range(mapping, lstart, lend);
  }
  EXPORT_SYMBOL_GPL(shmem_truncate_range);
0b0a0806b   Hugh Dickins   shmem: fix shared...
3803
3804
  /*
   * !CONFIG_SHMEM aliases: map the shmem_* names onto their generic/ramfs
   * counterparts.  shmem_get_inode() drops its @flags argument, and size
   * accounting is a no-op (acct evaluates to 0, unacct does nothing).
   */
  #define shmem_acct_size(flags, size)		0
  #define shmem_unacct_size(flags, size)		do {} while (0)
  #define shmem_get_inode(sb, dir, mode, dev, flags)	ramfs_get_inode(sb, dir, mode, dev)
  #define shmem_file_operations			ramfs_file_operations
  #define shmem_vm_ops				generic_file_vm_ops
853ac43ab   Matt Mackall   shmem: unify regu...
3808
3809
3810
3811
  
  #endif /* CONFIG_SHMEM */
  
  /* common code */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3812

19938e350   Rasmus Villemoes   mm/shmem.c: const...
3813
  static const struct dentry_operations anon_ops = {
118b23022   Al Viro   cope with potenti...
3814
  	.d_dname = simple_dname
3451538a1   Al Viro   shmem_setup_file(...
3815
  };
c72770909   Eric Paris   security: shmem: ...
3816
3817
  /*
   * Common worker behind the shmem_*file_setup() helpers and
   * shmem_zero_setup(): build an unlinked file of @size bytes on the
   * internal shm_mnt mount.
   *
   * @name:    name given to the pseudo dentry
   * @size:    value stored in i_size; must lie in [0, MAX_LFS_FILESIZE]
   * @flags:   passed to the size accounting helpers and to shmem_get_inode()
   * @i_flags: extra bits OR-ed into inode->i_flags
   *
   * Returns the new file, or an ERR_PTR(): the error held in shm_mnt if the
   * mount never came up, -EINVAL for a bad @size, -ENOMEM when accounting or
   * dentry allocation fails, -ENOSPC when no inode is available, or whatever
   * ramfs_nommu_expand_for_mapping()/alloc_file() report.
   */
  static struct file *__shmem_file_setup(const char *name, loff_t size,
  				       unsigned long flags, unsigned int i_flags)
  {
  	struct super_block *sb;
  	struct inode *inode;
  	struct qstr qname;
  	struct path path;
  	struct file *file;

  	if (IS_ERR(shm_mnt))
  		return ERR_CAST(shm_mnt);

  	if (size < 0 || size > MAX_LFS_FILESIZE)
  		return ERR_PTR(-EINVAL);

  	if (shmem_acct_size(flags, size))
  		return ERR_PTR(-ENOMEM);

  	/* Allocate the pseudo dentry on the internal mount. */
  	file = ERR_PTR(-ENOMEM);
  	qname.name = name;
  	qname.len = strlen(name);
  	qname.hash = 0; /* will go */
  	sb = shm_mnt->mnt_sb;
  	path.mnt = mntget(shm_mnt);
  	path.dentry = d_alloc_pseudo(sb, &qname);
  	if (!path.dentry)
  		goto put_memory;
  	d_set_d_op(path.dentry, &anon_ops);

  	file = ERR_PTR(-ENOSPC);
  	inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
  	if (!inode)
  		goto put_memory;

  	inode->i_flags |= i_flags;
  	d_instantiate(path.dentry, inode);
  	inode->i_size = size;
  	clear_nlink(inode);	/* It is unlinked */

  	/*
  	 * From here on the inode is attached to the dentry, so failures
  	 * skip the explicit unaccounting and only drop the path.
  	 */
  	file = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
  	if (IS_ERR(file))
  		goto put_path;

  	file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
  			  &shmem_file_operations);
  	if (IS_ERR(file))
  		goto put_path;

  	return file;

  put_memory:
  	shmem_unacct_size(flags, size);
  put_path:
  	path_put(&path);
  	return file;
  }
c72770909   Eric Paris   security: shmem: ...
3870
3871
3872
3873
3874
  
  /**
   * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be
   * 	kernel internal.  There will be NO LSM permission checks against the
   * 	underlying inode.  So users of this interface must do LSM checks at a
   *	higher layer.  The users are the big_key and shm implementations.  LSM
   *	checks are provided at the key or shm level rather than the inode.
   * @name: name for dentry (to be seen in /proc/<pid>/maps)
   * @size: size to be set for the file
   * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
   */
  struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
  {
  	/* S_PRIVATE is the only difference from shmem_file_setup(). */
  	struct file *file = __shmem_file_setup(name, size, flags, S_PRIVATE);

  	return file;
  }
  
  /**
   * shmem_file_setup - get an unlinked file living in tmpfs
   * @name: name for dentry (to be seen in /proc/<pid>/maps)
   * @size: size to be set for the file
   * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
   */
  struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
  {
  	/* No extra inode flags for the ordinary variant. */
  	struct file *file = __shmem_file_setup(name, size, flags, 0);

  	return file;
  }
  EXPORT_SYMBOL_GPL(shmem_file_setup);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3897

467118102   Randy Dunlap   mm/shmem and tiny...
3898
  /**
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3899
   * shmem_zero_setup - setup a shared anonymous mapping
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3900
3901
3902
3903
3904
3905
   * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
   */
  int shmem_zero_setup(struct vm_area_struct *vma)
  {
  	struct file *file;
  	loff_t size = vma->vm_end - vma->vm_start;
66fc13039   Hugh Dickins   mm: shmem_zero_se...
3906
3907
3908
3909
3910
3911
3912
  	/*
  	 * Cloning a new file under mmap_sem leads to a lock ordering conflict
  	 * between XFS directory reading and selinux: since this file is only
  	 * accessible to the user through its mapping, use S_PRIVATE flag to
  	 * bypass file security, in the same way as shmem_kernel_file_setup().
  	 */
  	file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3913
3914
3915
3916
3917
3918
3919
  	if (IS_ERR(file))
  		return PTR_ERR(file);
  
  	if (vma->vm_file)
  		fput(vma->vm_file);
  	vma->vm_file = file;
  	vma->vm_ops = &shmem_vm_ops;
f3f0e1d21   Kirill A. Shutemov   khugepaged: add s...
3920

e496cf3d7   Kirill A. Shutemov   thp: introduce CO...
3921
  	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) &&
f3f0e1d21   Kirill A. Shutemov   khugepaged: add s...
3922
3923
3924
3925
  			((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
  			(vma->vm_end & HPAGE_PMD_MASK)) {
  		khugepaged_enter(vma, vma->vm_flags);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3926
3927
  	return 0;
  }
d9d90e5eb   Hugh Dickins   tmpfs: add shmem_...
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
  
  /**
   * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags.
   * @mapping:	the page's address_space
   * @index:	the page index
   * @gfp:	the page allocator flags to use if allocating
   *
   * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)",
   * with any new page allocations done using the specified allocation flags.
   * But read_cache_page_gfp() uses the ->readpage() method: which does not
   * suit tmpfs, since it may have pages in swapcache, and needs to find those
   * for itself; although drivers/gpu/drm i915 and ttm rely upon this support.
   *
   * i915_gem_object_get_pages_gtt() mixes __GFP_NORETRY | __GFP_NOWARN in
   * with the mapping_gfp_mask(), to avoid OOMing the machine unnecessarily.
   *
   * With CONFIG_SHMEM the page is returned unlocked, or an ERR_PTR() carrying
   * the shmem_getpage_gfp() error is returned.
   */
  struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
  					 pgoff_t index, gfp_t gfp)
  {
  #ifdef CONFIG_SHMEM
  	struct inode *inode = mapping->host;
  	struct page *page;
  	int err;

  	/* Only valid on mappings that use the shmem address_space ops. */
  	BUG_ON(mapping->a_ops != &shmem_aops);

  	err = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
  				gfp, NULL, NULL);
  	if (err)
  		return ERR_PTR(err);

  	unlock_page(page);
  	return page;
  #else
  	/*
  	 * The tiny !SHMEM case uses ramfs without swap
  	 */
  	return read_cache_page_gfp(mapping, index, gfp);
  #endif
  }
  EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);