Blame view

mm/shmem.c 105 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
  /*
   * Resizable virtual memory filesystem for Linux.
   *
   * Copyright (C) 2000 Linus Torvalds.
   *		 2000 Transmeta Corp.
   *		 2000-2001 Christoph Rohland
   *		 2000-2001 SAP AG
   *		 2002 Red Hat Inc.
6922c0c7a   Hugh Dickins   tmpfs: convert sh...
9
10
   * Copyright (C) 2002-2011 Hugh Dickins.
   * Copyright (C) 2011 Google Inc.
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
11
   * Copyright (C) 2002-2005 VERITAS Software Corporation.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
13
14
15
16
17
   * Copyright (C) 2004 Andi Kleen, SuSE Labs
   *
   * Extended attribute support for tmpfs:
   * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
   *
853ac43ab   Matt Mackall   shmem: unify regu...
18
19
20
   * tiny-shmem:
   * Copyright (c) 2004, 2008 Matt Mackall <mpm@selenic.com>
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
21
22
   * This file is released under the GPL.
   */
853ac43ab   Matt Mackall   shmem: unify regu...
23
24
25
26
  #include <linux/fs.h>
  #include <linux/init.h>
  #include <linux/vfs.h>
  #include <linux/mount.h>
250297edf   Andrew Morton   mm/shmem.c: remov...
27
  #include <linux/ramfs.h>
caefba174   Hugh Dickins   shmem: respect MA...
28
  #include <linux/pagemap.h>
853ac43ab   Matt Mackall   shmem: unify regu...
29
30
  #include <linux/file.h>
  #include <linux/mm.h>
46c9a946d   Arnd Bergmann   shmem: use monoto...
31
  #include <linux/random.h>
174cd4b1e   Ingo Molnar   sched/headers: Pr...
32
  #include <linux/sched/signal.h>
b95f1b31b   Paul Gortmaker   mm: Map most file...
33
  #include <linux/export.h>
853ac43ab   Matt Mackall   shmem: unify regu...
34
  #include <linux/swap.h>
e2e40f2c1   Christoph Hellwig   fs: move struct k...
35
  #include <linux/uio.h>
f3f0e1d21   Kirill A. Shutemov   khugepaged: add s...
36
  #include <linux/khugepaged.h>
749df87bd   Mike Kravetz   mm/shmem: add hug...
37
  #include <linux/hugetlb.h>
853ac43ab   Matt Mackall   shmem: unify regu...
38

95cc09d66   Andrea Arcangeli   userfaultfd: shme...
39
  #include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */
853ac43ab   Matt Mackall   shmem: unify regu...
40
41
42
  static struct vfsmount *shm_mnt;
  
  #ifdef CONFIG_SHMEM
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
43
44
45
46
47
  /*
   * This virtual memory filesystem is heavily based on the ramfs. It
   * extends ramfs by the ability to use swap and honor resource limits
   * which makes it a completely usable filesystem.
   */
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
48
  #include <linux/xattr.h>
a56942551   Christoph Hellwig   knfsd: exportfs: ...
49
  #include <linux/exportfs.h>
1c7c474c3   Christoph Hellwig   make generic_acl ...
50
  #include <linux/posix_acl.h>
feda821e7   Christoph Hellwig   fs: remove generi...
51
  #include <linux/posix_acl_xattr.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
52
  #include <linux/mman.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
53
54
55
56
  #include <linux/string.h>
  #include <linux/slab.h>
  #include <linux/backing-dev.h>
  #include <linux/shmem_fs.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
57
  #include <linux/writeback.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
58
  #include <linux/blkdev.h>
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
59
  #include <linux/pagevec.h>
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
60
  #include <linux/percpu_counter.h>
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
61
  #include <linux/falloc.h>
708e3508c   Hugh Dickins   tmpfs: clone shme...
62
  #include <linux/splice.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
63
64
65
66
  #include <linux/security.h>
  #include <linux/swapops.h>
  #include <linux/mempolicy.h>
  #include <linux/namei.h>
b00dc3ad7   Hugh Dickins   [PATCH] tmpfs: fi...
67
  #include <linux/ctype.h>
304dbdb7a   Lee Schermerhorn   [PATCH] add migra...
68
  #include <linux/migrate.h>
c1f60a5a4   Christoph Lameter   [PATCH] reduce MA...
69
  #include <linux/highmem.h>
680d794ba   akpm@linux-foundation.org   mount options: fi...
70
  #include <linux/seq_file.h>
925629278   Mimi Zohar   integrity: specia...
71
  #include <linux/magic.h>
9183df25f   David Herrmann   shm: add memfd_cr...
72
  #include <linux/syscalls.h>
40e041a2c   David Herrmann   shm: add sealing API
73
  #include <linux/fcntl.h>
9183df25f   David Herrmann   shm: add memfd_cr...
74
  #include <uapi/linux/memfd.h>
cfda05267   Mike Rapoport   userfaultfd: shme...
75
  #include <linux/userfaultfd_k.h>
4c27fe4c4   Mike Rapoport   userfaultfd: shme...
76
  #include <linux/rmap.h>
2b4db7961   Amir Goldstein   tmpfs: generate r...
77
  #include <linux/uuid.h>
304dbdb7a   Lee Schermerhorn   [PATCH] add migra...
78

7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
79
  #include <linux/uaccess.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
80
  #include <asm/pgtable.h>
dd56b0464   Mel Gorman   mm: page_alloc: h...
81
  #include "internal.h"
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
82
83
  #define BLOCKS_PER_PAGE  (PAGE_SIZE/512)
  #define VM_ACCT(size)    (PAGE_ALIGN(size) >> PAGE_SHIFT)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
84

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
85
86
  /* Pretend that each entry is of this size in directory's i_size */
  #define BOGO_DIRENT_SIZE 20
69f07ec93   Hugh Dickins   tmpfs: use kmemdu...
87
88
  /* Symlink up to this size is kmalloc'ed instead of using a swappable page */
  #define SHORT_SYMLINK_LEN 128
1aac14003   Hugh Dickins   tmpfs: quit when ...
89
  /*
f00cdc6df   Hugh Dickins   shmem: fix faulti...
90
91
92
   * shmem_fallocate communicates with shmem_fault or shmem_writepage via
   * inode->i_private (with i_mutex making sure that it has only one user at
   * a time): we would prefer not to enlarge the shmem inode just for that.
1aac14003   Hugh Dickins   tmpfs: quit when ...
93
94
   */
  struct shmem_falloc {
8e205f779   Hugh Dickins   shmem: fix faulti...
95
  	wait_queue_head_t *waitq; /* faults into hole wait for punch to end */
1aac14003   Hugh Dickins   tmpfs: quit when ...
96
97
98
99
100
  	pgoff_t start;		/* start of range currently being fallocated */
  	pgoff_t next;		/* the next page offset to be fallocated */
  	pgoff_t nr_falloced;	/* how many new pages have been fallocated */
  	pgoff_t nr_unswapped;	/* how often writepage refused to swap out */
  };
b76db7354   Andrew Morton   mount-options-fix...
101
  #ifdef CONFIG_TMPFS
680d794ba   akpm@linux-foundation.org   mount options: fi...
102
103
104
105
106
107
108
109
110
  static unsigned long shmem_default_max_blocks(void)
  {
  	return totalram_pages / 2;
  }
  
  static unsigned long shmem_default_max_inodes(void)
  {
  	return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
  }
b76db7354   Andrew Morton   mount-options-fix...
111
  #endif
680d794ba   akpm@linux-foundation.org   mount options: fi...
112

bde05d1cc   Hugh Dickins   shmem: replace pa...
113
114
115
  static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
  static int shmem_replace_page(struct page **pagep, gfp_t gfp,
  				struct shmem_inode_info *info, pgoff_t index);
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
116
  static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
117
  		struct page **pagep, enum sgp_type sgp,
cfda05267   Mike Rapoport   userfaultfd: shme...
118
  		gfp_t gfp, struct vm_area_struct *vma,
2b7403035   Souptick Joarder   mm: Change return...
119
  		struct vm_fault *vmf, vm_fault_t *fault_type);
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
120

f3f0e1d21   Kirill A. Shutemov   khugepaged: add s...
121
  int shmem_getpage(struct inode *inode, pgoff_t index,
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
122
  		struct page **pagep, enum sgp_type sgp)
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
123
124
  {
  	return shmem_getpage_gfp(inode, index, pagep, sgp,
cfda05267   Mike Rapoport   userfaultfd: shme...
125
  		mapping_gfp_mask(inode->i_mapping), NULL, NULL, NULL);
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
126
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
127

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
128
129
130
131
132
133
134
135
136
137
138
139
140
  static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
  {
  	return sb->s_fs_info;
  }
  
  /*
   * shmem_file_setup pre-accounts the whole fixed size of a VM object,
   * for shared memory and for shared anonymous (/dev/zero) mappings
   * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1),
   * consistent with the pre-accounting of private mappings ...
   */
  static inline int shmem_acct_size(unsigned long flags, loff_t size)
  {
0b0a0806b   Hugh Dickins   shmem: fix shared...
141
  	return (flags & VM_NORESERVE) ?
191c54244   Al Viro   mm: collapse secu...
142
  		0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
143
144
145
146
  }
  
  static inline void shmem_unacct_size(unsigned long flags, loff_t size)
  {
0b0a0806b   Hugh Dickins   shmem: fix shared...
147
  	if (!(flags & VM_NORESERVE))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
148
149
  		vm_unacct_memory(VM_ACCT(size));
  }
771425179   Konstantin Khlebnikov   shmem: update mem...
150
151
152
153
154
155
156
157
158
159
160
161
  static inline int shmem_reacct_size(unsigned long flags,
  		loff_t oldsize, loff_t newsize)
  {
  	if (!(flags & VM_NORESERVE)) {
  		if (VM_ACCT(newsize) > VM_ACCT(oldsize))
  			return security_vm_enough_memory_mm(current->mm,
  					VM_ACCT(newsize) - VM_ACCT(oldsize));
  		else if (VM_ACCT(newsize) < VM_ACCT(oldsize))
  			vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize));
  	}
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
162
163
  /*
   * ... whereas tmpfs objects are accounted incrementally as
75edd345e   Hugh Dickins   tmpfs: preliminar...
164
   * pages are allocated, in order to allow large sparse files.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
165
166
167
   * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
   * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
   */
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
168
  static inline int shmem_acct_block(unsigned long flags, long pages)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
169
  {
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
170
171
172
173
174
  	if (!(flags & VM_NORESERVE))
  		return 0;
  
  	return security_vm_enough_memory_mm(current->mm,
  			pages * VM_ACCT(PAGE_SIZE));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
175
176
177
178
  }
  
  static inline void shmem_unacct_blocks(unsigned long flags, long pages)
  {
0b0a0806b   Hugh Dickins   shmem: fix shared...
179
  	if (flags & VM_NORESERVE)
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
180
  		vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
181
  }
0f0796945   Mike Rapoport   shmem: introduce ...
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
  static inline bool shmem_inode_acct_block(struct inode *inode, long pages)
  {
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
  
  	if (shmem_acct_block(info->flags, pages))
  		return false;
  
  	if (sbinfo->max_blocks) {
  		if (percpu_counter_compare(&sbinfo->used_blocks,
  					   sbinfo->max_blocks - pages) > 0)
  			goto unacct;
  		percpu_counter_add(&sbinfo->used_blocks, pages);
  	}
  
  	return true;
  
  unacct:
  	shmem_unacct_blocks(info->flags, pages);
  	return false;
  }
  
  static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages)
  {
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
  
  	if (sbinfo->max_blocks)
  		percpu_counter_sub(&sbinfo->used_blocks, pages);
  	shmem_unacct_blocks(info->flags, pages);
  }
759b9775c   Hugh Dickins   [PATCH] shmem and...
213
  static const struct super_operations shmem_ops;
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
214
  static const struct address_space_operations shmem_aops;
15ad7cdcf   Helge Deller   [PATCH] struct se...
215
  static const struct file_operations shmem_file_operations;
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
216
217
218
  static const struct inode_operations shmem_inode_operations;
  static const struct inode_operations shmem_dir_inode_operations;
  static const struct inode_operations shmem_special_inode_operations;
f0f37e2f7   Alexey Dobriyan   const: mark struc...
219
  static const struct vm_operations_struct shmem_vm_ops;
779750d20   Kirill A. Shutemov   shmem: split huge...
220
  static struct file_system_type shmem_fs_type;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
221

b0506e488   Mike Rapoport   userfaultfd: shme...
222
223
224
225
  bool vma_is_shmem(struct vm_area_struct *vma)
  {
  	return vma->vm_ops == &shmem_vm_ops;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
226
  static LIST_HEAD(shmem_swaplist);
cb5f7b9a4   Hugh Dickins   tmpfs: make shmem...
227
  static DEFINE_MUTEX(shmem_swaplist_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
228

5b04c6890   Pavel Emelyanov   shmem: factor out...
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
  static int shmem_reserve_inode(struct super_block *sb)
  {
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
  	if (sbinfo->max_inodes) {
  		spin_lock(&sbinfo->stat_lock);
  		if (!sbinfo->free_inodes) {
  			spin_unlock(&sbinfo->stat_lock);
  			return -ENOSPC;
  		}
  		sbinfo->free_inodes--;
  		spin_unlock(&sbinfo->stat_lock);
  	}
  	return 0;
  }
  
  static void shmem_free_inode(struct super_block *sb)
  {
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
  	if (sbinfo->max_inodes) {
  		spin_lock(&sbinfo->stat_lock);
  		sbinfo->free_inodes++;
  		spin_unlock(&sbinfo->stat_lock);
  	}
  }
467118102   Randy Dunlap   mm/shmem and tiny...
253
  /**
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
254
   * shmem_recalc_inode - recalculate the block usage of an inode
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
   * @inode: inode to recalc
   *
   * We have to calculate the free blocks since the mm can drop
   * undirtied hole pages behind our back.
   *
   * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
   * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
   *
   * It has to be called with the spinlock held.
   */
  static void shmem_recalc_inode(struct inode *inode)
  {
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	long freed;
  
  	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
  	if (freed > 0) {
  		info->alloced -= freed;
54af60421   Hugh Dickins   tmpfs: convert sh...
273
  		inode->i_blocks -= freed * BLOCKS_PER_PAGE;
0f0796945   Mike Rapoport   shmem: introduce ...
274
  		shmem_inode_unacct_blocks(inode, freed);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
275
276
  	}
  }
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
277
278
279
  bool shmem_charge(struct inode *inode, long pages)
  {
  	struct shmem_inode_info *info = SHMEM_I(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
280
  	unsigned long flags;
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
281

0f0796945   Mike Rapoport   shmem: introduce ...
282
  	if (!shmem_inode_acct_block(inode, pages))
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
283
  		return false;
b1cc94ab2   Mike Rapoport   shmem: shmem_char...
284

78141aabf   Hugh Dickins   mm/khugepaged: fi...
285
286
  	/* nrpages adjustment first, then shmem_recalc_inode() when balanced */
  	inode->i_mapping->nrpages += pages;
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
287
  	spin_lock_irqsave(&info->lock, flags);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
288
289
290
  	info->alloced += pages;
  	inode->i_blocks += pages * BLOCKS_PER_PAGE;
  	shmem_recalc_inode(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
291
  	spin_unlock_irqrestore(&info->lock, flags);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
292

800d8c63b   Kirill A. Shutemov   shmem: add huge p...
293
294
295
296
297
298
  	return true;
  }
  
  void shmem_uncharge(struct inode *inode, long pages)
  {
  	struct shmem_inode_info *info = SHMEM_I(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
299
  	unsigned long flags;
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
300

78141aabf   Hugh Dickins   mm/khugepaged: fi...
301
  	/* nrpages adjustment done by __delete_from_page_cache() or caller */
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
302
  	spin_lock_irqsave(&info->lock, flags);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
303
304
305
  	info->alloced -= pages;
  	inode->i_blocks -= pages * BLOCKS_PER_PAGE;
  	shmem_recalc_inode(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
306
  	spin_unlock_irqrestore(&info->lock, flags);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
307

0f0796945   Mike Rapoport   shmem: introduce ...
308
  	shmem_inode_unacct_blocks(inode, pages);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
309
  }
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
310
311
312
313
314
315
  /*
   * Replace item expected in radix tree by a new item, while holding tree lock.
   */
  static int shmem_radix_tree_replace(struct address_space *mapping,
  			pgoff_t index, void *expected, void *replacement)
  {
f7942430e   Johannes Weiner   lib: radix-tree: ...
316
  	struct radix_tree_node *node;
5b9c98f30   Mike Kravetz   mm/shmem: add __r...
317
  	void __rcu **pslot;
6dbaf22ce   Johannes Weiner   mm: shmem: save o...
318
  	void *item;
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
319
320
  
  	VM_BUG_ON(!expected);
6dbaf22ce   Johannes Weiner   mm: shmem: save o...
321
  	VM_BUG_ON(!replacement);
b93b01631   Matthew Wilcox   page cache: use x...
322
  	item = __radix_tree_lookup(&mapping->i_pages, index, &node, &pslot);
f7942430e   Johannes Weiner   lib: radix-tree: ...
323
  	if (!item)
6dbaf22ce   Johannes Weiner   mm: shmem: save o...
324
  		return -ENOENT;
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
325
326
  	if (item != expected)
  		return -ENOENT;
b93b01631   Matthew Wilcox   page cache: use x...
327
  	__radix_tree_replace(&mapping->i_pages, node, pslot,
c7df8ad29   Mel Gorman   mm, truncate: do ...
328
  			     replacement, NULL);
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
329
330
331
332
  	return 0;
  }
  
  /*
d18992286   Hugh Dickins   shmem: fix negati...
333
334
335
336
337
338
339
340
341
342
343
344
   * Sometimes, before we decide whether to proceed or to fail, we must check
   * that an entry was not already brought back from swap by a racing thread.
   *
   * Checking page is not enough: by the time a SwapCache page is locked, it
   * might be reused, and again be SwapCache, using the same swap as before.
   */
  static bool shmem_confirm_swap(struct address_space *mapping,
  			       pgoff_t index, swp_entry_t swap)
  {
  	void *item;
  
  	rcu_read_lock();
b93b01631   Matthew Wilcox   page cache: use x...
345
  	item = radix_tree_lookup(&mapping->i_pages, index);
d18992286   Hugh Dickins   shmem: fix negati...
346
347
348
349
350
  	rcu_read_unlock();
  	return item == swp_to_radix_entry(swap);
  }
  
  /*
5a6e75f81   Kirill A. Shutemov   shmem: prepare hu...
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
   * Definitions for "huge tmpfs": tmpfs mounted with the huge= option
   *
   * SHMEM_HUGE_NEVER:
   *	disables huge pages for the mount;
   * SHMEM_HUGE_ALWAYS:
   *	enables huge pages for the mount;
   * SHMEM_HUGE_WITHIN_SIZE:
   *	only allocate huge pages if the page will be fully within i_size,
   *	also respect fadvise()/madvise() hints;
   * SHMEM_HUGE_ADVISE:
   *	only allocate huge pages if requested with fadvise()/madvise();
   */
  
  #define SHMEM_HUGE_NEVER	0
  #define SHMEM_HUGE_ALWAYS	1
  #define SHMEM_HUGE_WITHIN_SIZE	2
  #define SHMEM_HUGE_ADVISE	3
  
  /*
   * Special values.
   * Only can be set via /sys/kernel/mm/transparent_hugepage/shmem_enabled:
   *
   * SHMEM_HUGE_DENY:
   *	disables huge on shm_mnt and all mounts, for emergency use;
   * SHMEM_HUGE_FORCE:
   *	enables huge on shm_mnt and all mounts, w/o needing option, for testing;
   *
   */
  #define SHMEM_HUGE_DENY		(-1)
  #define SHMEM_HUGE_FORCE	(-2)
e496cf3d7   Kirill A. Shutemov   thp: introduce CO...
381
  #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
5a6e75f81   Kirill A. Shutemov   shmem: prepare hu...
382
  /* ifdef here to avoid bloating shmem.o when not necessary */
5b9c98f30   Mike Kravetz   mm/shmem: add __r...
383
  static int shmem_huge __read_mostly;
5a6e75f81   Kirill A. Shutemov   shmem: prepare hu...
384

f1f5929cd   Jérémy Lefaure   shmem: fix compil...
385
  #if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS)
5a6e75f81   Kirill A. Shutemov   shmem: prepare hu...
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
  static int shmem_parse_huge(const char *str)
  {
  	if (!strcmp(str, "never"))
  		return SHMEM_HUGE_NEVER;
  	if (!strcmp(str, "always"))
  		return SHMEM_HUGE_ALWAYS;
  	if (!strcmp(str, "within_size"))
  		return SHMEM_HUGE_WITHIN_SIZE;
  	if (!strcmp(str, "advise"))
  		return SHMEM_HUGE_ADVISE;
  	if (!strcmp(str, "deny"))
  		return SHMEM_HUGE_DENY;
  	if (!strcmp(str, "force"))
  		return SHMEM_HUGE_FORCE;
  	return -EINVAL;
  }
  
  static const char *shmem_format_huge(int huge)
  {
  	switch (huge) {
  	case SHMEM_HUGE_NEVER:
  		return "never";
  	case SHMEM_HUGE_ALWAYS:
  		return "always";
  	case SHMEM_HUGE_WITHIN_SIZE:
  		return "within_size";
  	case SHMEM_HUGE_ADVISE:
  		return "advise";
  	case SHMEM_HUGE_DENY:
  		return "deny";
  	case SHMEM_HUGE_FORCE:
  		return "force";
  	default:
  		VM_BUG_ON(1);
  		return "bad_val";
  	}
  }
f1f5929cd   Jérémy Lefaure   shmem: fix compil...
423
  #endif
5a6e75f81   Kirill A. Shutemov   shmem: prepare hu...
424

779750d20   Kirill A. Shutemov   shmem: split huge...
425
426
427
428
  static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
  		struct shrink_control *sc, unsigned long nr_to_split)
  {
  	LIST_HEAD(list), *pos, *next;
253fd0f02   Kirill A. Shutemov   shmem: fix sleepi...
429
  	LIST_HEAD(to_remove);
779750d20   Kirill A. Shutemov   shmem: split huge...
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
  	struct inode *inode;
  	struct shmem_inode_info *info;
  	struct page *page;
  	unsigned long batch = sc ? sc->nr_to_scan : 128;
  	int removed = 0, split = 0;
  
  	if (list_empty(&sbinfo->shrinklist))
  		return SHRINK_STOP;
  
  	spin_lock(&sbinfo->shrinklist_lock);
  	list_for_each_safe(pos, next, &sbinfo->shrinklist) {
  		info = list_entry(pos, struct shmem_inode_info, shrinklist);
  
  		/* pin the inode */
  		inode = igrab(&info->vfs_inode);
  
  		/* inode is about to be evicted */
  		if (!inode) {
  			list_del_init(&info->shrinklist);
  			removed++;
  			goto next;
  		}
  
  		/* Check if there's anything to gain */
  		if (round_up(inode->i_size, PAGE_SIZE) ==
  				round_up(inode->i_size, HPAGE_PMD_SIZE)) {
253fd0f02   Kirill A. Shutemov   shmem: fix sleepi...
456
  			list_move(&info->shrinklist, &to_remove);
779750d20   Kirill A. Shutemov   shmem: split huge...
457
  			removed++;
779750d20   Kirill A. Shutemov   shmem: split huge...
458
459
460
461
462
463
464
465
466
  			goto next;
  		}
  
  		list_move(&info->shrinklist, &list);
  next:
  		if (!--batch)
  			break;
  	}
  	spin_unlock(&sbinfo->shrinklist_lock);
253fd0f02   Kirill A. Shutemov   shmem: fix sleepi...
467
468
469
470
471
472
  	list_for_each_safe(pos, next, &to_remove) {
  		info = list_entry(pos, struct shmem_inode_info, shrinklist);
  		inode = &info->vfs_inode;
  		list_del_init(&info->shrinklist);
  		iput(inode);
  	}
779750d20   Kirill A. Shutemov   shmem: split huge...
473
474
475
476
477
  	list_for_each_safe(pos, next, &list) {
  		int ret;
  
  		info = list_entry(pos, struct shmem_inode_info, shrinklist);
  		inode = &info->vfs_inode;
b3cd54b25   Kirill A. Shutemov   mm/shmem: do not ...
478
479
  		if (nr_to_split && split >= nr_to_split)
  			goto leave;
779750d20   Kirill A. Shutemov   shmem: split huge...
480

b3cd54b25   Kirill A. Shutemov   mm/shmem: do not ...
481
  		page = find_get_page(inode->i_mapping,
779750d20   Kirill A. Shutemov   shmem: split huge...
482
483
484
  				(inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
  		if (!page)
  			goto drop;
b3cd54b25   Kirill A. Shutemov   mm/shmem: do not ...
485
  		/* No huge page at the end of the file: nothing to split */
779750d20   Kirill A. Shutemov   shmem: split huge...
486
  		if (!PageTransHuge(page)) {
779750d20   Kirill A. Shutemov   shmem: split huge...
487
488
489
  			put_page(page);
  			goto drop;
  		}
b3cd54b25   Kirill A. Shutemov   mm/shmem: do not ...
490
491
492
493
494
495
496
497
498
499
500
  		/*
  		 * Leave the inode on the list if we failed to lock
  		 * the page at this time.
  		 *
  		 * Waiting for the lock may lead to deadlock in the
  		 * reclaim path.
  		 */
  		if (!trylock_page(page)) {
  			put_page(page);
  			goto leave;
  		}
779750d20   Kirill A. Shutemov   shmem: split huge...
501
502
503
  		ret = split_huge_page(page);
  		unlock_page(page);
  		put_page(page);
b3cd54b25   Kirill A. Shutemov   mm/shmem: do not ...
504
505
506
  		/* If split failed leave the inode on the list */
  		if (ret)
  			goto leave;
779750d20   Kirill A. Shutemov   shmem: split huge...
507
508
509
510
511
  
  		split++;
  drop:
  		list_del_init(&info->shrinklist);
  		removed++;
b3cd54b25   Kirill A. Shutemov   mm/shmem: do not ...
512
  leave:
779750d20   Kirill A. Shutemov   shmem: split huge...
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
  		iput(inode);
  	}
  
  	spin_lock(&sbinfo->shrinklist_lock);
  	list_splice_tail(&list, &sbinfo->shrinklist);
  	sbinfo->shrinklist_len -= removed;
  	spin_unlock(&sbinfo->shrinklist_lock);
  
  	return split;
  }
  
  static long shmem_unused_huge_scan(struct super_block *sb,
  		struct shrink_control *sc)
  {
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
  
  	if (!READ_ONCE(sbinfo->shrinklist_len))
  		return SHRINK_STOP;
  
  	return shmem_unused_huge_shrink(sbinfo, sc, 0);
  }
  
  static long shmem_unused_huge_count(struct super_block *sb,
  		struct shrink_control *sc)
  {
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
  	return READ_ONCE(sbinfo->shrinklist_len);
  }
e496cf3d7   Kirill A. Shutemov   thp: introduce CO...
541
  #else /* !CONFIG_TRANSPARENT_HUGE_PAGECACHE */
5a6e75f81   Kirill A. Shutemov   shmem: prepare hu...
542
543
  
  #define shmem_huge SHMEM_HUGE_DENY
779750d20   Kirill A. Shutemov   shmem: split huge...
544
545
546
547
548
  static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
  		struct shrink_control *sc, unsigned long nr_to_split)
  {
  	return 0;
  }
e496cf3d7   Kirill A. Shutemov   thp: introduce CO...
549
  #endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */
5a6e75f81   Kirill A. Shutemov   shmem: prepare hu...
550

89fdcd262   Yang Shi   mm: shmem: make s...
551
552
553
554
555
556
557
558
  static inline bool is_huge_enabled(struct shmem_sb_info *sbinfo)
  {
  	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) &&
  	    (shmem_huge == SHMEM_HUGE_FORCE || sbinfo->huge) &&
  	    shmem_huge != SHMEM_HUGE_DENY)
  		return true;
  	return false;
  }
5a6e75f81   Kirill A. Shutemov   shmem: prepare hu...
559
  /*
46f65ec15   Hugh Dickins   tmpfs: convert sh...
560
561
562
563
   * Like add_to_page_cache_locked, but error if expected item has gone.
   */
  static int shmem_add_to_page_cache(struct page *page,
  				   struct address_space *mapping,
fed400a18   Wang Sheng-Hui   mm/shmem.c: remov...
564
  				   pgoff_t index, void *expected)
46f65ec15   Hugh Dickins   tmpfs: convert sh...
565
  {
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
566
  	int error, nr = hpage_nr_pages(page);
46f65ec15   Hugh Dickins   tmpfs: convert sh...
567

800d8c63b   Kirill A. Shutemov   shmem: add huge p...
568
569
  	VM_BUG_ON_PAGE(PageTail(page), page);
  	VM_BUG_ON_PAGE(index != round_down(index, nr), page);
309381fea   Sasha Levin   mm: dump page whe...
570
571
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
572
  	VM_BUG_ON(expected && PageTransHuge(page));
46f65ec15   Hugh Dickins   tmpfs: convert sh...
573

800d8c63b   Kirill A. Shutemov   shmem: add huge p...
574
  	page_ref_add(page, nr);
b065b4321   Hugh Dickins   shmem: cleanup sh...
575
576
  	page->mapping = mapping;
  	page->index = index;
b93b01631   Matthew Wilcox   page cache: use x...
577
  	xa_lock_irq(&mapping->i_pages);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
578
579
580
581
582
583
  	if (PageTransHuge(page)) {
  		void __rcu **results;
  		pgoff_t idx;
  		int i;
  
  		error = 0;
b93b01631   Matthew Wilcox   page cache: use x...
584
  		if (radix_tree_gang_lookup_slot(&mapping->i_pages,
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
585
586
587
588
589
590
591
  					&results, &idx, index, 1) &&
  				idx < index + HPAGE_PMD_NR) {
  			error = -EEXIST;
  		}
  
  		if (!error) {
  			for (i = 0; i < HPAGE_PMD_NR; i++) {
b93b01631   Matthew Wilcox   page cache: use x...
592
  				error = radix_tree_insert(&mapping->i_pages,
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
593
594
595
596
597
598
  						index + i, page + i);
  				VM_BUG_ON(error);
  			}
  			count_vm_event(THP_FILE_ALLOC);
  		}
  	} else if (!expected) {
b93b01631   Matthew Wilcox   page cache: use x...
599
  		error = radix_tree_insert(&mapping->i_pages, index, page);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
600
  	} else {
b065b4321   Hugh Dickins   shmem: cleanup sh...
601
602
  		error = shmem_radix_tree_replace(mapping, index, expected,
  								 page);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
603
  	}
46f65ec15   Hugh Dickins   tmpfs: convert sh...
604
  	if (!error) {
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
605
606
  		mapping->nrpages += nr;
  		if (PageTransHuge(page))
11fb99898   Mel Gorman   mm: move most fil...
607
608
609
  			__inc_node_page_state(page, NR_SHMEM_THPS);
  		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
  		__mod_node_page_state(page_pgdat(page), NR_SHMEM, nr);
b93b01631   Matthew Wilcox   page cache: use x...
610
  		xa_unlock_irq(&mapping->i_pages);
b065b4321   Hugh Dickins   shmem: cleanup sh...
611
612
  	} else {
  		page->mapping = NULL;
b93b01631   Matthew Wilcox   page cache: use x...
613
  		xa_unlock_irq(&mapping->i_pages);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
614
  		page_ref_sub(page, nr);
46f65ec15   Hugh Dickins   tmpfs: convert sh...
615
  	}
46f65ec15   Hugh Dickins   tmpfs: convert sh...
616
617
618
619
  	return error;
  }
  
  /*
6922c0c7a   Hugh Dickins   tmpfs: convert sh...
620
621
622
623
624
625
   * Like delete_from_page_cache, but substitutes swap for page.
   */
  static void shmem_delete_from_page_cache(struct page *page, void *radswap)
  {
  	struct address_space *mapping = page->mapping;
  	int error;
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
626
  	VM_BUG_ON_PAGE(PageCompound(page), page);
b93b01631   Matthew Wilcox   page cache: use x...
627
  	xa_lock_irq(&mapping->i_pages);
6922c0c7a   Hugh Dickins   tmpfs: convert sh...
628
629
630
  	error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
  	page->mapping = NULL;
  	mapping->nrpages--;
11fb99898   Mel Gorman   mm: move most fil...
631
632
  	__dec_node_page_state(page, NR_FILE_PAGES);
  	__dec_node_page_state(page, NR_SHMEM);
b93b01631   Matthew Wilcox   page cache: use x...
633
  	xa_unlock_irq(&mapping->i_pages);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
634
  	put_page(page);
6922c0c7a   Hugh Dickins   tmpfs: convert sh...
635
636
637
638
  	BUG_ON(error);
  }
  
  /*
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
639
640
641
642
643
   * Remove swap entry from radix tree, free the swap and its page cache.
   */
  static int shmem_free_swap(struct address_space *mapping,
  			   pgoff_t index, void *radswap)
  {
6dbaf22ce   Johannes Weiner   mm: shmem: save o...
644
  	void *old;
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
645

b93b01631   Matthew Wilcox   page cache: use x...
646
647
648
  	xa_lock_irq(&mapping->i_pages);
  	old = radix_tree_delete_item(&mapping->i_pages, index, radswap);
  	xa_unlock_irq(&mapping->i_pages);
6dbaf22ce   Johannes Weiner   mm: shmem: save o...
649
650
651
652
  	if (old != radswap)
  		return -ENOENT;
  	free_swap_and_cache(radix_to_swp_entry(radswap));
  	return 0;
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
653
654
655
  }
  
  /*
6a15a3709   Vlastimil Babka   mm, proc: reduce ...
656
   * Determine (in bytes) how many of the shmem object's pages mapped by the
48131e03c   Vlastimil Babka   mm, proc: reduce ...
657
   * given offsets are swapped out.
6a15a3709   Vlastimil Babka   mm, proc: reduce ...
658
   *
b93b01631   Matthew Wilcox   page cache: use x...
659
   * This is safe to call without i_mutex or the i_pages lock thanks to RCU,
6a15a3709   Vlastimil Babka   mm, proc: reduce ...
660
661
   * as long as the inode doesn't go away and racy results are not a problem.
   */
48131e03c   Vlastimil Babka   mm, proc: reduce ...
662
663
  unsigned long shmem_partial_swap_usage(struct address_space *mapping,
  						pgoff_t start, pgoff_t end)
6a15a3709   Vlastimil Babka   mm, proc: reduce ...
664
  {
6a15a3709   Vlastimil Babka   mm, proc: reduce ...
665
  	struct radix_tree_iter iter;
5b9c98f30   Mike Kravetz   mm/shmem: add __r...
666
  	void __rcu **slot;
6a15a3709   Vlastimil Babka   mm, proc: reduce ...
667
  	struct page *page;
48131e03c   Vlastimil Babka   mm, proc: reduce ...
668
  	unsigned long swapped = 0;
6a15a3709   Vlastimil Babka   mm, proc: reduce ...
669
670
  
  	rcu_read_lock();
b93b01631   Matthew Wilcox   page cache: use x...
671
  	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
6a15a3709   Vlastimil Babka   mm, proc: reduce ...
672
673
674
675
  		if (iter.index >= end)
  			break;
  
  		page = radix_tree_deref_slot(slot);
2cf938aae   Matthew Wilcox   mm: use radix_tre...
676
677
678
679
  		if (radix_tree_deref_retry(page)) {
  			slot = radix_tree_iter_retry(&iter);
  			continue;
  		}
6a15a3709   Vlastimil Babka   mm, proc: reduce ...
680
681
682
683
684
  
  		if (radix_tree_exceptional_entry(page))
  			swapped++;
  
  		if (need_resched()) {
148deab22   Matthew Wilcox   radix-tree: impro...
685
  			slot = radix_tree_iter_resume(slot, &iter);
6a15a3709   Vlastimil Babka   mm, proc: reduce ...
686
  			cond_resched_rcu();
6a15a3709   Vlastimil Babka   mm, proc: reduce ...
687
688
689
690
691
692
693
694
695
  		}
  	}
  
  	rcu_read_unlock();
  
  	return swapped << PAGE_SHIFT;
  }
  
  /*
48131e03c   Vlastimil Babka   mm, proc: reduce ...
696
697
698
   * Determine (in bytes) how many of the shmem object's pages mapped by the
   * given vma is swapped out.
   *
b93b01631   Matthew Wilcox   page cache: use x...
699
   * This is safe to call without i_mutex or the i_pages lock thanks to RCU,
48131e03c   Vlastimil Babka   mm, proc: reduce ...
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
   * as long as the inode doesn't go away and racy results are not a problem.
   */
  unsigned long shmem_swap_usage(struct vm_area_struct *vma)
  {
  	struct inode *inode = file_inode(vma->vm_file);
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	struct address_space *mapping = inode->i_mapping;
  	unsigned long swapped;
  
  	/* Be careful as we don't hold info->lock */
  	swapped = READ_ONCE(info->swapped);
  
  	/*
  	 * The easier cases are when the shmem object has nothing in swap, or
  	 * the vma maps it whole. Then we can simply use the stats that we
  	 * already track.
  	 */
  	if (!swapped)
  		return 0;
  
  	if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size)
  		return swapped << PAGE_SHIFT;
  
  	/* Here comes the more involved part */
  	return shmem_partial_swap_usage(mapping,
  			linear_page_index(vma, vma->vm_start),
  			linear_page_index(vma, vma->vm_end));
  }
  
  /*
245132643   Hugh Dickins   SHM_UNLOCK: fix U...
730
731
732
733
734
735
736
   * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
   */
  void shmem_unlock_mapping(struct address_space *mapping)
  {
  	struct pagevec pvec;
  	pgoff_t indices[PAGEVEC_SIZE];
  	pgoff_t index = 0;
866798201   Mel Gorman   mm, pagevec: remo...
737
  	pagevec_init(&pvec);
245132643   Hugh Dickins   SHM_UNLOCK: fix U...
738
739
740
741
742
743
744
745
  	/*
  	 * Minor point, but we might as well stop if someone else SHM_LOCKs it.
  	 */
  	while (!mapping_unevictable(mapping)) {
  		/*
  		 * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
  		 * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
  		 */
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
746
747
  		pvec.nr = find_get_entries(mapping, index,
  					   PAGEVEC_SIZE, pvec.pages, indices);
245132643   Hugh Dickins   SHM_UNLOCK: fix U...
748
749
750
  		if (!pvec.nr)
  			break;
  		index = indices[pvec.nr - 1] + 1;
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
751
  		pagevec_remove_exceptionals(&pvec);
245132643   Hugh Dickins   SHM_UNLOCK: fix U...
752
753
754
755
  		check_move_unevictable_pages(pvec.pages, pvec.nr);
  		pagevec_release(&pvec);
  		cond_resched();
  	}
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
756
757
758
759
  }
  
  /*
   * Remove range of pages and swap entries from radix tree, and free them.
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
760
   * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
761
   */
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
762
763
  static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
  								 bool unfalloc)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
764
  {
285b2c4fd   Hugh Dickins   tmpfs: demolish o...
765
  	struct address_space *mapping = inode->i_mapping;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
766
  	struct shmem_inode_info *info = SHMEM_I(inode);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
767
768
769
770
  	pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
  	pgoff_t end = (lend + 1) >> PAGE_SHIFT;
  	unsigned int partial_start = lstart & (PAGE_SIZE - 1);
  	unsigned int partial_end = (lend + 1) & (PAGE_SIZE - 1);
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
771
  	struct pagevec pvec;
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
772
773
  	pgoff_t indices[PAGEVEC_SIZE];
  	long nr_swaps_freed = 0;
285b2c4fd   Hugh Dickins   tmpfs: demolish o...
774
  	pgoff_t index;
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
775
  	int i;
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
776
777
  	if (lend == -1)
  		end = -1;	/* unsigned, so actually very big */
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
778

866798201   Mel Gorman   mm, pagevec: remo...
779
  	pagevec_init(&pvec);
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
780
  	index = start;
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
781
  	while (index < end) {
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
782
783
784
  		pvec.nr = find_get_entries(mapping, index,
  			min(end - index, (pgoff_t)PAGEVEC_SIZE),
  			pvec.pages, indices);
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
785
786
  		if (!pvec.nr)
  			break;
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
787
788
  		for (i = 0; i < pagevec_count(&pvec); i++) {
  			struct page *page = pvec.pages[i];
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
789
  			index = indices[i];
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
790
  			if (index >= end)
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
791
  				break;
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
792
  			if (radix_tree_exceptional_entry(page)) {
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
793
794
  				if (unfalloc)
  					continue;
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
795
796
  				nr_swaps_freed += !shmem_free_swap(mapping,
  								index, page);
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
797
  				continue;
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
798
  			}
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
799
  			VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page);
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
800
  			if (!trylock_page(page))
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
801
  				continue;
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
  
  			if (PageTransTail(page)) {
  				/* Middle of THP: zero out the page */
  				clear_highpage(page);
  				unlock_page(page);
  				continue;
  			} else if (PageTransHuge(page)) {
  				if (index == round_down(end, HPAGE_PMD_NR)) {
  					/*
  					 * Range ends in the middle of THP:
  					 * zero out the page
  					 */
  					clear_highpage(page);
  					unlock_page(page);
  					continue;
  				}
  				index += HPAGE_PMD_NR - 1;
  				i += HPAGE_PMD_NR - 1;
  			}
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
821
  			if (!unfalloc || !PageUptodate(page)) {
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
822
823
  				VM_BUG_ON_PAGE(PageTail(page), page);
  				if (page_mapping(page) == mapping) {
309381fea   Sasha Levin   mm: dump page whe...
824
  					VM_BUG_ON_PAGE(PageWriteback(page), page);
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
825
826
  					truncate_inode_page(mapping, page);
  				}
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
827
  			}
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
828
829
  			unlock_page(page);
  		}
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
830
  		pagevec_remove_exceptionals(&pvec);
245132643   Hugh Dickins   SHM_UNLOCK: fix U...
831
  		pagevec_release(&pvec);
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
832
833
834
  		cond_resched();
  		index++;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
835

83e4fa9c1   Hugh Dickins   tmpfs: support fa...
836
  	if (partial_start) {
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
837
  		struct page *page = NULL;
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
838
  		shmem_getpage(inode, start - 1, &page, SGP_READ);
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
839
  		if (page) {
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
840
  			unsigned int top = PAGE_SIZE;
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
841
842
843
844
845
846
847
  			if (start > end) {
  				top = partial_end;
  				partial_end = 0;
  			}
  			zero_user_segment(page, partial_start, top);
  			set_page_dirty(page);
  			unlock_page(page);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
848
  			put_page(page);
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
849
850
851
852
  		}
  	}
  	if (partial_end) {
  		struct page *page = NULL;
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
853
  		shmem_getpage(inode, end, &page, SGP_READ);
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
854
855
  		if (page) {
  			zero_user_segment(page, 0, partial_end);
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
856
857
  			set_page_dirty(page);
  			unlock_page(page);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
858
  			put_page(page);
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
859
860
  		}
  	}
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
861
862
  	if (start >= end)
  		return;
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
863
864
  
  	index = start;
b1a366500   Hugh Dickins   shmem: fix splici...
865
  	while (index < end) {
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
866
  		cond_resched();
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
867
868
  
  		pvec.nr = find_get_entries(mapping, index,
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
869
  				min(end - index, (pgoff_t)PAGEVEC_SIZE),
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
870
  				pvec.pages, indices);
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
871
  		if (!pvec.nr) {
b1a366500   Hugh Dickins   shmem: fix splici...
872
873
  			/* If all gone or hole-punch or unfalloc, we're done */
  			if (index == start || end != -1)
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
874
  				break;
b1a366500   Hugh Dickins   shmem: fix splici...
875
  			/* But if truncating, restart to make sure all gone */
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
876
877
878
  			index = start;
  			continue;
  		}
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
879
880
  		for (i = 0; i < pagevec_count(&pvec); i++) {
  			struct page *page = pvec.pages[i];
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
881
  			index = indices[i];
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
882
  			if (index >= end)
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
883
  				break;
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
884
  			if (radix_tree_exceptional_entry(page)) {
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
885
886
  				if (unfalloc)
  					continue;
b1a366500   Hugh Dickins   shmem: fix splici...
887
888
889
890
891
892
  				if (shmem_free_swap(mapping, index, page)) {
  					/* Swap was replaced by page: retry */
  					index--;
  					break;
  				}
  				nr_swaps_freed++;
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
893
894
  				continue;
  			}
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
895
  			lock_page(page);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
  
  			if (PageTransTail(page)) {
  				/* Middle of THP: zero out the page */
  				clear_highpage(page);
  				unlock_page(page);
  				/*
  				 * Partial thp truncate due 'start' in middle
  				 * of THP: don't need to look on these pages
  				 * again on !pvec.nr restart.
  				 */
  				if (index != round_down(end, HPAGE_PMD_NR))
  					start++;
  				continue;
  			} else if (PageTransHuge(page)) {
  				if (index == round_down(end, HPAGE_PMD_NR)) {
  					/*
  					 * Range ends in the middle of THP:
  					 * zero out the page
  					 */
  					clear_highpage(page);
  					unlock_page(page);
  					continue;
  				}
  				index += HPAGE_PMD_NR - 1;
  				i += HPAGE_PMD_NR - 1;
  			}
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
922
  			if (!unfalloc || !PageUptodate(page)) {
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
923
924
  				VM_BUG_ON_PAGE(PageTail(page), page);
  				if (page_mapping(page) == mapping) {
309381fea   Sasha Levin   mm: dump page whe...
925
  					VM_BUG_ON_PAGE(PageWriteback(page), page);
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
926
  					truncate_inode_page(mapping, page);
b1a366500   Hugh Dickins   shmem: fix splici...
927
928
929
930
931
  				} else {
  					/* Page was replaced by swap: retry */
  					unlock_page(page);
  					index--;
  					break;
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
932
  				}
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
933
  			}
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
934
935
  			unlock_page(page);
  		}
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
936
  		pagevec_remove_exceptionals(&pvec);
245132643   Hugh Dickins   SHM_UNLOCK: fix U...
937
  		pagevec_release(&pvec);
bda97eab0   Hugh Dickins   tmpfs: copy trunc...
938
939
  		index++;
  	}
94c1e62df   Hugh Dickins   tmpfs: take contr...
940

4595ef88d   Kirill A. Shutemov   shmem: make shmem...
941
  	spin_lock_irq(&info->lock);
7a5d0fbb2   Hugh Dickins   tmpfs: convert sh...
942
  	info->swapped -= nr_swaps_freed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
943
  	shmem_recalc_inode(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
944
  	spin_unlock_irq(&info->lock);
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
945
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
946

1635f6a74   Hugh Dickins   tmpfs: undo fallo...
947
948
949
  void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
  {
  	shmem_undo_range(inode, lstart, lend, false);
078cd8279   Deepa Dinamani   fs: Replace CURRE...
950
  	inode->i_ctime = inode->i_mtime = current_time(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
951
  }
94c1e62df   Hugh Dickins   tmpfs: take contr...
952
  EXPORT_SYMBOL_GPL(shmem_truncate_range);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
953

a528d35e8   David Howells   statx: Add a syst...
954
955
  static int shmem_getattr(const struct path *path, struct kstat *stat,
  			 u32 request_mask, unsigned int query_flags)
44a30220b   Yu Zhao   shmem: recalculat...
956
  {
a528d35e8   David Howells   statx: Add a syst...
957
  	struct inode *inode = path->dentry->d_inode;
44a30220b   Yu Zhao   shmem: recalculat...
958
  	struct shmem_inode_info *info = SHMEM_I(inode);
89fdcd262   Yang Shi   mm: shmem: make s...
959
  	struct shmem_sb_info *sb_info = SHMEM_SB(inode->i_sb);
44a30220b   Yu Zhao   shmem: recalculat...
960

d0424c429   Hugh Dickins   tmpfs: avoid a li...
961
  	if (info->alloced - info->swapped != inode->i_mapping->nrpages) {
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
962
  		spin_lock_irq(&info->lock);
d0424c429   Hugh Dickins   tmpfs: avoid a li...
963
  		shmem_recalc_inode(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
964
  		spin_unlock_irq(&info->lock);
d0424c429   Hugh Dickins   tmpfs: avoid a li...
965
  	}
44a30220b   Yu Zhao   shmem: recalculat...
966
  	generic_fillattr(inode, stat);
89fdcd262   Yang Shi   mm: shmem: make s...
967
968
969
  
  	if (is_huge_enabled(sb_info))
  		stat->blksize = HPAGE_PMD_SIZE;
44a30220b   Yu Zhao   shmem: recalculat...
970
971
  	return 0;
  }
94c1e62df   Hugh Dickins   tmpfs: take contr...
972
  static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
973
  {
75c3cfa85   David Howells   VFS: assorted wei...
974
  	struct inode *inode = d_inode(dentry);
40e041a2c   David Herrmann   shm: add sealing API
975
  	struct shmem_inode_info *info = SHMEM_I(inode);
779750d20   Kirill A. Shutemov   shmem: split huge...
976
  	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
977
  	int error;
31051c85b   Jan Kara   fs: Give dentry t...
978
  	error = setattr_prepare(dentry, attr);
db78b877f   Christoph Hellwig   always call inode...
979
980
  	if (error)
  		return error;
94c1e62df   Hugh Dickins   tmpfs: take contr...
981
982
983
  	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
  		loff_t oldsize = inode->i_size;
  		loff_t newsize = attr->ia_size;
3889e6e76   npiggin@suse.de   tmpfs: convert to...
984

40e041a2c   David Herrmann   shm: add sealing API
985
986
987
988
  		/* protected by i_mutex */
  		if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
  		    (newsize > oldsize && (info->seals & F_SEAL_GROW)))
  			return -EPERM;
94c1e62df   Hugh Dickins   tmpfs: take contr...
989
  		if (newsize != oldsize) {
771425179   Konstantin Khlebnikov   shmem: update mem...
990
991
992
993
  			error = shmem_reacct_size(SHMEM_I(inode)->flags,
  					oldsize, newsize);
  			if (error)
  				return error;
94c1e62df   Hugh Dickins   tmpfs: take contr...
994
  			i_size_write(inode, newsize);
078cd8279   Deepa Dinamani   fs: Replace CURRE...
995
  			inode->i_ctime = inode->i_mtime = current_time(inode);
94c1e62df   Hugh Dickins   tmpfs: take contr...
996
  		}
afa2db2fb   Josef Bacik   tmpfs: truncate p...
997
  		if (newsize <= oldsize) {
94c1e62df   Hugh Dickins   tmpfs: take contr...
998
  			loff_t holebegin = round_up(newsize, PAGE_SIZE);
d0424c429   Hugh Dickins   tmpfs: avoid a li...
999
1000
1001
1002
1003
1004
  			if (oldsize > holebegin)
  				unmap_mapping_range(inode->i_mapping,
  							holebegin, 0, 1);
  			if (info->alloced)
  				shmem_truncate_range(inode,
  							newsize, (loff_t)-1);
94c1e62df   Hugh Dickins   tmpfs: take contr...
1005
  			/* unmap again to remove racily COWed private pages */
d0424c429   Hugh Dickins   tmpfs: avoid a li...
1006
1007
1008
  			if (oldsize > holebegin)
  				unmap_mapping_range(inode->i_mapping,
  							holebegin, 0, 1);
779750d20   Kirill A. Shutemov   shmem: split huge...
1009
1010
1011
1012
1013
1014
1015
  
  			/*
  			 * Part of the huge page can be beyond i_size: subject
  			 * to shrink under memory pressure.
  			 */
  			if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) {
  				spin_lock(&sbinfo->shrinklist_lock);
d041353dc   Cong Wang   mm: fix list corr...
1016
1017
1018
1019
1020
  				/*
  				 * _careful to defend against unlocked access to
  				 * ->shrink_list in shmem_unused_huge_shrink()
  				 */
  				if (list_empty_careful(&info->shrinklist)) {
779750d20   Kirill A. Shutemov   shmem: split huge...
1021
1022
1023
1024
1025
1026
  					list_add_tail(&info->shrinklist,
  							&sbinfo->shrinklist);
  					sbinfo->shrinklist_len++;
  				}
  				spin_unlock(&sbinfo->shrinklist_lock);
  			}
94c1e62df   Hugh Dickins   tmpfs: take contr...
1027
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1028
  	}
db78b877f   Christoph Hellwig   always call inode...
1029
  	setattr_copy(inode, attr);
db78b877f   Christoph Hellwig   always call inode...
1030
  	if (attr->ia_valid & ATTR_MODE)
feda821e7   Christoph Hellwig   fs: remove generi...
1031
  		error = posix_acl_chmod(inode, inode->i_mode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1032
1033
  	return error;
  }
1f895f75d   Al Viro   switch shmem.c to...
1034
  static void shmem_evict_inode(struct inode *inode)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1035
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1036
  	struct shmem_inode_info *info = SHMEM_I(inode);
779750d20   Kirill A. Shutemov   shmem: split huge...
1037
  	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1038

3889e6e76   npiggin@suse.de   tmpfs: convert to...
1039
  	if (inode->i_mapping->a_ops == &shmem_aops) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1040
1041
  		shmem_unacct_size(info->flags, inode->i_size);
  		inode->i_size = 0;
3889e6e76   npiggin@suse.de   tmpfs: convert to...
1042
  		shmem_truncate_range(inode, 0, (loff_t)-1);
779750d20   Kirill A. Shutemov   shmem: split huge...
1043
1044
1045
1046
1047
1048
1049
1050
  		if (!list_empty(&info->shrinklist)) {
  			spin_lock(&sbinfo->shrinklist_lock);
  			if (!list_empty(&info->shrinklist)) {
  				list_del_init(&info->shrinklist);
  				sbinfo->shrinklist_len--;
  			}
  			spin_unlock(&sbinfo->shrinklist_lock);
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1051
  		if (!list_empty(&info->swaplist)) {
cb5f7b9a4   Hugh Dickins   tmpfs: make shmem...
1052
  			mutex_lock(&shmem_swaplist_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1053
  			list_del_init(&info->swaplist);
cb5f7b9a4   Hugh Dickins   tmpfs: make shmem...
1054
  			mutex_unlock(&shmem_swaplist_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1055
  		}
3ed47db34   Al Viro   make sure that fr...
1056
  	}
b09e0fa4b   Eric Paris   tmpfs: implement ...
1057

38f386574   Aristeu Rozanski   xattr: extract si...
1058
  	simple_xattrs_free(&info->xattrs);
0f3c42f52   Hugh Dickins   tmpfs: change fin...
1059
  	WARN_ON(inode->i_blocks);
5b04c6890   Pavel Emelyanov   shmem: factor out...
1060
  	shmem_free_inode(inode->i_sb);
dbd5768f8   Jan Kara   vfs: Rename end_w...
1061
  	clear_inode(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1062
  }
478922e2b   Matthew Wilcox   radix-tree: delet...
1063
1064
1065
  static unsigned long find_swap_entry(struct radix_tree_root *root, void *item)
  {
  	struct radix_tree_iter iter;
5b9c98f30   Mike Kravetz   mm/shmem: add __r...
1066
  	void __rcu **slot;
478922e2b   Matthew Wilcox   radix-tree: delet...
1067
1068
1069
1070
1071
  	unsigned long found = -1;
  	unsigned int checked = 0;
  
  	rcu_read_lock();
  	radix_tree_for_each_slot(slot, root, &iter, 0) {
5b9c98f30   Mike Kravetz   mm/shmem: add __r...
1072
1073
1074
1075
1076
1077
1078
  		void *entry = radix_tree_deref_slot(slot);
  
  		if (radix_tree_deref_retry(entry)) {
  			slot = radix_tree_iter_retry(&iter);
  			continue;
  		}
  		if (entry == item) {
478922e2b   Matthew Wilcox   radix-tree: delet...
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
  			found = iter.index;
  			break;
  		}
  		checked++;
  		if ((checked % 4096) != 0)
  			continue;
  		slot = radix_tree_iter_resume(slot, &iter);
  		cond_resched_rcu();
  	}
  
  	rcu_read_unlock();
  	return found;
  }
46f65ec15   Hugh Dickins   tmpfs: convert sh...
1092
1093
1094
  /*
   * If swap found in inode, free it and move page from swapcache to filecache.
   */
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1095
  static int shmem_unuse_inode(struct shmem_inode_info *info,
bde05d1cc   Hugh Dickins   shmem: replace pa...
1096
  			     swp_entry_t swap, struct page **pagep)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1097
  {
285b2c4fd   Hugh Dickins   tmpfs: demolish o...
1098
  	struct address_space *mapping = info->vfs_inode.i_mapping;
46f65ec15   Hugh Dickins   tmpfs: convert sh...
1099
  	void *radswap;
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1100
  	pgoff_t index;
bde05d1cc   Hugh Dickins   shmem: replace pa...
1101
1102
  	gfp_t gfp;
  	int error = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1103

46f65ec15   Hugh Dickins   tmpfs: convert sh...
1104
  	radswap = swp_to_radix_entry(swap);
b93b01631   Matthew Wilcox   page cache: use x...
1105
  	index = find_swap_entry(&mapping->i_pages, radswap);
46f65ec15   Hugh Dickins   tmpfs: convert sh...
1106
  	if (index == -1)
00501b531   Johannes Weiner   mm: memcontrol: r...
1107
  		return -EAGAIN;	/* tell shmem_unuse we found nothing */
2e0e26c76   Hugh Dickins   tmpfs: open a win...
1108

1b1b32f2c   Hugh Dickins   tmpfs: fix shmem_...
1109
1110
  	/*
  	 * Move _head_ to start search for next from here.
1f895f75d   Al Viro   switch shmem.c to...
1111
  	 * But be careful: shmem_evict_inode checks list_empty without taking
1b1b32f2c   Hugh Dickins   tmpfs: fix shmem_...
1112
  	 * mutex, and there's an instant in list_move_tail when info->swaplist
285b2c4fd   Hugh Dickins   tmpfs: demolish o...
1113
  	 * would appear empty, if it were the only one on shmem_swaplist.
1b1b32f2c   Hugh Dickins   tmpfs: fix shmem_...
1114
1115
1116
  	 */
  	if (shmem_swaplist.next != &info->swaplist)
  		list_move_tail(&shmem_swaplist, &info->swaplist);
2e0e26c76   Hugh Dickins   tmpfs: open a win...
1117

bde05d1cc   Hugh Dickins   shmem: replace pa...
1118
1119
1120
1121
1122
1123
1124
  	gfp = mapping_gfp_mask(mapping);
  	if (shmem_should_replace_page(*pagep, gfp)) {
  		mutex_unlock(&shmem_swaplist_mutex);
  		error = shmem_replace_page(pagep, gfp, info, index);
  		mutex_lock(&shmem_swaplist_mutex);
  		/*
  		 * We needed to drop mutex to make that restrictive page
0142ef6cd   Hugh Dickins   shmem: replace_pa...
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
  		 * allocation, but the inode might have been freed while we
  		 * dropped it: although a racing shmem_evict_inode() cannot
  		 * complete without emptying the radix_tree, our page lock
  		 * on this swapcache page is not enough to prevent that -
  		 * free_swap_and_cache() of our swap entry will only
  		 * trylock_page(), removing swap from radix_tree whatever.
  		 *
  		 * We must not proceed to shmem_add_to_page_cache() if the
  		 * inode has been freed, but of course we cannot rely on
  		 * inode or mapping or info to check that.  However, we can
  		 * safely check if our swap entry is still in use (and here
  		 * it can't have got reused for another page): if it's still
  		 * in use, then the inode cannot have been freed yet, and we
  		 * can safely proceed (if it's no longer in use, that tells
  		 * nothing about the inode, but we don't need to unuse swap).
bde05d1cc   Hugh Dickins   shmem: replace pa...
1140
1141
1142
1143
  		 */
  		if (!page_swapcount(*pagep))
  			error = -ENOENT;
  	}
d13d14430   KAMEZAWA Hiroyuki   memcg: handle swa...
1144
  	/*
778dd893a   Hugh Dickins   tmpfs: fix race b...
1145
1146
1147
  	 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
  	 * but also to hold up shmem_evict_inode(): so inode cannot be freed
  	 * beneath us (pagelock doesn't help until the page is in pagecache).
d13d14430   KAMEZAWA Hiroyuki   memcg: handle swa...
1148
  	 */
bde05d1cc   Hugh Dickins   shmem: replace pa...
1149
1150
  	if (!error)
  		error = shmem_add_to_page_cache(*pagep, mapping, index,
fed400a18   Wang Sheng-Hui   mm/shmem.c: remov...
1151
  						radswap);
48f170fb7   Hugh Dickins   tmpfs: simplify u...
1152
  	if (error != -ENOMEM) {
46f65ec15   Hugh Dickins   tmpfs: convert sh...
1153
1154
1155
1156
  		/*
  		 * Truncation and eviction use free_swap_and_cache(), which
  		 * only does trylock page: if we raced, best clean up here.
  		 */
bde05d1cc   Hugh Dickins   shmem: replace pa...
1157
1158
  		delete_from_swap_cache(*pagep);
  		set_page_dirty(*pagep);
46f65ec15   Hugh Dickins   tmpfs: convert sh...
1159
  		if (!error) {
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1160
  			spin_lock_irq(&info->lock);
46f65ec15   Hugh Dickins   tmpfs: convert sh...
1161
  			info->swapped--;
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1162
  			spin_unlock_irq(&info->lock);
46f65ec15   Hugh Dickins   tmpfs: convert sh...
1163
1164
  			swap_free(swap);
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1165
  	}
2e0e26c76   Hugh Dickins   tmpfs: open a win...
1166
  	return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1167
1168
1169
  }
  
/*
 * shmem_unuse - called from swapoff: find the shmem inode holding @swap
 * and replace the swap entry with @page.
 *
 * @swap: swap entry being unused by swapoff
 * @page: locked swapcache page holding the data for @swap
 *
 * Search through swapped inodes to find and replace swap by page.
 *
 * Returns 0 on success or when nothing matched, -ENOMEM on charge
 * failure; other errors from shmem_unuse_inode() are folded to 0.
 * Always unlocks and drops the caller's reference on @page.
 */
int shmem_unuse(swp_entry_t swap, struct page *page)
{
	struct list_head *this, *next;
	struct shmem_inode_info *info;
	struct mem_cgroup *memcg;
	int error = 0;

	/*
	 * There's a faint possibility that swap page was replaced before
	 * caller locked it: caller will come back later with the right page.
	 */
	if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
		goto out;

	/*
	 * Charge page using GFP_KERNEL while we can wait, before taking
	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
	 * Charged back to the user (not to caller) when swap account is used.
	 */
	error = mem_cgroup_try_charge_delay(page, current->mm, GFP_KERNEL,
					    &memcg, false);
	if (error)
		goto out;
	/* No radix_tree_preload: swap entry keeps a place for page in tree */
	error = -EAGAIN;

	mutex_lock(&shmem_swaplist_mutex);
	list_for_each_safe(this, next, &shmem_swaplist) {
		info = list_entry(this, struct shmem_inode_info, swaplist);
		if (info->swapped)
			error = shmem_unuse_inode(info, swap, &page);
		else
			/* Inode no longer holds swap: prune it from the list */
			list_del_init(&info->swaplist);
		cond_resched();
		if (error != -EAGAIN)
			break;
		/* found nothing in this: move on to search the next */
	}
	mutex_unlock(&shmem_swaplist_mutex);

	if (error) {
		/* -EAGAIN (not found) and other non-ENOMEM errors are benign */
		if (error != -ENOMEM)
			error = 0;
		mem_cgroup_cancel_charge(page, memcg, false);
	} else
		/* Page now lives in shmem filecache; commit as swap charge */
		mem_cgroup_commit_charge(page, memcg, true, false);
out:
	unlock_page(page);
	put_page(page);
	return error;
}
  
/*
 * Move the page from the page cache to the swap cache.
 *
 * Called with @page locked; on success the page is handed to
 * swap_writepage() (which unlocks it), otherwise it is redirtied and
 * either returned locked (for reclaim) or unlocked.
 */
static int shmem_writepage(struct page *page, struct writeback_control *wbc)
{
	struct shmem_inode_info *info;
	struct address_space *mapping;
	struct inode *inode;
	swp_entry_t swap;
	pgoff_t index;

	/* Huge pages are split before reaching writepage */
	VM_BUG_ON_PAGE(PageCompound(page), page);
	BUG_ON(!PageLocked(page));
	mapping = page->mapping;
	index = page->index;
	inode = mapping->host;
	info = SHMEM_I(inode);
	/* mlocked shmem must stay resident; and no swap => nowhere to go */
	if (info->flags & VM_LOCKED)
		goto redirty;
	if (!total_swap_pages)
		goto redirty;

	/*
	 * Our capabilities prevent regular writeback or sync from ever calling
	 * shmem_writepage; but a stacking filesystem might use ->writepage of
	 * its underlying filesystem, in which case tmpfs should write out to
	 * swap only in response to memory pressure, and not for the writeback
	 * threads or sync.
	 */
	if (!wbc->for_reclaim) {
		WARN_ON_ONCE(1);	/* Still happens? Tell us about it! */
		goto redirty;
	}

	/*
	 * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
	 * value into swapfile.c, the only way we can correctly account for a
	 * fallocated page arriving here is now to initialize it and write it.
	 *
	 * That's okay for a page already fallocated earlier, but if we have
	 * not yet completed the fallocation, then (a) we want to keep track
	 * of this page in case we have to undo it, and (b) it may not be a
	 * good idea to continue anyway, once we're pushing into swap.  So
	 * reactivate the page, and let shmem_fallocate() quit when too many.
	 */
	if (!PageUptodate(page)) {
		if (inode->i_private) {
			struct shmem_falloc *shmem_falloc;
			spin_lock(&inode->i_lock);
			shmem_falloc = inode->i_private;
			/* !waitq distinguishes fallocate from hole-punch use */
			if (shmem_falloc &&
			    !shmem_falloc->waitq &&
			    index >= shmem_falloc->start &&
			    index < shmem_falloc->next)
				shmem_falloc->nr_unswapped++;
			else
				shmem_falloc = NULL;
			spin_unlock(&inode->i_lock);
			if (shmem_falloc)
				goto redirty;
		}
		clear_highpage(page);
		flush_dcache_page(page);
		SetPageUptodate(page);
	}

	swap = get_swap_page(page);
	if (!swap.val)
		goto redirty;

	/*
	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
	 * if it's not already there.  Do it now before the page is
	 * moved to swap cache, when its pagelock no longer protects
	 * the inode from eviction.  But don't unlock the mutex until
	 * we've incremented swapped, because shmem_unuse_inode() will
	 * prune a !swapped inode from the swaplist under this mutex.
	 */
	mutex_lock(&shmem_swaplist_mutex);
	if (list_empty(&info->swaplist))
		list_add_tail(&info->swaplist, &shmem_swaplist);

	if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
		spin_lock_irq(&info->lock);
		shmem_recalc_inode(inode);
		info->swapped++;
		spin_unlock_irq(&info->lock);

		swap_shmem_alloc(swap);
		shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
		mutex_unlock(&shmem_swaplist_mutex);
		BUG_ON(page_mapped(page));
		/* swap_writepage() unlocks the page after submitting the IO */
		swap_writepage(page, wbc);
		return 0;
	}

	/* add_to_swap_cache failed: give the swap slot back */
	mutex_unlock(&shmem_swaplist_mutex);
	put_swap_page(page, swap);
redirty:
	set_page_dirty(page);
	if (wbc->for_reclaim)
		return AOP_WRITEPAGE_ACTIVATE;	/* Return with page locked */
	unlock_page(page);
	return 0;
}
75edd345e   Hugh Dickins   tmpfs: preliminar...
1326
  #if defined(CONFIG_NUMA) && defined(CONFIG_TMPFS)
71fe804b6   Lee Schermerhorn   mempolicy: use st...
1327
  static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
680d794ba   akpm@linux-foundation.org   mount options: fi...
1328
  {
095f1fc4e   Lee Schermerhorn   mempolicy: rework...
1329
  	char buffer[64];
680d794ba   akpm@linux-foundation.org   mount options: fi...
1330

71fe804b6   Lee Schermerhorn   mempolicy: use st...
1331
  	if (!mpol || mpol->mode == MPOL_DEFAULT)
095f1fc4e   Lee Schermerhorn   mempolicy: rework...
1332
  		return;		/* show nothing */
680d794ba   akpm@linux-foundation.org   mount options: fi...
1333

a7a88b237   Hugh Dickins   mempolicy: remove...
1334
  	mpol_to_str(buffer, sizeof(buffer), mpol);
095f1fc4e   Lee Schermerhorn   mempolicy: rework...
1335
1336
  
  	seq_printf(seq, ",mpol=%s", buffer);
680d794ba   akpm@linux-foundation.org   mount options: fi...
1337
  }
71fe804b6   Lee Schermerhorn   mempolicy: use st...
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
  
  static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
  {
  	struct mempolicy *mpol = NULL;
  	if (sbinfo->mpol) {
  		spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
  		mpol = sbinfo->mpol;
  		mpol_get(mpol);
  		spin_unlock(&sbinfo->stat_lock);
  	}
  	return mpol;
  }
#else /* !CONFIG_NUMA || !CONFIG_TMPFS */
/* Stub: mempolicy mount options only exist with NUMA + TMPFS. */
static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
{
}
/* Stub: no superblock mempolicy without NUMA + TMPFS. */
static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
{
	return NULL;
}
#endif /* CONFIG_NUMA && CONFIG_TMPFS */
/* Without NUMA, reuse vm_private_data to carry the pseudo-vma's policy */
#ifndef CONFIG_NUMA
#define vm_policy vm_private_data
#endif
/*
 * Set up an on-stack pseudo vma carrying just the mempolicy for @index,
 * so the page allocator's policy-aware paths can be reused.  Must be
 * paired with shmem_pseudo_vma_destroy() to drop the policy reference.
 */
static void shmem_pseudo_vma_init(struct vm_area_struct *vma,
		struct shmem_inode_info *info, pgoff_t index)
{
	/* Create a pseudo vma that just contains the policy */
	vma_init(vma, NULL);
	/* Bias interleave by inode number to distribute better across nodes */
	vma->vm_pgoff = index + info->vfs_inode.i_ino;
	vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index);
}

static void shmem_pseudo_vma_destroy(struct vm_area_struct *vma)
{
	/* Drop reference taken by mpol_shared_policy_lookup() */
	mpol_cond_put(vma->vm_policy);
}
/*
 * Read the page for @swap into the swap cache (with readahead),
 * honouring the inode's mempolicy via a temporary pseudo vma.
 * Returns the swapcache page, or NULL on allocation failure.
 */
static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
			struct shmem_inode_info *info, pgoff_t index)
{
	struct vm_area_struct pvma;
	struct page *page;
	struct vm_fault vmf;

	shmem_pseudo_vma_init(&pvma, info, index);
	/* Minimal vm_fault: readahead only consults vma and address */
	vmf.vma = &pvma;
	vmf.address = 0;
	page = swap_cluster_readahead(swap, gfp, &vmf);
	shmem_pseudo_vma_destroy(&pvma);

	return page;
}
  
/*
 * Allocate a transparent huge page for the aligned range covering @index.
 * Returns NULL if THP pagecache is not configured, if any entry already
 * occupies the huge-aligned range in the mapping, or if allocation fails.
 */
static struct page *shmem_alloc_hugepage(gfp_t gfp,
		struct shmem_inode_info *info, pgoff_t index)
{
	struct vm_area_struct pvma;
	struct inode *inode = &info->vfs_inode;
	struct address_space *mapping = inode->i_mapping;
	pgoff_t idx, hindex;
	void __rcu **results;
	struct page *page;

	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
		return NULL;

	hindex = round_down(index, HPAGE_PMD_NR);
	/*
	 * Probe the radix tree under RCU: if any slot (page or swap entry)
	 * exists within the huge range, a huge page cannot be inserted.
	 */
	rcu_read_lock();
	if (radix_tree_gang_lookup_slot(&mapping->i_pages, &results, &idx,
				hindex, 1) && idx < hindex + HPAGE_PMD_NR) {
		rcu_read_unlock();
		return NULL;
	}
	rcu_read_unlock();

	/* NORETRY/NOWARN: huge allocation is opportunistic, caller falls back */
	shmem_pseudo_vma_init(&pvma, info, hindex);
	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
	shmem_pseudo_vma_destroy(&pvma);
	if (page)
		prep_transhuge_page(page);
	return page;
}
02098feaa   Hugh Dickins   swapin needs gfp_...
1422
  static struct page *shmem_alloc_page(gfp_t gfp,
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1423
  			struct shmem_inode_info *info, pgoff_t index)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1424
1425
  {
  	struct vm_area_struct pvma;
18a2f371f   Mel Gorman   tmpfs: fix shared...
1426
  	struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1427

800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1428
1429
1430
1431
1432
1433
1434
1435
  	shmem_pseudo_vma_init(&pvma, info, index);
  	page = alloc_page_vma(gfp, &pvma, 0);
  	shmem_pseudo_vma_destroy(&pvma);
  
  	return page;
  }
  
/*
 * Allocate a page (huge if @huge) for @index and charge it against the
 * inode's block accounting.  On success the page is returned locked
 * (__SetPageLocked) and marked swap-backed.  On failure returns
 * ERR_PTR(-ENOSPC) when accounting fails, ERR_PTR(-ENOMEM) when the
 * allocation itself fails (accounting is rolled back).
 */
static struct page *shmem_alloc_and_acct_page(gfp_t gfp,
		struct inode *inode,
		pgoff_t index, bool huge)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct page *page;
	int nr;
	int err = -ENOSPC;

	/* Silently degrade huge requests when THP pagecache is not built in */
	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
		huge = false;
	nr = huge ? HPAGE_PMD_NR : 1;

	if (!shmem_inode_acct_block(inode, nr))
		goto failed;

	if (huge)
		page = shmem_alloc_hugepage(gfp, info, index);
	else
		page = shmem_alloc_page(gfp, info, index);
	if (page) {
		__SetPageLocked(page);
		__SetPageSwapBacked(page);
		return page;
	}

	/* Allocation failed: undo the block accounting done above */
	err = -ENOMEM;
	shmem_inode_unacct_blocks(inode, nr);
failed:
	return ERR_PTR(err);
}
/*
 * When a page is moved from swapcache to shmem filecache (either by the
 * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of
 * shmem_unuse_inode()), it may have been read in earlier from swap, in
 * ignorance of the mapping it belongs to.  If that mapping has special
 * constraints (like the gma500 GEM driver, which requires RAM below 4GB),
 * we may need to copy to a suitable page before moving to filecache.
 *
 * In a future release, this may well be extended to respect cpuset and
 * NUMA mempolicy, and applied also to anonymous pages in do_swap_page();
 * but for now it is a simple matter of zone.
 */
static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
{
	/* True when the page sits in a higher zone than @gfp permits */
	return page_zonenum(page) > gfp_zone(gfp);
}
  
/*
 * Replace *pagep (a locked swapcache page in an unsuitable zone) with a
 * fresh copy allocated under @gfp, substituting it in the swap cache.
 *
 * On success *pagep points to the new locked page and 0 is returned;
 * the old page's swapcache state is cleared and both references on it
 * are dropped.  On error the new page is cleaned up instead and the
 * error from shmem_radix_tree_replace() is returned.
 */
static int shmem_replace_page(struct page **pagep, gfp_t gfp,
				struct shmem_inode_info *info, pgoff_t index)
{
	struct page *oldpage, *newpage;
	struct address_space *swap_mapping;
	swp_entry_t entry;
	pgoff_t swap_index;
	int error;

	oldpage = *pagep;
	entry.val = page_private(oldpage);
	swap_index = swp_offset(entry);
	swap_mapping = page_mapping(oldpage);

	/*
	 * We have arrived here because our zones are constrained, so don't
	 * limit chance of success by further cpuset and node constraints.
	 */
	gfp &= ~GFP_CONSTRAINT_MASK;
	newpage = shmem_alloc_page(gfp, info, index);
	if (!newpage)
		return -ENOMEM;

	/* Extra reference: newpage must survive as a swapcache entry */
	get_page(newpage);
	copy_highpage(newpage, oldpage);
	flush_dcache_page(newpage);

	/* Mirror oldpage's swapcache state onto the new copy */
	__SetPageLocked(newpage);
	__SetPageSwapBacked(newpage);
	SetPageUptodate(newpage);
	set_page_private(newpage, entry.val);
	SetPageSwapCache(newpage);

	/*
	 * Our caller will very soon move newpage out of swapcache, but it's
	 * a nice clean interface for us to replace oldpage by newpage there.
	 */
	xa_lock_irq(&swap_mapping->i_pages);
	error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
								   newpage);
	if (!error) {
		__inc_node_page_state(newpage, NR_FILE_PAGES);
		__dec_node_page_state(oldpage, NR_FILE_PAGES);
	}
	xa_unlock_irq(&swap_mapping->i_pages);

	if (unlikely(error)) {
		/*
		 * Is this possible?  I think not, now that our callers check
		 * both PageSwapCache and page_private after getting page lock;
		 * but be defensive.  Reverse old to newpage for clear and free.
		 */
		oldpage = newpage;
	} else {
		mem_cgroup_migrate(oldpage, newpage);
		lru_cache_add_anon(newpage);
		*pagep = newpage;
	}

	/* Strip swapcache state from whichever page is being discarded */
	ClearPageSwapCache(oldpage);
	set_page_private(oldpage, 0);

	unlock_page(oldpage);
	/* Two puts: the caller's reference and the swapcache reference */
	put_page(oldpage);
	put_page(oldpage);
	return error;
}
  
  /*
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
1552
   * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1553
1554
1555
   *
   * If we allocate a new one we do not mark it dirty. That's up to the
   * vm. If we swap it in we mark it dirty since we also free the swap
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
1556
1557
1558
1559
   * entry since a page cannot live in both the swap and page cache.
   *
   * fault_mm and fault_type are only supplied by shmem_fault:
   * otherwise they are NULL.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1560
   */
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1561
  static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
1562
  	struct page **pagep, enum sgp_type sgp, gfp_t gfp,
2b7403035   Souptick Joarder   mm: Change return...
1563
1564
  	struct vm_area_struct *vma, struct vm_fault *vmf,
  			vm_fault_t *fault_type)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1565
1566
  {
  	struct address_space *mapping = inode->i_mapping;
23f919d4a   Arnd Bergmann   shmem: avoid mayb...
1567
  	struct shmem_inode_info *info = SHMEM_I(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1568
  	struct shmem_sb_info *sbinfo;
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
1569
  	struct mm_struct *charge_mm;
00501b531   Johannes Weiner   mm: memcontrol: r...
1570
  	struct mem_cgroup *memcg;
27ab70062   Hugh Dickins   tmpfs: simplify f...
1571
  	struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1572
  	swp_entry_t swap;
657e3038c   Kirill A. Shutemov   shmem, thp: respe...
1573
  	enum sgp_type sgp_huge = sgp;
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1574
  	pgoff_t hindex = index;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1575
  	int error;
54af60421   Hugh Dickins   tmpfs: convert sh...
1576
  	int once = 0;
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1577
  	int alloced = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1578

09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1579
  	if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1580
  		return -EFBIG;
657e3038c   Kirill A. Shutemov   shmem, thp: respe...
1581
1582
  	if (sgp == SGP_NOHUGE || sgp == SGP_HUGE)
  		sgp = SGP_CACHE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1583
  repeat:
54af60421   Hugh Dickins   tmpfs: convert sh...
1584
  	swap.val = 0;
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
1585
  	page = find_lock_entry(mapping, index);
54af60421   Hugh Dickins   tmpfs: convert sh...
1586
1587
1588
1589
  	if (radix_tree_exceptional_entry(page)) {
  		swap = radix_to_swp_entry(page);
  		page = NULL;
  	}
75edd345e   Hugh Dickins   tmpfs: preliminar...
1590
  	if (sgp <= SGP_CACHE &&
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1591
  	    ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
54af60421   Hugh Dickins   tmpfs: convert sh...
1592
  		error = -EINVAL;
267a4c76b   Hugh Dickins   tmpfs: fix shmem_...
1593
  		goto unlock;
54af60421   Hugh Dickins   tmpfs: convert sh...
1594
  	}
66d2f4d28   Hugh Dickins   shmem: fix init_p...
1595
1596
  	if (page && sgp == SGP_WRITE)
  		mark_page_accessed(page);
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1597
1598
1599
1600
1601
  	/* fallocated page? */
  	if (page && !PageUptodate(page)) {
  		if (sgp != SGP_READ)
  			goto clear;
  		unlock_page(page);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1602
  		put_page(page);
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1603
1604
  		page = NULL;
  	}
54af60421   Hugh Dickins   tmpfs: convert sh...
1605
  	if (page || (sgp == SGP_READ && !swap.val)) {
54af60421   Hugh Dickins   tmpfs: convert sh...
1606
1607
  		*pagep = page;
  		return 0;
27ab70062   Hugh Dickins   tmpfs: simplify f...
1608
1609
1610
  	}
  
  	/*
54af60421   Hugh Dickins   tmpfs: convert sh...
1611
1612
  	 * Fast cache lookup did not find it:
  	 * bring it back from swap or allocate.
27ab70062   Hugh Dickins   tmpfs: simplify f...
1613
  	 */
54af60421   Hugh Dickins   tmpfs: convert sh...
1614
  	sbinfo = SHMEM_SB(inode->i_sb);
cfda05267   Mike Rapoport   userfaultfd: shme...
1615
  	charge_mm = vma ? vma->vm_mm : current->mm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1616

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1617
1618
  	if (swap.val) {
  		/* Look it up and read it in.. */
ec560175c   Huang Ying   mm, swap: VMA bas...
1619
  		page = lookup_swap_cache(swap, NULL, 0);
27ab70062   Hugh Dickins   tmpfs: simplify f...
1620
  		if (!page) {
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
1621
1622
  			/* Or update major stats only when swapin succeeds?? */
  			if (fault_type) {
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
1623
  				*fault_type |= VM_FAULT_MAJOR;
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
1624
  				count_vm_event(PGMAJFAULT);
2262185c5   Roman Gushchin   mm: per-cgroup me...
1625
  				count_memcg_event_mm(charge_mm, PGMAJFAULT);
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
1626
1627
  			}
  			/* Here we actually start the io */
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1628
  			page = shmem_swapin(swap, gfp, info, index);
27ab70062   Hugh Dickins   tmpfs: simplify f...
1629
  			if (!page) {
54af60421   Hugh Dickins   tmpfs: convert sh...
1630
1631
  				error = -ENOMEM;
  				goto failed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1632
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1633
1634
1635
  		}
  
  		/* We have to do this with page locked to prevent races */
54af60421   Hugh Dickins   tmpfs: convert sh...
1636
  		lock_page(page);
0142ef6cd   Hugh Dickins   shmem: replace_pa...
1637
  		if (!PageSwapCache(page) || page_private(page) != swap.val ||
d18992286   Hugh Dickins   shmem: fix negati...
1638
  		    !shmem_confirm_swap(mapping, index, swap)) {
bde05d1cc   Hugh Dickins   shmem: replace pa...
1639
  			error = -EEXIST;	/* try again */
d18992286   Hugh Dickins   shmem: fix negati...
1640
  			goto unlock;
bde05d1cc   Hugh Dickins   shmem: replace pa...
1641
  		}
27ab70062   Hugh Dickins   tmpfs: simplify f...
1642
  		if (!PageUptodate(page)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1643
  			error = -EIO;
54af60421   Hugh Dickins   tmpfs: convert sh...
1644
  			goto failed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1645
  		}
54af60421   Hugh Dickins   tmpfs: convert sh...
1646
  		wait_on_page_writeback(page);
bde05d1cc   Hugh Dickins   shmem: replace pa...
1647
1648
1649
1650
  		if (shmem_should_replace_page(page, gfp)) {
  			error = shmem_replace_page(&page, gfp, info, index);
  			if (error)
  				goto failed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1651
  		}
27ab70062   Hugh Dickins   tmpfs: simplify f...
1652

2cf855837   Tejun Heo   memcontrol: sched...
1653
  		error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
f627c2f53   Kirill A. Shutemov   memcg: adjust to ...
1654
  				false);
d18992286   Hugh Dickins   shmem: fix negati...
1655
  		if (!error) {
aa3b18955   Hugh Dickins   tmpfs: convert me...
1656
  			error = shmem_add_to_page_cache(page, mapping, index,
fed400a18   Wang Sheng-Hui   mm/shmem.c: remov...
1657
  						swp_to_radix_entry(swap));
215c02bc3   Hugh Dickins   tmpfs: fix shmem_...
1658
1659
1660
1661
1662
1663
1664
1665
  			/*
  			 * We already confirmed swap under page lock, and make
  			 * no memory allocation here, so usually no possibility
  			 * of error; but free_swap_and_cache() only trylocks a
  			 * page, so it is just possible that the entry has been
  			 * truncated or holepunched since swap was confirmed.
  			 * shmem_undo_range() will have done some of the
  			 * unaccounting, now delete_from_swap_cache() will do
93aa7d952   Vladimir Davydov   swap: remove unus...
1666
  			 * the rest.
215c02bc3   Hugh Dickins   tmpfs: fix shmem_...
1667
1668
1669
  			 * Reset swap.val? No, leave it so "failed" goes back to
  			 * "repeat": reading a hole and writing should succeed.
  			 */
00501b531   Johannes Weiner   mm: memcontrol: r...
1670
  			if (error) {
f627c2f53   Kirill A. Shutemov   memcg: adjust to ...
1671
  				mem_cgroup_cancel_charge(page, memcg, false);
215c02bc3   Hugh Dickins   tmpfs: fix shmem_...
1672
  				delete_from_swap_cache(page);
00501b531   Johannes Weiner   mm: memcontrol: r...
1673
  			}
d18992286   Hugh Dickins   shmem: fix negati...
1674
  		}
54af60421   Hugh Dickins   tmpfs: convert sh...
1675
1676
  		if (error)
  			goto failed;
f627c2f53   Kirill A. Shutemov   memcg: adjust to ...
1677
  		mem_cgroup_commit_charge(page, memcg, true, false);
00501b531   Johannes Weiner   mm: memcontrol: r...
1678

4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1679
  		spin_lock_irq(&info->lock);
285b2c4fd   Hugh Dickins   tmpfs: demolish o...
1680
  		info->swapped--;
54af60421   Hugh Dickins   tmpfs: convert sh...
1681
  		shmem_recalc_inode(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1682
  		spin_unlock_irq(&info->lock);
54af60421   Hugh Dickins   tmpfs: convert sh...
1683

66d2f4d28   Hugh Dickins   shmem: fix init_p...
1684
1685
  		if (sgp == SGP_WRITE)
  			mark_page_accessed(page);
54af60421   Hugh Dickins   tmpfs: convert sh...
1686
  		delete_from_swap_cache(page);
27ab70062   Hugh Dickins   tmpfs: simplify f...
1687
1688
  		set_page_dirty(page);
  		swap_free(swap);
54af60421   Hugh Dickins   tmpfs: convert sh...
1689
  	} else {
cfda05267   Mike Rapoport   userfaultfd: shme...
1690
1691
1692
1693
  		if (vma && userfaultfd_missing(vma)) {
  			*fault_type = handle_userfault(vmf, VM_UFFD_MISSING);
  			return 0;
  		}
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1694
1695
1696
  		/* shmem_symlink() */
  		if (mapping->a_ops != &shmem_aops)
  			goto alloc_nohuge;
657e3038c   Kirill A. Shutemov   shmem, thp: respe...
1697
  		if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE)
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
  			goto alloc_nohuge;
  		if (shmem_huge == SHMEM_HUGE_FORCE)
  			goto alloc_huge;
  		switch (sbinfo->huge) {
  			loff_t i_size;
  			pgoff_t off;
  		case SHMEM_HUGE_NEVER:
  			goto alloc_nohuge;
  		case SHMEM_HUGE_WITHIN_SIZE:
  			off = round_up(index, HPAGE_PMD_NR);
  			i_size = round_up(i_size_read(inode), PAGE_SIZE);
  			if (i_size >= HPAGE_PMD_SIZE &&
  					i_size >> PAGE_SHIFT >= off)
  				goto alloc_huge;
  			/* fallthrough */
  		case SHMEM_HUGE_ADVISE:
657e3038c   Kirill A. Shutemov   shmem, thp: respe...
1714
1715
1716
  			if (sgp_huge == SGP_HUGE)
  				goto alloc_huge;
  			/* TODO: implement fadvise() hints */
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1717
  			goto alloc_nohuge;
54af60421   Hugh Dickins   tmpfs: convert sh...
1718
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1719

800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1720
  alloc_huge:
0f0796945   Mike Rapoport   shmem: introduce ...
1721
  		page = shmem_alloc_and_acct_page(gfp, inode, index, true);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1722
  		if (IS_ERR(page)) {
0f0796945   Mike Rapoport   shmem: introduce ...
1723
  alloc_nohuge:		page = shmem_alloc_and_acct_page(gfp, inode,
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1724
  					index, false);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1725
  		}
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1726
  		if (IS_ERR(page)) {
779750d20   Kirill A. Shutemov   shmem: split huge...
1727
  			int retry = 5;
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1728
1729
  			error = PTR_ERR(page);
  			page = NULL;
779750d20   Kirill A. Shutemov   shmem: split huge...
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
  			if (error != -ENOSPC)
  				goto failed;
  			/*
  			 * Try to reclaim some spece by splitting a huge page
  			 * beyond i_size on the filesystem.
  			 */
  			while (retry--) {
  				int ret;
  				ret = shmem_unused_huge_shrink(sbinfo, NULL, 1);
  				if (ret == SHRINK_STOP)
  					break;
  				if (ret)
  					goto alloc_nohuge;
  			}
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1744
1745
1746
1747
1748
1749
1750
  			goto failed;
  		}
  
  		if (PageTransHuge(page))
  			hindex = round_down(index, HPAGE_PMD_NR);
  		else
  			hindex = index;
66d2f4d28   Hugh Dickins   shmem: fix init_p...
1751
  		if (sgp == SGP_WRITE)
eb39d618f   Hugh Dickins   mm: replace init_...
1752
  			__SetPageReferenced(page);
66d2f4d28   Hugh Dickins   shmem: fix init_p...
1753

2cf855837   Tejun Heo   memcontrol: sched...
1754
  		error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1755
  				PageTransHuge(page));
54af60421   Hugh Dickins   tmpfs: convert sh...
1756
  		if (error)
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1757
1758
1759
  			goto unacct;
  		error = radix_tree_maybe_preload_order(gfp & GFP_RECLAIM_MASK,
  				compound_order(page));
b065b4321   Hugh Dickins   shmem: cleanup sh...
1760
  		if (!error) {
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1761
  			error = shmem_add_to_page_cache(page, mapping, hindex,
fed400a18   Wang Sheng-Hui   mm/shmem.c: remov...
1762
  							NULL);
b065b4321   Hugh Dickins   shmem: cleanup sh...
1763
1764
1765
  			radix_tree_preload_end();
  		}
  		if (error) {
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1766
1767
1768
  			mem_cgroup_cancel_charge(page, memcg,
  					PageTransHuge(page));
  			goto unacct;
b065b4321   Hugh Dickins   shmem: cleanup sh...
1769
  		}
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1770
1771
  		mem_cgroup_commit_charge(page, memcg, false,
  				PageTransHuge(page));
54af60421   Hugh Dickins   tmpfs: convert sh...
1772
  		lru_cache_add_anon(page);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1773
  		spin_lock_irq(&info->lock);
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1774
1775
  		info->alloced += 1 << compound_order(page);
  		inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page);
54af60421   Hugh Dickins   tmpfs: convert sh...
1776
  		shmem_recalc_inode(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1777
  		spin_unlock_irq(&info->lock);
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1778
  		alloced = true;
54af60421   Hugh Dickins   tmpfs: convert sh...
1779

779750d20   Kirill A. Shutemov   shmem: split huge...
1780
1781
1782
1783
1784
1785
1786
1787
  		if (PageTransHuge(page) &&
  				DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
  				hindex + HPAGE_PMD_NR - 1) {
  			/*
  			 * Part of the huge page is beyond i_size: subject
  			 * to shrink under memory pressure.
  			 */
  			spin_lock(&sbinfo->shrinklist_lock);
d041353dc   Cong Wang   mm: fix list corr...
1788
1789
1790
1791
1792
  			/*
  			 * _careful to defend against unlocked access to
  			 * ->shrink_list in shmem_unused_huge_shrink()
  			 */
  			if (list_empty_careful(&info->shrinklist)) {
779750d20   Kirill A. Shutemov   shmem: split huge...
1793
1794
1795
1796
1797
1798
  				list_add_tail(&info->shrinklist,
  						&sbinfo->shrinklist);
  				sbinfo->shrinklist_len++;
  			}
  			spin_unlock(&sbinfo->shrinklist_lock);
  		}
ec9516fbc   Hugh Dickins   tmpfs: optimize c...
1799
  		/*
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1800
1801
1802
1803
1804
1805
1806
1807
1808
  		 * Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
  		 */
  		if (sgp == SGP_FALLOC)
  			sgp = SGP_WRITE;
  clear:
  		/*
  		 * Let SGP_WRITE caller clear ends if write does not fill page;
  		 * but SGP_FALLOC on a page fallocated earlier must initialize
  		 * it now, lest undo on failure cancel our earlier guarantee.
ec9516fbc   Hugh Dickins   tmpfs: optimize c...
1809
  		 */
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1810
1811
1812
1813
1814
1815
1816
1817
1818
  		if (sgp != SGP_WRITE && !PageUptodate(page)) {
  			struct page *head = compound_head(page);
  			int i;
  
  			for (i = 0; i < (1 << compound_order(head)); i++) {
  				clear_highpage(head + i);
  				flush_dcache_page(head + i);
  			}
  			SetPageUptodate(head);
ec9516fbc   Hugh Dickins   tmpfs: optimize c...
1819
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1820
  	}
bde05d1cc   Hugh Dickins   shmem: replace pa...
1821

54af60421   Hugh Dickins   tmpfs: convert sh...
1822
  	/* Perhaps the file has been truncated since we checked */
75edd345e   Hugh Dickins   tmpfs: preliminar...
1823
  	if (sgp <= SGP_CACHE &&
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1824
  	    ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
267a4c76b   Hugh Dickins   tmpfs: fix shmem_...
1825
1826
1827
  		if (alloced) {
  			ClearPageDirty(page);
  			delete_from_page_cache(page);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1828
  			spin_lock_irq(&info->lock);
267a4c76b   Hugh Dickins   tmpfs: fix shmem_...
1829
  			shmem_recalc_inode(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1830
  			spin_unlock_irq(&info->lock);
267a4c76b   Hugh Dickins   tmpfs: fix shmem_...
1831
  		}
54af60421   Hugh Dickins   tmpfs: convert sh...
1832
  		error = -EINVAL;
267a4c76b   Hugh Dickins   tmpfs: fix shmem_...
1833
  		goto unlock;
e83c32e8f   Hugh Dickins   tmpfs: simplify p...
1834
  	}
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1835
  	*pagep = page + index - hindex;
54af60421   Hugh Dickins   tmpfs: convert sh...
1836
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1837

59a16ead5   Hugh Dickins   tmpfs: fix spurio...
1838
  	/*
54af60421   Hugh Dickins   tmpfs: convert sh...
1839
  	 * Error recovery.
59a16ead5   Hugh Dickins   tmpfs: fix spurio...
1840
  	 */
54af60421   Hugh Dickins   tmpfs: convert sh...
1841
  unacct:
0f0796945   Mike Rapoport   shmem: introduce ...
1842
  	shmem_inode_unacct_blocks(inode, 1 << compound_order(page));
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
1843
1844
1845
1846
1847
1848
  
  	if (PageTransHuge(page)) {
  		unlock_page(page);
  		put_page(page);
  		goto alloc_nohuge;
  	}
54af60421   Hugh Dickins   tmpfs: convert sh...
1849
  failed:
267a4c76b   Hugh Dickins   tmpfs: fix shmem_...
1850
  	if (swap.val && !shmem_confirm_swap(mapping, index, swap))
d18992286   Hugh Dickins   shmem: fix negati...
1851
1852
  		error = -EEXIST;
  unlock:
27ab70062   Hugh Dickins   tmpfs: simplify f...
1853
  	if (page) {
54af60421   Hugh Dickins   tmpfs: convert sh...
1854
  		unlock_page(page);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1855
  		put_page(page);
54af60421   Hugh Dickins   tmpfs: convert sh...
1856
1857
  	}
  	if (error == -ENOSPC && !once++) {
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1858
  		spin_lock_irq(&info->lock);
54af60421   Hugh Dickins   tmpfs: convert sh...
1859
  		shmem_recalc_inode(inode);
4595ef88d   Kirill A. Shutemov   shmem: make shmem...
1860
  		spin_unlock_irq(&info->lock);
27ab70062   Hugh Dickins   tmpfs: simplify f...
1861
  		goto repeat;
ff36b8016   Shaohua Li   shmem: reduce pag...
1862
  	}
d18992286   Hugh Dickins   shmem: fix negati...
1863
  	if (error == -EEXIST)	/* from above or from radix_tree_insert */
54af60421   Hugh Dickins   tmpfs: convert sh...
1864
1865
  		goto repeat;
  	return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1866
  }
10d20bd25   Linus Torvalds   shmem: fix shm fa...
1867
1868
1869
1870
1871
/*
 * This is like autoremove_wake_function, but it removes the wait queue
 * entry unconditionally - even if something else had already woken the
 * target.
 */
static int synchronous_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
	/* Wake the waiter first; keep its return value to report to caller. */
	int ret = default_wake_function(wait, mode, sync, key);
	/* Unconditional removal: entry is gone even if the wake was spurious. */
	list_del_init(&wait->entry);
	return ret;
}
20acce679   Souptick Joarder   mm/shmem.c: use n...
1878
/*
 * Page-fault handler for shmem/tmpfs mappings: looks up (or allocates) the
 * page via shmem_getpage_gfp(), after first stalling any fault that lands
 * inside a hole currently being punched by shmem_fallocate().
 */
static vm_fault_t shmem_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct inode *inode = file_inode(vma->vm_file);
	gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
	enum sgp_type sgp;
	int err;
	vm_fault_t ret = VM_FAULT_LOCKED;

	/*
	 * Trinity finds that probing a hole which tmpfs is punching can
	 * prevent the hole-punch from ever completing: which in turn
	 * locks writers out with its hold on i_mutex.  So refrain from
	 * faulting pages into the hole while it's being punched.  Although
	 * shmem_undo_range() does remove the additions, it may be unable to
	 * keep up, as each new page needs its own unmap_mapping_range() call,
	 * and the i_mmap tree grows ever slower to scan if new vmas are added.
	 *
	 * It does not matter if we sometimes reach this check just before the
	 * hole-punch begins, so that one fault then races with the punch:
	 * we just need to make racing faults a rare case.
	 *
	 * The implementation below would be much simpler if we just used a
	 * standard mutex or completion: but we cannot take i_mutex in fault,
	 * and bloating every shmem inode for this unlikely case would be sad.
	 */
	if (unlikely(inode->i_private)) {
		struct shmem_falloc *shmem_falloc;

		spin_lock(&inode->i_lock);
		shmem_falloc = inode->i_private;
		/* Only stall if a hole-punch (waitq set) covers our pgoff. */
		if (shmem_falloc &&
		    shmem_falloc->waitq &&
		    vmf->pgoff >= shmem_falloc->start &&
		    vmf->pgoff < shmem_falloc->next) {
			wait_queue_head_t *shmem_falloc_waitq;
			DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function);

			ret = VM_FAULT_NOPAGE;
			if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
			   !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
				/* It's polite to up mmap_sem if we can */
				up_read(&vma->vm_mm->mmap_sem);
				ret = VM_FAULT_RETRY;
			}

			shmem_falloc_waitq = shmem_falloc->waitq;
			prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
					TASK_UNINTERRUPTIBLE);
			spin_unlock(&inode->i_lock);
			schedule();

			/*
			 * shmem_falloc_waitq points into the shmem_fallocate()
			 * stack of the hole-punching task: shmem_falloc_waitq
			 * is usually invalid by the time we reach here, but
			 * finish_wait() does not dereference it in that case;
			 * though i_lock needed lest racing with wake_up_all().
			 */
			spin_lock(&inode->i_lock);
			finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
			spin_unlock(&inode->i_lock);
			return ret;
		}
		spin_unlock(&inode->i_lock);
	}

	/*
	 * Choose huge-page policy for shmem_getpage_gfp() from the vma flags:
	 * explicit MADV_NOHUGEPAGE / prctl(MMF_DISABLE_THP) forbid huge pages,
	 * MADV_HUGEPAGE requests them, otherwise leave it to mount defaults.
	 */
	sgp = SGP_CACHE;

	if ((vma->vm_flags & VM_NOHUGEPAGE) ||
	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
		sgp = SGP_NOHUGE;
	else if (vma->vm_flags & VM_HUGEPAGE)
		sgp = SGP_HUGE;

	err = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp,
				  gfp, vma, vmf, &ret);
	if (err)
		return vmf_error(err);
	return ret;
}
c01d5b300   Hugh Dickins   shmem: get_unmapp...
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
/*
 * Pick an unmapped area for a shmem mapping.  First defer to the mm's
 * default get_unmapped_area(); if huge pages are possible and worthwhile,
 * redo the search over an inflated length and round the result so that
 * file offset and address are congruent modulo HPAGE_PMD_SIZE, letting
 * the mapping use PMD-sized pages.  On any obstacle, fall back to the
 * address the default search produced.
 */
unsigned long shmem_get_unmapped_area(struct file *file,
				      unsigned long uaddr, unsigned long len,
				      unsigned long pgoff, unsigned long flags)
{
	unsigned long (*get_area)(struct file *,
		unsigned long, unsigned long, unsigned long, unsigned long);
	unsigned long addr;
	unsigned long offset;
	unsigned long inflated_len;
	unsigned long inflated_addr;
	unsigned long inflated_offset;

	if (len > TASK_SIZE)
		return -ENOMEM;

	get_area = current->mm->get_unmapped_area;
	addr = get_area(file, uaddr, len, pgoff, flags);

	/* Without huge pagecache support there is nothing to optimize. */
	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
		return addr;
	/* Propagate errors and already-unsuitable results unchanged. */
	if (IS_ERR_VALUE(addr))
		return addr;
	if (addr & ~PAGE_MASK)
		return addr;
	if (addr > TASK_SIZE - len)
		return addr;

	if (shmem_huge == SHMEM_HUGE_DENY)
		return addr;
	if (len < HPAGE_PMD_SIZE)
		return addr;
	if (flags & MAP_FIXED)
		return addr;
	/*
	 * Our priority is to support MAP_SHARED mapped hugely;
	 * and support MAP_PRIVATE mapped hugely too, until it is COWed.
	 * But if caller specified an address hint, respect that as before.
	 */
	if (uaddr)
		return addr;

	if (shmem_huge != SHMEM_HUGE_FORCE) {
		struct super_block *sb;

		if (file) {
			VM_BUG_ON(file->f_op != &shmem_file_operations);
			sb = file_inode(file)->i_sb;
		} else {
			/*
			 * Called directly from mm/mmap.c, or drivers/char/mem.c
			 * for "/dev/zero", to create a shared anonymous object.
			 */
			if (IS_ERR(shm_mnt))
				return addr;
			sb = shm_mnt->mnt_sb;
		}
		if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER)
			return addr;
	}

	/* If the default address already lines up with the file offset, keep it. */
	offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE-1);
	if (offset && offset + len < 2 * HPAGE_PMD_SIZE)
		return addr;
	if ((addr & (HPAGE_PMD_SIZE-1)) == offset)
		return addr;

	/* Search a larger window so we can slide to a PMD-congruent address. */
	inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE;
	if (inflated_len > TASK_SIZE)
		return addr;
	if (inflated_len < len)		/* overflow check */
		return addr;

	inflated_addr = get_area(NULL, 0, inflated_len, 0, flags);
	if (IS_ERR_VALUE(inflated_addr))
		return addr;
	if (inflated_addr & ~PAGE_MASK)
		return addr;

	/* Align inflated_addr so it shares the file offset's PMD remainder. */
	inflated_offset = inflated_addr & (HPAGE_PMD_SIZE-1);
	inflated_addr += offset - inflated_offset;
	if (inflated_offset > offset)
		inflated_addr += HPAGE_PMD_SIZE;

	if (inflated_addr > TASK_SIZE - len)
		return addr;
	return inflated_addr;
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2044
  #ifdef CONFIG_NUMA
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
2045
  static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2046
  {
496ad9aa8   Al Viro   new helper: file_...
2047
  	struct inode *inode = file_inode(vma->vm_file);
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
2048
  	return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2049
  }
d8dc74f21   Adrian Bunk   mm/shmem.c: make ...
2050
2051
  static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
  					  unsigned long addr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2052
  {
496ad9aa8   Al Viro   new helper: file_...
2053
  	struct inode *inode = file_inode(vma->vm_file);
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
2054
  	pgoff_t index;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2055

41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
2056
2057
  	index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
  	return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2058
2059
2060
2061
2062
  }
  #endif
  
/*
 * Lock (@lock != 0) or unlock a shmem file in memory, charging/uncharging
 * the locked size against @user's RLIMIT_MEMLOCK via user_shm_lock().
 * Locked files have their pagecache marked unevictable.  Returns 0 on
 * success, -ENOMEM if the user's locked-memory limit would be exceeded.
 */
int shmem_lock(struct file *file, int lock, struct user_struct *user)
{
	struct inode *inode = file_inode(file);
	struct shmem_inode_info *info = SHMEM_I(inode);
	int retval = -ENOMEM;

	spin_lock_irq(&info->lock);
	if (lock && !(info->flags & VM_LOCKED)) {
		if (!user_shm_lock(inode->i_size, user))
			goto out_nomem;
		info->flags |= VM_LOCKED;
		mapping_set_unevictable(file->f_mapping);
	}
	if (!lock && (info->flags & VM_LOCKED) && user) {
		user_shm_unlock(inode->i_size, user);
		info->flags &= ~VM_LOCKED;
		mapping_clear_unevictable(file->f_mapping);
	}
	retval = 0;

out_nomem:
	spin_unlock_irq(&info->lock);
	return retval;
}
9b83a6a85   Adrian Bunk   [PATCH] mm/{,tiny...
2084
/*
 * mmap() a shmem file: just install shmem_vm_ops.  If huge pagecache is
 * enabled and the vma spans at least one whole PMD-aligned huge-page
 * region, register the vma with khugepaged for possible collapse.
 */
static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
{
	file_accessed(file);
	vma->vm_ops = &shmem_vm_ops;
	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) &&
			((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
			(vma->vm_end & HPAGE_PMD_MASK)) {
		/* Range contains a full aligned HPAGE_PMD_SIZE window. */
		khugepaged_enter(vma, vma->vm_flags);
	}
	return 0;
}
454abafe9   Dmitry Monakhov   ramfs: replace in...
2095
/*
 * Allocate and initialize a new shmem inode of the given @mode under @dir
 * (NULL for the root), with VM_NORESERVE honoured from @flags.  Returns
 * NULL if the superblock's inode quota is exhausted (shmem_reserve_inode)
 * or new_inode() fails; the reservation is released in the latter case.
 */
static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
				     umode_t mode, dev_t dev, unsigned long flags)
{
	struct inode *inode;
	struct shmem_inode_info *info;
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);

	if (shmem_reserve_inode(sb))
		return NULL;

	inode = new_inode(sb);
	if (inode) {
		inode->i_ino = get_next_ino();
		inode_init_owner(inode, dir, mode);
		inode->i_blocks = 0;
		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
		inode->i_generation = prandom_u32();
		info = SHMEM_I(inode);
		/* Zero the shmem-specific tail of the combined inode object. */
		memset(info, 0, (char *)inode - (char *)info);
		spin_lock_init(&info->lock);
		info->seals = F_SEAL_SEAL;	/* sealing itself starts sealed */
		info->flags = flags & VM_NORESERVE;
		INIT_LIST_HEAD(&info->shrinklist);
		INIT_LIST_HEAD(&info->swaplist);
		simple_xattrs_init(&info->xattrs);
		cache_no_acl(inode);

		/* Type-specific ops and setup. */
		switch (mode & S_IFMT) {
		default:
			inode->i_op = &shmem_special_inode_operations;
			init_special_inode(inode, mode, dev);
			break;
		case S_IFREG:
			inode->i_mapping->a_ops = &shmem_aops;
			inode->i_op = &shmem_inode_operations;
			inode->i_fop = &shmem_file_operations;
			mpol_shared_policy_init(&info->policy,
						 shmem_get_sbmpol(sbinfo));
			break;
		case S_IFDIR:
			inc_nlink(inode);
			/* Some things misbehave if size == 0 on a directory */
			inode->i_size = 2 * BOGO_DIRENT_SIZE;
			inode->i_op = &shmem_dir_inode_operations;
			inode->i_fop = &simple_dir_operations;
			break;
		case S_IFLNK:
			/*
			 * Must not load anything in the rbtree,
			 * mpol_free_shared_policy will not be called.
			 */
			mpol_shared_policy_init(&info->policy, NULL);
			break;
		}

		lockdep_annotate_inode_mutex_key(inode);
	} else
		shmem_free_inode(sb);	/* give back the reserved inode */
	return inode;
}
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
2154
2155
  bool shmem_mapping(struct address_space *mapping)
  {
f8005451d   Hugh Dickins   tmpfs: change shm...
2156
  	return mapping->a_ops == &shmem_aops;
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
2157
  }
8d1039634   Mike Rapoport   userfaultfd: shme...
2158
2159
2160
2161
2162
2163
2164
/*
 * Common worker for userfaultfd UFFDIO_COPY / UFFDIO_ZEROPAGE on shmem:
 * allocate a page, fill it (copy from @src_addr, or clear when @zeropage),
 * add it to the file's pagecache at the index for @dst_addr, and install
 * the pte.  May return -ENOENT with *pagep set, asking the caller to redo
 * copy_from_user() outside mmap_sem and call back with the filled page.
 */
static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
				  pmd_t *dst_pmd,
				  struct vm_area_struct *dst_vma,
				  unsigned long dst_addr,
				  unsigned long src_addr,
				  bool zeropage,
				  struct page **pagep)
{
	struct inode *inode = file_inode(dst_vma->vm_file);
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct address_space *mapping = inode->i_mapping;
	gfp_t gfp = mapping_gfp_mask(mapping);
	pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
	struct mem_cgroup *memcg;
	spinlock_t *ptl;
	void *page_kaddr;
	struct page *page;
	pte_t _dst_pte, *dst_pte;
	int ret;
	pgoff_t offset, max_off;

	ret = -ENOMEM;
	if (!shmem_inode_acct_block(inode, 1))
		goto out;

	if (!*pagep) {
		page = shmem_alloc_page(gfp, info, pgoff);
		if (!page)
			goto out_unacct_blocks;

		if (!zeropage) {	/* mcopy_atomic */
			page_kaddr = kmap_atomic(page);
			ret = copy_from_user(page_kaddr,
					     (const void __user *)src_addr,
					     PAGE_SIZE);
			kunmap_atomic(page_kaddr);

			/* fallback to copy_from_user outside mmap_sem */
			if (unlikely(ret)) {
				*pagep = page;
				shmem_inode_unacct_blocks(inode, 1);
				/* don't free the page */
				return -ENOENT;
			}
		} else {		/* mfill_zeropage_atomic */
			clear_highpage(page);
		}
	} else {
		/* Second pass: caller supplies the page it filled for us. */
		page = *pagep;
		*pagep = NULL;
	}

	VM_BUG_ON(PageLocked(page) || PageSwapBacked(page));
	__SetPageLocked(page);
	__SetPageSwapBacked(page);
	__SetPageUptodate(page);

	/* Refuse to extend beyond i_size (e.g. racing truncate). */
	ret = -EFAULT;
	offset = linear_page_index(dst_vma, dst_addr);
	max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	if (unlikely(offset >= max_off))
		goto out_release;

	ret = mem_cgroup_try_charge_delay(page, dst_mm, gfp, &memcg, false);
	if (ret)
		goto out_release;

	ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
	if (!ret) {
		ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL);
		radix_tree_preload_end();
	}
	if (ret)
		goto out_release_uncharge;

	mem_cgroup_commit_charge(page, memcg, false, false);

	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
	if (dst_vma->vm_flags & VM_WRITE)
		_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
	else {
		/*
		 * We don't set the pte dirty if the vma has no
		 * VM_WRITE permission, so mark the page dirty or it
		 * could be freed from under us. We could do it
		 * unconditionally before unlock_page(), but doing it
		 * only if VM_WRITE is not set is faster.
		 */
		set_page_dirty(page);
	}

	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);

	/* Recheck i_size now that the pte lock is held. */
	ret = -EFAULT;
	max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	if (unlikely(offset >= max_off))
		goto out_release_uncharge_unlock;

	ret = -EEXIST;
	if (!pte_none(*dst_pte))
		goto out_release_uncharge_unlock;

	lru_cache_add_anon(page);

	/* Account the new page against the inode. */
	spin_lock(&info->lock);
	info->alloced++;
	inode->i_blocks += BLOCKS_PER_PAGE;
	shmem_recalc_inode(inode);
	spin_unlock(&info->lock);

	inc_mm_counter(dst_mm, mm_counter_file(page));
	page_add_file_rmap(page, false);
	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(dst_vma, dst_addr, dst_pte);
	pte_unmap_unlock(dst_pte, ptl);
	unlock_page(page);
	ret = 0;
out:
	return ret;
out_release_uncharge_unlock:
	pte_unmap_unlock(dst_pte, ptl);
	ClearPageDirty(page);
	delete_from_page_cache(page);
out_release_uncharge:
	mem_cgroup_cancel_charge(page, memcg, false);
out_release:
	unlock_page(page);
	put_page(page);
out_unacct_blocks:
	shmem_inode_unacct_blocks(inode, 1);
	goto out;
}
8d1039634   Mike Rapoport   userfaultfd: shme...
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
/* UFFDIO_COPY entry point: install a page copied from @src_addr. */
int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm,
			   pmd_t *dst_pmd,
			   struct vm_area_struct *dst_vma,
			   unsigned long dst_addr,
			   unsigned long src_addr,
			   struct page **pagep)
{
	return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
				      dst_addr, src_addr, false, pagep);
}
  
/* UFFDIO_ZEROPAGE entry point: install a freshly zeroed page. */
int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm,
			     pmd_t *dst_pmd,
			     struct vm_area_struct *dst_vma,
			     unsigned long dst_addr)
{
	/* No pre-filled page for the zeropage case; src_addr is unused (0). */
	struct page *page = NULL;

	return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
				      dst_addr, 0, true, &page);
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2310
  #ifdef CONFIG_TMPFS
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
2311
  static const struct inode_operations shmem_symlink_inode_operations;
69f07ec93   Hugh Dickins   tmpfs: use kmemdu...
2312
  static const struct inode_operations shmem_short_symlink_operations;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2313

6d9d88d07   Jarkko Sakkinen   tmpfs: security x...
2314
2315
2316
2317
2318
  #ifdef CONFIG_TMPFS_XATTR
  static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
  #else
  #define shmem_initxattrs NULL
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2319
  static int
800d15a53   Nick Piggin   implement simple ...
2320
2321
2322
  shmem_write_begin(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned flags,
  			struct page **pagep, void **fsdata)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2323
  {
800d15a53   Nick Piggin   implement simple ...
2324
  	struct inode *inode = mapping->host;
40e041a2c   David Herrmann   shm: add sealing API
2325
  	struct shmem_inode_info *info = SHMEM_I(inode);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2326
  	pgoff_t index = pos >> PAGE_SHIFT;
40e041a2c   David Herrmann   shm: add sealing API
2327
2328
  
  	/* i_mutex is held by caller */
3f472cc97   Steven Rostedt (VMware)   mm/shmem.c: fix u...
2329
  	if (unlikely(info->seals & (F_SEAL_WRITE | F_SEAL_GROW))) {
40e041a2c   David Herrmann   shm: add sealing API
2330
2331
2332
2333
2334
  		if (info->seals & F_SEAL_WRITE)
  			return -EPERM;
  		if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
  			return -EPERM;
  	}
9e18eb293   Andres Lagar-Cavilla   tmpfs: mem_cgroup...
2335
  	return shmem_getpage(inode, index, pagep, SGP_WRITE);
800d15a53   Nick Piggin   implement simple ...
2336
2337
2338
2339
2340
2341
2342
2343
  }
  
/*
 * ->write_end for tmpfs: update i_size, bring the page fully uptodate,
 * mark it dirty, and release the lock and reference that were taken in
 * shmem_write_begin().
 */
static int
shmem_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;

	if (pos + copied > inode->i_size)
		i_size_write(inode, pos + copied);

	if (!PageUptodate(page)) {
		struct page *head = compound_head(page);
		if (PageTransCompound(page)) {
			int i;

			/*
			 * Uptodate is tracked on the head page, so before
			 * setting it, zero every subpage of the huge page
			 * other than the one we just copied into.
			 */
			for (i = 0; i < HPAGE_PMD_NR; i++) {
				if (head + i == page)
					continue;
				clear_highpage(head + i);
				flush_dcache_page(head + i);
			}
		}
		if (copied < PAGE_SIZE) {
			unsigned from = pos & (PAGE_SIZE - 1);
			/* Zero whatever this partial copy did not cover */
			zero_user_segments(page, 0, from,
					from + copied, PAGE_SIZE);
		}
		SetPageUptodate(head);
	}
	set_page_dirty(page);
	unlock_page(page);
	put_page(page);

	return copied;
}
2ba5bbed0   Al Viro   shmem: switch to ...
2371
/*
 * ->read_iter for tmpfs.  Pages come from shmem_getpage(); holes are
 * normally read back as zeroes via ZERO_PAGE without being allocated,
 * but see the stacking-filesystem comment below for the SGP_CACHE case.
 */
static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct address_space *mapping = inode->i_mapping;
	pgoff_t index;
	unsigned long offset;
	enum sgp_type sgp = SGP_READ;
	int error = 0;
	ssize_t retval = 0;
	loff_t *ppos = &iocb->ki_pos;

	/*
	 * Might this read be for a stacking filesystem?  Then when reading
	 * holes of a sparse file, we actually need to allocate those pages,
	 * and even mark them dirty, so it cannot exceed the max_blocks limit.
	 */
	if (!iter_is_iovec(to))
		sgp = SGP_CACHE;

	index = *ppos >> PAGE_SHIFT;
	offset = *ppos & ~PAGE_MASK;

	for (;;) {
		struct page *page = NULL;
		pgoff_t end_index;
		unsigned long nr, ret;
		loff_t i_size = i_size_read(inode);

		/* First EOF check, before we look the page up */
		end_index = i_size >> PAGE_SHIFT;
		if (index > end_index)
			break;
		if (index == end_index) {
			nr = i_size & ~PAGE_MASK;
			if (nr <= offset)
				break;
		}

		error = shmem_getpage(inode, index, &page, sgp);
		if (error) {
			if (error == -EINVAL)
				error = 0;
			break;
		}
		if (page) {
			if (sgp == SGP_CACHE)
				set_page_dirty(page);
			unlock_page(page);
		}

		/*
		 * We must evaluate after, since reads (unlike writes)
		 * are called without i_mutex protection against truncate
		 */
		nr = PAGE_SIZE;
		i_size = i_size_read(inode);
		end_index = i_size >> PAGE_SHIFT;
		if (index == end_index) {
			nr = i_size & ~PAGE_MASK;
			if (nr <= offset) {
				if (page)
					put_page(page);
				break;
			}
		}
		nr -= offset;

		if (page) {
			/*
			 * If users can be writing to this page using arbitrary
			 * virtual addresses, take care about potential aliasing
			 * before reading the page on the kernel side.
			 */
			if (mapping_writably_mapped(mapping))
				flush_dcache_page(page);
			/*
			 * Mark the page accessed if we read the beginning.
			 */
			if (!offset)
				mark_page_accessed(page);
		} else {
			/* Hole: hand out the shared zero page instead */
			page = ZERO_PAGE(0);
			get_page(page);
		}

		/*
		 * Ok, we have the page, and it's up-to-date, so
		 * now we can copy it to user space...
		 */
		ret = copy_page_to_iter(page, offset, nr, to);
		retval += ret;
		offset += ret;
		index += offset >> PAGE_SHIFT;
		offset &= ~PAGE_MASK;

		put_page(page);
		if (!iov_iter_count(to))
			break;
		/* Short copy: the iter faulted partway through */
		if (ret < nr) {
			error = -EFAULT;
			break;
		}
		cond_resched();
	}

	*ppos = ((loff_t) index << PAGE_SHIFT) + offset;
	file_accessed(file);
	/* Report bytes copied if any; otherwise the first error (or 0) */
	return retval ? retval : error;
}
220f2ac91   Hugh Dickins   tmpfs: support SE...
2477
2478
2479
2480
/*
 * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
 * Scans [index, end) and returns the first page offset that satisfies
 * @whence; for SEEK_DATA, running off the scanned range returns @end.
 */
static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
				    pgoff_t index, pgoff_t end, int whence)
{
	struct page *page;
	struct pagevec pvec;
	pgoff_t indices[PAGEVEC_SIZE];
	bool done = false;
	int i;

	pagevec_init(&pvec);
	pvec.nr = 1;		/* start small: we may be there already */
	while (!done) {
		pvec.nr = find_get_entries(mapping, index,
					pvec.nr, pvec.pages, indices);
		if (!pvec.nr) {
			/* Nothing at all past index: the rest is a hole */
			if (whence == SEEK_DATA)
				index = end;
			break;
		}
		for (i = 0; i < pvec.nr; i++, index++) {
			if (index < indices[i]) {
				/* Gap before this entry is a hole */
				if (whence == SEEK_HOLE) {
					done = true;
					break;
				}
				index = indices[i];
			}
			page = pvec.pages[i];
			if (page && !radix_tree_exceptional_entry(page)) {
				/*
				 * A present but not-uptodate page (e.g. from
				 * fallocate) is treated as a hole; exceptional
				 * (swap) entries still count as data.
				 */
				if (!PageUptodate(page))
					page = NULL;
			}
			if (index >= end ||
			    (page && whence == SEEK_DATA) ||
			    (!page && whence == SEEK_HOLE)) {
				done = true;
				break;
			}
		}
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		pvec.nr = PAGEVEC_SIZE;
		cond_resched();
	}
	return index;
}
965c8e59c   Andrew Morton   lseek: the "whenc...
2525
/*
 * ->llseek for tmpfs: SEEK_SET/CUR/END go through the generic helper;
 * SEEK_DATA/SEEK_HOLE scan the page cache via shmem_seek_hole_data()
 * under i_mutex.
 */
static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	pgoff_t start, end;
	loff_t new_offset;

	if (whence != SEEK_DATA && whence != SEEK_HOLE)
		return generic_file_llseek_size(file, offset, whence,
					MAX_LFS_FILESIZE, i_size_read(inode));
	inode_lock(inode);
	/* We're holding i_mutex so we can access i_size directly */

	if (offset < 0 || offset >= inode->i_size)
		offset = -ENXIO;
	else {
		start = offset >> PAGE_SHIFT;
		end = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
		new_offset = shmem_seek_hole_data(mapping, start, end, whence);
		new_offset <<= PAGE_SHIFT;
		if (new_offset > offset) {
			if (new_offset < inode->i_size)
				offset = new_offset;
			else if (whence == SEEK_DATA)
				/* No data between offset and EOF */
				offset = -ENXIO;
			else
				/* The implicit hole at EOF */
				offset = inode->i_size;
		}
	}

	if (offset >= 0)
		offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
	inode_unlock(inode);
	return offset;
}
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
2557
2558
2559
/*
 * ->fallocate for tmpfs: preallocate or hole-punch a byte range.
 * Only FALLOC_FL_KEEP_SIZE and FALLOC_FL_PUNCH_HOLE are supported.
 * While the operation is in flight, inode->i_private points at an
 * on-stack struct shmem_falloc so concurrent faults and writepage can
 * coordinate with us.
 */
static long shmem_fallocate(struct file *file, int mode, loff_t offset,
							 loff_t len)
{
	struct inode *inode = file_inode(file);
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct shmem_falloc shmem_falloc;
	pgoff_t start, index, end;
	int error;

	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return -EOPNOTSUPP;

	inode_lock(inode);

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		struct address_space *mapping = file->f_mapping;
		loff_t unmap_start = round_up(offset, PAGE_SIZE);
		loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);

		/* protected by i_mutex */
		if (info->seals & F_SEAL_WRITE) {
			error = -EPERM;
			goto out;
		}

		/*
		 * Publish the range being punched, so racing faults can
		 * wait on shmem_falloc_waitq rather than re-instantiate
		 * pages inside the hole while we work.
		 */
		shmem_falloc.waitq = &shmem_falloc_waitq;
		shmem_falloc.start = unmap_start >> PAGE_SHIFT;
		shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
		spin_lock(&inode->i_lock);
		inode->i_private = &shmem_falloc;
		spin_unlock(&inode->i_lock);

		if ((u64)unmap_end > (u64)unmap_start)
			unmap_mapping_range(mapping, unmap_start,
					    1 + unmap_end - unmap_start, 0);
		shmem_truncate_range(inode, offset, offset + len - 1);
		/* No need to unmap again: hole-punching leaves COWed pages */

		spin_lock(&inode->i_lock);
		inode->i_private = NULL;
		wake_up_all(&shmem_falloc_waitq);
		WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.head));
		spin_unlock(&inode->i_lock);
		error = 0;
		goto out;
	}

	/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
	error = inode_newsize_ok(inode, offset + len);
	if (error)
		goto out;

	if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
		error = -EPERM;
		goto out;
	}

	start = offset >> PAGE_SHIFT;
	end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	/* Try to avoid a swapstorm if len is impossible to satisfy */
	if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) {
		error = -ENOSPC;
		goto out;
	}

	/* waitq == NULL distinguishes preallocation from hole-punch */
	shmem_falloc.waitq = NULL;
	shmem_falloc.start = start;
	shmem_falloc.next  = start;
	shmem_falloc.nr_falloced = 0;
	shmem_falloc.nr_unswapped = 0;
	spin_lock(&inode->i_lock);
	inode->i_private = &shmem_falloc;
	spin_unlock(&inode->i_lock);

	for (index = start; index < end; index++) {
		struct page *page;

		/*
		 * Good, the fallocate(2) manpage permits EINTR: we may have
		 * been interrupted because we are using up too much memory.
		 */
		if (signal_pending(current))
			error = -EINTR;
		else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
			error = -ENOMEM;
		else
			error = shmem_getpage(inode, index, &page, SGP_FALLOC);
		if (error) {
			/* Remove the !PageUptodate pages we added */
			if (index > start) {
				shmem_undo_range(inode,
				    (loff_t)start << PAGE_SHIFT,
				    ((loff_t)index << PAGE_SHIFT) - 1, true);
			}
			goto undone;
		}

		/*
		 * Inform shmem_writepage() how far we have reached.
		 * No need for lock or barrier: we have the page lock.
		 */
		shmem_falloc.next++;
		if (!PageUptodate(page))
			shmem_falloc.nr_falloced++;

		/*
		 * If !PageUptodate, leave it that way so that freeable pages
		 * can be recognized if we need to rollback on error later.
		 * But set_page_dirty so that memory pressure will swap rather
		 * than free the pages we are allocating (and SGP_CACHE pages
		 * might still be clean: we now need to mark those dirty too).
		 */
		set_page_dirty(page);
		unlock_page(page);
		put_page(page);
		cond_resched();
	}

	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
		i_size_write(inode, offset + len);
	inode->i_ctime = current_time(inode);
undone:
	spin_lock(&inode->i_lock);
	inode->i_private = NULL;
	spin_unlock(&inode->i_lock);
out:
	inode_unlock(inode);
	return error;
}
726c33422   David Howells   [PATCH] VFS: Perm...
2680
  static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2681
  {
726c33422   David Howells   [PATCH] VFS: Perm...
2682
  	struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2683
2684
  
  	buf->f_type = TMPFS_MAGIC;
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
2685
  	buf->f_bsize = PAGE_SIZE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2686
  	buf->f_namelen = NAME_MAX;
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2687
  	if (sbinfo->max_blocks) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2688
  		buf->f_blocks = sbinfo->max_blocks;
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
2689
2690
2691
  		buf->f_bavail =
  		buf->f_bfree  = sbinfo->max_blocks -
  				percpu_counter_sum(&sbinfo->used_blocks);
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2692
2693
  	}
  	if (sbinfo->max_inodes) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2694
2695
  		buf->f_files = sbinfo->max_inodes;
  		buf->f_ffree = sbinfo->free_inodes;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2696
2697
2698
2699
2700
2701
2702
2703
2704
  	}
  	/* else leave those fields 0 like simple_statfs */
  	return 0;
  }
  
  /*
   * File creation. Allocate an inode, and we're done..
   */
  static int
1a67aafb5   Al Viro   switch ->mknod() ...
2705
  shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2706
  {
0b0a0806b   Hugh Dickins   shmem: fix shared...
2707
  	struct inode *inode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2708
  	int error = -ENOSPC;
454abafe9   Dmitry Monakhov   ramfs: replace in...
2709
  	inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2710
  	if (inode) {
feda821e7   Christoph Hellwig   fs: remove generi...
2711
2712
2713
  		error = simple_acl_create(dir, inode);
  		if (error)
  			goto out_iput;
2a7dba391   Eric Paris   fs/vfs/security: ...
2714
  		error = security_inode_init_security(inode, dir,
9d8f13ba3   Mimi Zohar   security: new sec...
2715
  						     &dentry->d_name,
6d9d88d07   Jarkko Sakkinen   tmpfs: security x...
2716
  						     shmem_initxattrs, NULL);
feda821e7   Christoph Hellwig   fs: remove generi...
2717
2718
  		if (error && error != -EOPNOTSUPP)
  			goto out_iput;
37ec43cdc   Mimi Zohar   evm: calculate HM...
2719

718deb6b6   Al Viro   Fix breakage in s...
2720
  		error = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2721
  		dir->i_size += BOGO_DIRENT_SIZE;
078cd8279   Deepa Dinamani   fs: Replace CURRE...
2722
  		dir->i_ctime = dir->i_mtime = current_time(dir);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2723
2724
  		d_instantiate(dentry, inode);
  		dget(dentry); /* Extra count - pin the dentry in core */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2725
2726
  	}
  	return error;
feda821e7   Christoph Hellwig   fs: remove generi...
2727
2728
2729
  out_iput:
  	iput(inode);
  	return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2730
  }
60545d0d4   Al Viro   [O_TMPFILE] it's ...
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
  static int
  shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
  {
  	struct inode *inode;
  	int error = -ENOSPC;
  
  	inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE);
  	if (inode) {
  		error = security_inode_init_security(inode, dir,
  						     NULL,
  						     shmem_initxattrs, NULL);
feda821e7   Christoph Hellwig   fs: remove generi...
2742
2743
2744
2745
2746
  		if (error && error != -EOPNOTSUPP)
  			goto out_iput;
  		error = simple_acl_create(dir, inode);
  		if (error)
  			goto out_iput;
60545d0d4   Al Viro   [O_TMPFILE] it's ...
2747
2748
2749
  		d_tmpfile(dentry, inode);
  	}
  	return error;
feda821e7   Christoph Hellwig   fs: remove generi...
2750
2751
2752
  out_iput:
  	iput(inode);
  	return error;
60545d0d4   Al Viro   [O_TMPFILE] it's ...
2753
  }
18bb1db3e   Al Viro   switch vfs_mkdir(...
2754
  static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2755
2756
2757
2758
2759
  {
  	int error;
  
  	if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
  		return error;
d8c76e6f4   Dave Hansen   [PATCH] r/o bind ...
2760
  	inc_nlink(dir);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2761
2762
  	return 0;
  }
4acdaf27e   Al Viro   switch ->create()...
2763
/*
 * ->create: a regular file is just shmem_mknod() with S_IFREG.
 * (excl is unused: mknod-style creation never returns an existing inode.)
 */
static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
		bool excl)
{
	return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
}
  
/*
 * Link a file..
 */
static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(old_dentry);
	int ret = 0;

	/*
	 * No ordinary (disk based) filesystem counts links as inodes;
	 * but each new link needs a new dentry, pinning lowmem, and
	 * tmpfs dentries cannot be pruned until they are unlinked.
	 * But if an O_TMPFILE file is linked into the tmpfs, the
	 * first link must skip that, to get the accounting right.
	 */
	if (inode->i_nlink) {
		ret = shmem_reserve_inode(inode->i_sb);
		if (ret)
			goto out;
	}

	/* BOGO_DIRENT_SIZE: nominal per-entry size for i_size accounting */
	dir->i_size += BOGO_DIRENT_SIZE;
	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
	inc_nlink(inode);
	ihold(inode);	/* New dentry reference */
	dget(dentry);		/* Extra pinning count for the created dentry */
	d_instantiate(dentry, inode);
out:
	return ret;
}
  
static int shmem_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);

	/*
	 * Release the per-link inode reservation taken by shmem_link()
	 * for every link beyond the first (see the comment there).
	 */
	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
		shmem_free_inode(inode->i_sb);

	dir->i_size -= BOGO_DIRENT_SIZE;
	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
	drop_nlink(inode);
	dput(dentry);	/* Undo the count from "create" - this does all the work */
	return 0;
}
  
static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
{
	if (!simple_empty(dentry))
		return -ENOTEMPTY;

	/* Drop the victim's "." link and the parent's ".." back-link */
	drop_nlink(d_inode(dentry));
	drop_nlink(dir);
	/* shmem_unlink() drops the remaining link and unpins the dentry */
	return shmem_unlink(dir, dentry);
}
37456771c   Miklos Szeredi   shmem: support RE...
2822
2823
/*
 * RENAME_EXCHANGE support: the VFS has already swapped the dentries;
 * here we only fix up link counts and timestamps.  When a directory and
 * a non-directory swap across different parents, the directory's ".."
 * back-link moves from one parent to the other.
 */
static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
{
	bool old_is_dir = d_is_dir(old_dentry);
	bool new_is_dir = d_is_dir(new_dentry);

	if (old_dir != new_dir && old_is_dir != new_is_dir) {
		if (old_is_dir) {
			drop_nlink(old_dir);
			inc_nlink(new_dir);
		} else {
			drop_nlink(new_dir);
			inc_nlink(old_dir);
		}
	}
	old_dir->i_ctime = old_dir->i_mtime =
	new_dir->i_ctime = new_dir->i_mtime =
	d_inode(old_dentry)->i_ctime =
	d_inode(new_dentry)->i_ctime = current_time(old_dir);

	return 0;
}
46fdb794e   Miklos Szeredi   shmem: support RE...
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
/*
 * Create a whiteout entry (a special chardev, WHITEOUT_MODE/WHITEOUT_DEV)
 * in place of old_dentry, for RENAME_WHITEOUT.
 */
static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
{
	struct dentry *whiteout;
	int error;

	whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name);
	if (!whiteout)
		return -ENOMEM;

	error = shmem_mknod(old_dir, whiteout,
			    S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
	/*
	 * Drop the d_alloc() reference; on success the dentry remains
	 * pinned by the extra dget() that shmem_mknod() took.
	 */
	dput(whiteout);
	if (error)
		return error;

	/*
	 * Cheat and hash the whiteout while the old dentry is still in
	 * place, instead of playing games with FS_RENAME_DOES_D_MOVE.
	 *
	 * d_lookup() will consistently find one of them at this point,
	 * not sure which one, but that isn't even important.
	 */
	d_rehash(whiteout);
	return 0;
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2868
2869
2870
2871
2872
2873
/*
 * The VFS layer already does all the dentry stuff for rename,
 * we just have to decrement the usage count for the target if
 * it exists so that the VFS layer correctly free's it when it
 * gets overwritten.
 */
static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags)
{
	struct inode *inode = d_inode(old_dentry);
	int they_are_dirs = S_ISDIR(inode->i_mode);

	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
		return -EINVAL;

	if (flags & RENAME_EXCHANGE)
		return shmem_exchange(old_dir, old_dentry, new_dir, new_dentry);

	if (!simple_empty(new_dentry))
		return -ENOTEMPTY;

	if (flags & RENAME_WHITEOUT) {
		int error;

		error = shmem_whiteout(old_dir, old_dentry);
		if (error)
			return error;
	}

	if (d_really_is_positive(new_dentry)) {
		(void) shmem_unlink(new_dir, new_dentry);
		if (they_are_dirs) {
			/* Replaced dir loses its "."; old parent its ".." */
			drop_nlink(d_inode(new_dentry));
			drop_nlink(old_dir);
		}
	} else if (they_are_dirs) {
		/* Directory move: its ".." back-link changes parents */
		drop_nlink(old_dir);
		inc_nlink(new_dir);
	}

	old_dir->i_size -= BOGO_DIRENT_SIZE;
	new_dir->i_size += BOGO_DIRENT_SIZE;
	old_dir->i_ctime = old_dir->i_mtime =
	new_dir->i_ctime = new_dir->i_mtime =
	inode->i_ctime = current_time(old_dir);
	return 0;
}
  
/*
 * Create a symlink.  Short targets are kept inline in a kmemdup'd
 * buffer hung off inode->i_link; longer ones are stored in page 0 of
 * the inode's page cache, like regular tmpfs file data.
 */
static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
{
	int error;
	int len;
	struct inode *inode;
	struct page *page;

	/* Include the NUL: the stored copy is a C string. */
	len = strlen(symname) + 1;
	if (len > PAGE_SIZE)
		return -ENAMETOOLONG;

	inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK | 0777, 0,
				VM_NORESERVE);
	if (!inode)
		return -ENOSPC;

	error = security_inode_init_security(inode, dir, &dentry->d_name,
					     shmem_initxattrs, NULL);
	if (error) {
		/* -EOPNOTSUPP just means no LSM xattrs; not fatal. */
		if (error != -EOPNOTSUPP) {
			iput(inode);
			return error;
		}
		error = 0;
	}

	/* i_size is the target length without the trailing NUL. */
	inode->i_size = len-1;
	if (len <= SHORT_SYMLINK_LEN) {
		/* Short symlink: inline copy, freed in destroy callback. */
		inode->i_link = kmemdup(symname, len, GFP_KERNEL);
		if (!inode->i_link) {
			iput(inode);
			return -ENOMEM;
		}
		inode->i_op = &shmem_short_symlink_operations;
	} else {
		/* Long symlink: target lives in page 0, which must not
		 * be highmem so ->get_link can return a direct pointer. */
		inode_nohighmem(inode);
		error = shmem_getpage(inode, 0, &page, SGP_WRITE);
		if (error) {
			iput(inode);
			return error;
		}
		inode->i_mapping->a_ops = &shmem_aops;
		inode->i_op = &shmem_symlink_inode_operations;
		memcpy(page_address(page), symname, len);
		SetPageUptodate(page);
		set_page_dirty(page);
		unlock_page(page);
		put_page(page);
	}
	dir->i_size += BOGO_DIRENT_SIZE;
	dir->i_ctime = dir->i_mtime = current_time(dir);
	d_instantiate(dentry, inode);
	dget(dentry);
	return 0;
}
fceef393a   Al Viro   switch ->get_link...
2962
  static void shmem_put_link(void *arg)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2963
  {
fceef393a   Al Viro   switch ->get_link...
2964
2965
  	mark_page_accessed(arg);
  	put_page(arg);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2966
  }
6b2553918   Al Viro   replace ->follow_...
2967
/*
 * ->get_link: return a pointer to the symlink body stored in page 0.
 * A NULL dentry means we are in RCU-walk mode and must not sleep, so
 * only a lockless page-cache lookup is permitted there.
 * The page reference is released via shmem_put_link() through @done.
 */
static const char *shmem_get_link(struct dentry *dentry,
				  struct inode *inode,
				  struct delayed_call *done)
{
	struct page *page = NULL;
	int error;
	if (!dentry) {
		/* RCU walk: cannot block, so fail with -ECHILD (retry in
		 * ref-walk mode) unless the page is already resident. */
		page = find_get_page(inode->i_mapping, 0);
		if (!page)
			return ERR_PTR(-ECHILD);
		if (!PageUptodate(page)) {
			put_page(page);
			return ERR_PTR(-ECHILD);
		}
	} else {
		/* Ref walk: may sleep to bring the page in (e.g. from swap). */
		error = shmem_getpage(inode, 0, &page, SGP_READ);
		if (error)
			return ERR_PTR(error);
		unlock_page(page);
	}
	set_delayed_call(done, shmem_put_link, page);
	return page_address(page);
}
b09e0fa4b   Eric Paris   tmpfs: implement ...
2990
  #ifdef CONFIG_TMPFS_XATTR
467118102   Randy Dunlap   mm/shmem and tiny...
2991
/*
 * Superblocks without xattr inode operations may get some security.* xattr
 * support from the LSM "for free". As soon as we have any other xattrs
 * like ACLs, we also need to implement the security.* handlers at
 * filesystem level, though.
 */

/*
 * Callback for security_inode_init_security() for acquiring xattrs.
 * Copies each LSM-supplied xattr into the inode's simple_xattr list,
 * prepending the "security." namespace to every name.
 */
static int shmem_initxattrs(struct inode *inode,
			    const struct xattr *xattr_array,
			    void *fs_info)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	const struct xattr *xattr;
	struct simple_xattr *new_xattr;
	size_t len;

	/* xattr_array is terminated by an entry with a NULL name. */
	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
		new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
		if (!new_xattr)
			return -ENOMEM;

		/* +1 for the NUL; name becomes "security.<xattr->name>". */
		len = strlen(xattr->name) + 1;
		new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
					  GFP_KERNEL);
		if (!new_xattr->name) {
			kfree(new_xattr);
			return -ENOMEM;
		}

		memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
		       XATTR_SECURITY_PREFIX_LEN);
		memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
		       xattr->name, len);

		/* Ownership of new_xattr transfers to the inode's list;
		 * it is freed when the inode is destroyed. */
		simple_xattr_list_add(&info->xattrs, new_xattr);
	}

	return 0;
}
aa7c5241c   Andreas Gruenbacher   tmpfs: Use xattr ...
3031
  static int shmem_xattr_handler_get(const struct xattr_handler *handler,
b296821a7   Al Viro   xattr_handler: pa...
3032
3033
  				   struct dentry *unused, struct inode *inode,
  				   const char *name, void *buffer, size_t size)
b09e0fa4b   Eric Paris   tmpfs: implement ...
3034
  {
b296821a7   Al Viro   xattr_handler: pa...
3035
  	struct shmem_inode_info *info = SHMEM_I(inode);
b09e0fa4b   Eric Paris   tmpfs: implement ...
3036

aa7c5241c   Andreas Gruenbacher   tmpfs: Use xattr ...
3037
  	name = xattr_full_name(handler, name);
38f386574   Aristeu Rozanski   xattr: extract si...
3038
  	return simple_xattr_get(&info->xattrs, name, buffer, size);
b09e0fa4b   Eric Paris   tmpfs: implement ...
3039
  }
aa7c5241c   Andreas Gruenbacher   tmpfs: Use xattr ...
3040
  static int shmem_xattr_handler_set(const struct xattr_handler *handler,
593012268   Al Viro   switch xattr_hand...
3041
3042
3043
  				   struct dentry *unused, struct inode *inode,
  				   const char *name, const void *value,
  				   size_t size, int flags)
b09e0fa4b   Eric Paris   tmpfs: implement ...
3044
  {
593012268   Al Viro   switch xattr_hand...
3045
  	struct shmem_inode_info *info = SHMEM_I(inode);
b09e0fa4b   Eric Paris   tmpfs: implement ...
3046

aa7c5241c   Andreas Gruenbacher   tmpfs: Use xattr ...
3047
  	name = xattr_full_name(handler, name);
38f386574   Aristeu Rozanski   xattr: extract si...
3048
  	return simple_xattr_set(&info->xattrs, name, value, size, flags);
b09e0fa4b   Eric Paris   tmpfs: implement ...
3049
  }
aa7c5241c   Andreas Gruenbacher   tmpfs: Use xattr ...
3050
3051
3052
3053
3054
/* Handler for the "security." xattr namespace. */
static const struct xattr_handler shmem_security_xattr_handler = {
	.prefix = XATTR_SECURITY_PREFIX,
	.get = shmem_xattr_handler_get,
	.set = shmem_xattr_handler_set,
};
b09e0fa4b   Eric Paris   tmpfs: implement ...
3055

aa7c5241c   Andreas Gruenbacher   tmpfs: Use xattr ...
3056
3057
3058
3059
3060
/* Handler for the "trusted." xattr namespace (CAP_SYS_ADMIN only, enforced by VFS). */
static const struct xattr_handler shmem_trusted_xattr_handler = {
	.prefix = XATTR_TRUSTED_PREFIX,
	.get = shmem_xattr_handler_get,
	.set = shmem_xattr_handler_set,
};
b09e0fa4b   Eric Paris   tmpfs: implement ...
3061

aa7c5241c   Andreas Gruenbacher   tmpfs: Use xattr ...
3062
3063
3064
3065
3066
3067
3068
3069
3070
/* NULL-terminated table installed as sb->s_xattr in shmem_fill_super(). */
static const struct xattr_handler *shmem_xattr_handlers[] = {
#ifdef CONFIG_TMPFS_POSIX_ACL
	&posix_acl_access_xattr_handler,
	&posix_acl_default_xattr_handler,
#endif
	&shmem_security_xattr_handler,
	&shmem_trusted_xattr_handler,
	NULL
};
b09e0fa4b   Eric Paris   tmpfs: implement ...
3071
3072
3073
  
  static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
  {
75c3cfa85   David Howells   VFS: assorted wei...
3074
  	struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
786534b92   Andreas Gruenbacher   tmpfs: listxattr ...
3075
  	return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size);
b09e0fa4b   Eric Paris   tmpfs: implement ...
3076
3077
  }
  #endif /* CONFIG_TMPFS_XATTR */
69f07ec93   Hugh Dickins   tmpfs: use kmemdu...
3078
/* inode ops for symlinks whose target is stored inline in i_link. */
static const struct inode_operations shmem_short_symlink_operations = {
	.get_link	= simple_get_link,
#ifdef CONFIG_TMPFS_XATTR
	.listxattr	= shmem_listxattr,
#endif
};
  
/* inode ops for symlinks whose target is stored in page 0 of the mapping. */
static const struct inode_operations shmem_symlink_inode_operations = {
	.get_link	= shmem_get_link,
#ifdef CONFIG_TMPFS_XATTR
	.listxattr	= shmem_listxattr,
#endif
};
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
3091

91828a405   David M. Grimes   [PATCH] knfsd: ad...
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
/*
 * ->get_parent for NFS export: tmpfs keeps no persistent parent
 * pointers, so a disconnected dentry's parent can never be recovered.
 */
static struct dentry *shmem_get_parent(struct dentry *child)
{
	return ERR_PTR(-ESTALE);
}
  
  static int shmem_match(struct inode *ino, void *vfh)
  {
  	__u32 *fh = vfh;
  	__u64 inum = fh[2];
  	inum = (inum << 32) | fh[1];
  	return ino->i_ino == inum && fh[0] == ino->i_generation;
  }
12ba780d6   Amir Goldstein   tmpfs: allow deco...
3104
3105
3106
3107
3108
3109
3110
  /* Find any alias of inode, but prefer a hashed alias */
  static struct dentry *shmem_find_alias(struct inode *inode)
  {
  	struct dentry *alias = d_find_alias(inode);
  
  	return alias ?: d_find_any_alias(inode);
  }
480b116c9   Christoph Hellwig   shmem: new export...
3111
3112
/*
 * ->fh_to_dentry for NFS export: decode the 3-word handle written by
 * shmem_encode_fh() and find the corresponding inode, if still cached.
 */
static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	struct inode *inode;
	struct dentry *dentry = NULL;
	u64 inum;

	/* Our handles are always 3 words; anything shorter is bogus. */
	if (fh_len < 3)
		return NULL;

	inum = fid->raw[2];
	inum = (inum << 32) | fid->raw[1];

	/* Hash key is ino+generation, matching shmem_encode_fh();
	 * shmem_match() does the exact comparison. */
	inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
			shmem_match, fid->raw);
	if (inode) {
		dentry = shmem_find_alias(inode);
		iput(inode);
	}

	return dentry;
}
b0b0382bb   Al Viro   ->encode_fh() API...
3131
3132
/*
 * ->encode_fh for NFS export: pack generation + 64-bit ino into a
 * 3-word handle, hashing the inode first so fh_to_dentry can find it.
 */
static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len,
				struct inode *parent)
{
	/* Caller's buffer too small: report the size we need. */
	if (*len < 3) {
		*len = 3;
		return FILEID_INVALID;
	}

	if (inode_unhashed(inode)) {
		/* Unfortunately insert_inode_hash is not idempotent,
		 * so as we hash inodes here rather than at creation
		 * time, we need a lock to ensure we only try
		 * to do it once
		 */
		static DEFINE_SPINLOCK(lock);
		spin_lock(&lock);
		/* Re-check under the lock (double-checked hashing). */
		if (inode_unhashed(inode))
			__insert_inode_hash(inode,
					    inode->i_ino + inode->i_generation);
		spin_unlock(&lock);
	}

	fh[0] = inode->i_generation;
	fh[1] = inode->i_ino;
	fh[2] = ((__u64)inode->i_ino) >> 32;

	*len = 3;
	return 1;
}
396551644   Christoph Hellwig   exportfs: make st...
3160
/* NFS export operations, installed as sb->s_export_op in shmem_fill_super(). */
static const struct export_operations shmem_export_ops = {
	.get_parent     = shmem_get_parent,
	.encode_fh      = shmem_encode_fh,
	.fh_to_dentry	= shmem_fh_to_dentry,
};
680d794ba   akpm@linux-foundation.org   mount options: fi...
3165
3166
/*
 * Parse the tmpfs mount-option string into @sbinfo.
 * @remount: true on remount, in which case mode/uid/gid are ignored
 * (they cannot be changed after mount).
 * Returns 0 on success, 1 on any parse error (options are logged).
 * Note: @options is modified in place (commas become NULs).
 */
static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
			       bool remount)
{
	char *this_char, *value, *rest;
	struct mempolicy *mpol = NULL;
	uid_t uid;
	gid_t gid;

	while (options != NULL) {
		this_char = options;
		for (;;) {
			/*
			 * NUL-terminate this option: unfortunately,
			 * mount options form a comma-separated list,
			 * but mpol's nodelist may also contain commas.
			 */
			options = strchr(options, ',');
			if (options == NULL)
				break;
			options++;
			/* A digit right after the comma means the comma was
			 * inside an mpol nodelist: keep scanning. */
			if (!isdigit(*options)) {
				options[-1] = '\0';
				break;
			}
		}
		if (!*this_char)
			continue;
		/* Split "key=value"; every recognised option takes a value. */
		if ((value = strchr(this_char,'=')) != NULL) {
			*value++ = 0;
		} else {
			pr_err("tmpfs: No value for mount option '%s'\n",
			       this_char);
			goto error;
		}

		if (!strcmp(this_char,"size")) {
			unsigned long long size;
			size = memparse(value,&rest);
			if (*rest == '%') {
				/* "size=N%": percentage of total RAM. */
				size <<= PAGE_SHIFT;
				size *= totalram_pages;
				do_div(size, 100);
				rest++;
			}
			if (*rest)
				goto bad_val;
			sbinfo->max_blocks =
				DIV_ROUND_UP(size, PAGE_SIZE);
		} else if (!strcmp(this_char,"nr_blocks")) {
			sbinfo->max_blocks = memparse(value, &rest);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"nr_inodes")) {
			sbinfo->max_inodes = memparse(value, &rest);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"mode")) {
			if (remount)
				continue;	/* mode is mount-time only */
			sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777;
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"uid")) {
			if (remount)
				continue;	/* uid is mount-time only */
			uid = simple_strtoul(value, &rest, 0);
			if (*rest)
				goto bad_val;
			/* Map into the caller's user namespace. */
			sbinfo->uid = make_kuid(current_user_ns(), uid);
			if (!uid_valid(sbinfo->uid))
				goto bad_val;
		} else if (!strcmp(this_char,"gid")) {
			if (remount)
				continue;	/* gid is mount-time only */
			gid = simple_strtoul(value, &rest, 0);
			if (*rest)
				goto bad_val;
			sbinfo->gid = make_kgid(current_user_ns(), gid);
			if (!gid_valid(sbinfo->gid))
				goto bad_val;
#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
		} else if (!strcmp(this_char, "huge")) {
			int huge;
			huge = shmem_parse_huge(value);
			if (huge < 0)
				goto bad_val;
			/* Only "never" is acceptable without THP support. */
			if (!has_transparent_hugepage() &&
					huge != SHMEM_HUGE_NEVER)
				goto bad_val;
			sbinfo->huge = huge;
#endif
#ifdef CONFIG_NUMA
		} else if (!strcmp(this_char,"mpol")) {
			/* Last mpol= wins; drop any earlier parse result. */
			mpol_put(mpol);
			mpol = NULL;
			if (mpol_parse_str(value, &mpol))
				goto bad_val;
#endif
		} else {
			pr_err("tmpfs: Bad mount option %s\n", this_char);
			goto error;
		}
	}
	/* Transfer the mempolicy reference to sbinfo on success. */
	sbinfo->mpol = mpol;
	return 0;

bad_val:
	pr_err("tmpfs: Bad value '%s' for mount option '%s'\n",
	       value, this_char);
error:
	mpol_put(mpol);
	return 1;

}
  
/*
 * ->remount_fs: re-parse options into a scratch copy of the sb_info,
 * validate the new limits against current usage under stat_lock, and
 * only then commit them.  Returns 0 or -EINVAL.
 */
static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	struct shmem_sb_info config = *sbinfo;
	unsigned long inodes;
	int error = -EINVAL;

	/* Don't inherit the live mpol pointer into the scratch copy. */
	config.mpol = NULL;
	if (shmem_parse_options(data, &config, true))
		return error;

	spin_lock(&sbinfo->stat_lock);
	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
	/* New limits must not be below what is already in use. */
	if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0)
		goto out;
	if (config.max_inodes < inodes)
		goto out;
	/*
	 * Those tests disallow limited->unlimited while any are in use;
	 * but we must separately disallow unlimited->limited, because
	 * in that case we have no record of how much is already in use.
	 */
	if (config.max_blocks && !sbinfo->max_blocks)
		goto out;
	if (config.max_inodes && !sbinfo->max_inodes)
		goto out;

	error = 0;
	sbinfo->huge = config.huge;
	sbinfo->max_blocks  = config.max_blocks;
	sbinfo->max_inodes  = config.max_inodes;
	sbinfo->free_inodes = config.max_inodes - inodes;

	/*
	 * Preserve previous mempolicy unless mpol remount option was specified.
	 */
	if (config.mpol) {
		mpol_put(sbinfo->mpol);
		sbinfo->mpol = config.mpol;	/* transfers initial ref */
	}
out:
	spin_unlock(&sbinfo->stat_lock);
	return error;
}
680d794ba   akpm@linux-foundation.org   mount options: fi...
3326

34c80b1d9   Al Viro   vfs: switch ->sho...
3327
/*
 * ->show_options: emit the mount options that differ from the tmpfs
 * defaults, for /proc/mounts and friends.
 */
static int shmem_show_options(struct seq_file *seq, struct dentry *root)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb);

	if (sbinfo->max_blocks != shmem_default_max_blocks())
		seq_printf(seq, ",size=%luk",
			sbinfo->max_blocks << (PAGE_SHIFT - 10));
	if (sbinfo->max_inodes != shmem_default_max_inodes())
		seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
	if (sbinfo->mode != (0777 | S_ISVTX))
		seq_printf(seq, ",mode=%03ho", sbinfo->mode);
	/* uid/gid are shown munged into the initial user namespace. */
	if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
		seq_printf(seq, ",uid=%u",
				from_kuid_munged(&init_user_ns, sbinfo->uid));
	if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
		seq_printf(seq, ",gid=%u",
				from_kgid_munged(&init_user_ns, sbinfo->gid));
#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
	/* Rightly or wrongly, show huge mount option unmasked by shmem_huge */
	if (sbinfo->huge)
		seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge));
#endif
	shmem_show_mpol(seq, sbinfo->mpol);
	return 0;
}
9183df25f   David Herrmann   shm: add memfd_cr...
3352

680d794ba   akpm@linux-foundation.org   mount options: fi...
3353
  #endif /* CONFIG_TMPFS */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3354
3355
3356
  
/*
 * Tear down the per-superblock info; also used as the error-unwind
 * path of shmem_fill_super().
 */
static void shmem_put_super(struct super_block *sb)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);

	percpu_counter_destroy(&sbinfo->used_blocks);
	mpol_put(sbinfo->mpol);
	kfree(sbinfo);
	/* Clear the pointer so a later put_super cannot double-free. */
	sb->s_fs_info = NULL;
}
2b2af54a5   Kay Sievers   Driver Core: devt...
3364
/*
 * Fill a tmpfs superblock: allocate sb_info, parse mount options
 * (user mounts only), initialise counters and create the root inode.
 * Returns 0 or a negative errno; on failure shmem_put_super() unwinds.
 */
int shmem_fill_super(struct super_block *sb, void *data, int silent)
{
	struct inode *inode;
	struct shmem_sb_info *sbinfo;
	int err = -ENOMEM;

	/* Round up to L1_CACHE_BYTES to resist false sharing */
	sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
				L1_CACHE_BYTES), GFP_KERNEL);
	if (!sbinfo)
		return -ENOMEM;

	/* Defaults: world-writable, sticky, owned by the mounter. */
	sbinfo->mode = 0777 | S_ISVTX;
	sbinfo->uid = current_fsuid();
	sbinfo->gid = current_fsgid();
	sb->s_fs_info = sbinfo;

#ifdef CONFIG_TMPFS
	/*
	 * Per default we only allow half of the physical ram per
	 * tmpfs instance, limiting inodes to one per page of lowmem;
	 * but the internal instance is left unlimited.
	 */
	if (!(sb->s_flags & SB_KERNMOUNT)) {
		sbinfo->max_blocks = shmem_default_max_blocks();
		sbinfo->max_inodes = shmem_default_max_inodes();
		if (shmem_parse_options(data, sbinfo, false)) {
			err = -EINVAL;
			goto failed;
		}
	} else {
		sb->s_flags |= SB_NOUSER;
	}
	sb->s_export_op = &shmem_export_ops;
	sb->s_flags |= SB_NOSEC;
#else
	sb->s_flags |= SB_NOUSER;
#endif

	spin_lock_init(&sbinfo->stat_lock);
	if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
		goto failed;
	sbinfo->free_inodes = sbinfo->max_inodes;
	/* State for the huge-page shrinker of partially-used THPs. */
	spin_lock_init(&sbinfo->shrinklist_lock);
	INIT_LIST_HEAD(&sbinfo->shrinklist);

	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_blocksize = PAGE_SIZE;
	sb->s_blocksize_bits = PAGE_SHIFT;
	sb->s_magic = TMPFS_MAGIC;
	sb->s_op = &shmem_ops;
	sb->s_time_gran = 1;
#ifdef CONFIG_TMPFS_XATTR
	sb->s_xattr = shmem_xattr_handlers;
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
	sb->s_flags |= SB_POSIXACL;
#endif
	uuid_gen(&sb->s_uuid);

	/* Root directory inherits the mount's mode/uid/gid. */
	inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
	if (!inode)
		goto failed;
	inode->i_uid = sbinfo->uid;
	inode->i_gid = sbinfo->gid;
	sb->s_root = d_make_root(inode);
	if (!sb->s_root)
		goto failed;
	return 0;

failed:
	/* shmem_put_super frees sbinfo and clears s_fs_info. */
	shmem_put_super(sb);
	return err;
}
fcc234f88   Pekka Enberg   [PATCH] mm: kill ...
3435
  static struct kmem_cache *shmem_inode_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3436
3437
3438
  
  static struct inode *shmem_alloc_inode(struct super_block *sb)
  {
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3439
3440
3441
  	struct shmem_inode_info *info;
  	info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
  	if (!info)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3442
  		return NULL;
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3443
  	return &info->vfs_inode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3444
  }
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3445
/*
 * RCU callback that actually frees the inode, after a grace period
 * guarantees no lockless pathwalk can still be touching it.
 */
static void shmem_destroy_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	/* Short symlinks own a kmemdup'd target in i_link. */
	if (S_ISLNK(inode->i_mode))
		kfree(inode->i_link);
	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3452
3453
/*
 * Release per-inode shmem state and defer the actual freeing of the
 * inode to shmem_destroy_callback() via RCU.  Only regular files carry
 * a shared NUMA mempolicy that needs dropping here.
 */
static void shmem_destroy_inode(struct inode *inode)
{
	if (S_ISREG(inode->i_mode))
		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
	call_rcu(&inode->i_rcu, shmem_destroy_callback);
}
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3458
/*
 * Slab constructor passed to kmem_cache_create(): initialise the
 * embedded VFS inode once per slab object, so repeated alloc/free
 * cycles don't pay for full inode re-initialisation.
 */
static void shmem_init_inode(void *foo)
{
	struct shmem_inode_info *info = foo;

	inode_init_once(&info->vfs_inode);
}
9a8ec03ed   weiping zhang   shmem: convert sh...
3463
/*
 * Create the shmem inode slab cache.  SLAB_PANIC makes a creation
 * failure fatal (no error return needed), and SLAB_ACCOUNT charges
 * inode allocations to the allocating memory cgroup.
 */
static void shmem_init_inodecache(void)
{
	shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
				sizeof(struct shmem_inode_info),
				0, SLAB_PANIC|SLAB_ACCOUNT, shmem_init_inode);
}
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3469
/* Tear down the inode slab cache (error path of shmem_init()). */
static void shmem_destroy_inodecache(void)
{
	kmem_cache_destroy(shmem_inode_cachep);
}
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
3473
/*
 * Address-space (page cache) operations for shmem/tmpfs files.
 * There is no backing store to write to, hence the no-writeback
 * dirty handler; ->writepage is shmem's own handler.
 */
static const struct address_space_operations shmem_aops = {
	.writepage	= shmem_writepage,
	.set_page_dirty	= __set_page_dirty_no_writeback,
#ifdef CONFIG_TMPFS
	.write_begin	= shmem_write_begin,
	.write_end	= shmem_write_end,
#endif
#ifdef CONFIG_MIGRATION
	.migratepage	= migrate_page,
#endif
	.error_remove_page = generic_error_remove_page,
};
15ad7cdcf   Helge Deller   [PATCH] struct se...
3485
/* File operations for regular tmpfs files. */
static const struct file_operations shmem_file_operations = {
	.mmap		= shmem_mmap,
	.get_unmapped_area = shmem_get_unmapped_area,
#ifdef CONFIG_TMPFS
	.llseek		= shmem_file_llseek,	/* supports SEEK_DATA/SEEK_HOLE */
	.read_iter	= shmem_file_read_iter,
	.write_iter	= generic_file_write_iter,
	.fsync		= noop_fsync,		/* nothing persistent to sync */
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= shmem_fallocate,
#endif
};
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
3498
/* Inode operations for regular tmpfs files. */
static const struct inode_operations shmem_inode_operations = {
	.getattr	= shmem_getattr,
	.setattr	= shmem_setattr,
#ifdef CONFIG_TMPFS_XATTR
	.listxattr	= shmem_listxattr,
	.set_acl	= simple_set_acl,
#endif
};
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
3506
/* Inode operations for tmpfs directories. */
static const struct inode_operations shmem_dir_inode_operations = {
#ifdef CONFIG_TMPFS
	.create		= shmem_create,
	.lookup		= simple_lookup,	/* dcache-only lookup suffices */
	.link		= shmem_link,
	.unlink		= shmem_unlink,
	.symlink	= shmem_symlink,
	.mkdir		= shmem_mkdir,
	.rmdir		= shmem_rmdir,
	.mknod		= shmem_mknod,
	.rename		= shmem_rename2,
	.tmpfile	= shmem_tmpfile,
#endif
#ifdef CONFIG_TMPFS_XATTR
	.listxattr	= shmem_listxattr,
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
	.setattr	= shmem_setattr,
	.set_acl	= simple_set_acl,
#endif
};
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
3527
/* Inode operations for special inodes (neither regular file nor directory). */
static const struct inode_operations shmem_special_inode_operations = {
#ifdef CONFIG_TMPFS_XATTR
	.listxattr	= shmem_listxattr,
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
	.setattr	= shmem_setattr,
	.set_acl	= simple_set_acl,
#endif
};
759b9775c   Hugh Dickins   [PATCH] shmem and...
3536
/* Superblock operations for tmpfs. */
static const struct super_operations shmem_ops = {
	.alloc_inode	= shmem_alloc_inode,
	.destroy_inode	= shmem_destroy_inode,
#ifdef CONFIG_TMPFS
	.statfs		= shmem_statfs,
	.remount_fs	= shmem_remount_fs,
	.show_options	= shmem_show_options,
#endif
	.evict_inode	= shmem_evict_inode,
	.drop_inode	= generic_delete_inode,
	.put_super	= shmem_put_super,
#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
	/* let the shrinker reclaim unused tails of huge pages */
	.nr_cached_objects	= shmem_unused_huge_count,
	.free_cached_objects	= shmem_unused_huge_scan,
#endif
};
f0f37e2f7   Alexey Dobriyan   const: mark struc...
3552
/* VM operations for mmap'ed shmem regions. */
static const struct vm_operations_struct shmem_vm_ops = {
	.fault		= shmem_fault,
	.map_pages	= filemap_map_pages,
#ifdef CONFIG_NUMA
	.set_policy     = shmem_set_policy,
	.get_policy     = shmem_get_policy,
#endif
};
3c26ff6e4   Al Viro   convert get_sb_no...
3560
3561
/*
 * mount(2) entry point: tmpfs needs no backing block device, so a
 * plain nodev mount with shmem_fill_super does all the work.
 */
static struct dentry *shmem_mount(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return mount_nodev(fs_type, flags, data, shmem_fill_super);
}
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3565
/* The "tmpfs" filesystem type (full CONFIG_SHMEM implementation). */
static struct file_system_type shmem_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "tmpfs",
	.mount		= shmem_mount,
	.kill_sb	= kill_litter_super,
	.fs_flags	= FS_USERNS_MOUNT,	/* mountable inside user namespaces */
};
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3572

41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3573
/*
 * Boot-time initialisation: create the inode cache, register the
 * "tmpfs" filesystem type and establish the internal shm_mnt mount
 * (used e.g. by shmem_kernel_file_setup()/shmem_zero_setup() below).
 *
 * Returns 0 on success or a negative errno; on failure shm_mnt is left
 * as an ERR_PTR so later users fail cleanly.
 */
int __init shmem_init(void)
{
	int error;

	/* If rootfs called this, don't re-init */
	if (shmem_inode_cachep)
		return 0;

	shmem_init_inodecache();

	error = register_filesystem(&shmem_fs_type);
	if (error) {
		pr_err("Could not register tmpfs\n");
		goto out2;
	}

	shm_mnt = kern_mount(&shmem_fs_type);
	if (IS_ERR(shm_mnt)) {
		error = PTR_ERR(shm_mnt);
		pr_err("Could not kern_mount tmpfs\n");
		goto out1;
	}

#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
	/* propagate a boot-time shmem_huge override to the internal mount */
	if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY)
		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
	else
		shmem_huge = 0; /* just in case it was patched */
#endif

	return 0;

out1:
	unregister_filesystem(&shmem_fs_type);
out2:
	shmem_destroy_inodecache();
	shm_mnt = ERR_PTR(error);
	return error;
}
853ac43ab   Matt Mackall   shmem: unify regu...
3611

e496cf3d7   Kirill A. Shutemov   thp: introduce CO...
3612
  #if defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE) && defined(CONFIG_SYSFS)
5a6e75f81   Kirill A. Shutemov   shmem: prepare hu...
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
  static ssize_t shmem_enabled_show(struct kobject *kobj,
  		struct kobj_attribute *attr, char *buf)
  {
  	int values[] = {
  		SHMEM_HUGE_ALWAYS,
  		SHMEM_HUGE_WITHIN_SIZE,
  		SHMEM_HUGE_ADVISE,
  		SHMEM_HUGE_NEVER,
  		SHMEM_HUGE_DENY,
  		SHMEM_HUGE_FORCE,
  	};
  	int i, count;
  
  	for (i = 0, count = 0; i < ARRAY_SIZE(values); i++) {
  		const char *fmt = shmem_huge == values[i] ? "[%s] " : "%s ";
  
  		count += sprintf(buf + count, fmt,
  				shmem_format_huge(values[i]));
  	}
  	buf[count - 1] = '
  ';
  	return count;
  }
  
/*
 * sysfs ->store for the shmem_enabled attribute: parse a huge-page
 * policy name, validate it against hardware THP support, and apply it
 * globally and to the internal shm_mnt mount.
 */
static ssize_t shmem_enabled_store(struct kobject *kobj,
		struct kobj_attribute *attr, const char *buf, size_t count)
{
	char tmp[16];
	int huge;

	if (count + 1 > sizeof(tmp))	/* input too long for any valid name */
		return -EINVAL;
	memcpy(tmp, buf, count);
	tmp[count] = '\0';
	if (count && tmp[count - 1] == '\n')
		tmp[count - 1] = '\0';	/* strip trailing newline from echo(1) */

	huge = shmem_parse_huge(tmp);
	if (huge == -EINVAL)
		return -EINVAL;
	/* without hardware THP only the "off" style policies make sense */
	if (!has_transparent_hugepage() &&
			huge != SHMEM_HUGE_NEVER && huge != SHMEM_HUGE_DENY)
		return -EINVAL;

	shmem_huge = huge;
	if (shmem_huge > SHMEM_HUGE_DENY)
		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
	return count;
}

/* The shmem_enabled sysfs attribute (mode 0644). */
struct kobj_attribute shmem_enabled_attr =
	__ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
3b33719c9   Arnd Bergmann   thp: move shmem_h...
3666
  #endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */
f3f0e1d21   Kirill A. Shutemov   khugepaged: add s...
3667

3b33719c9   Arnd Bergmann   thp: move shmem_h...
3668
  #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
f3f0e1d21   Kirill A. Shutemov   khugepaged: add s...
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
/*
 * Decide whether huge pages are enabled for this shmem-backed vma,
 * combining the global shmem_huge override with the per-superblock
 * mount policy.
 */
bool shmem_huge_enabled(struct vm_area_struct *vma)
{
	struct inode *inode = file_inode(vma->vm_file);
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	loff_t i_size;
	pgoff_t off;

	/* global override wins over any mount option */
	if (shmem_huge == SHMEM_HUGE_FORCE)
		return true;
	if (shmem_huge == SHMEM_HUGE_DENY)
		return false;
	switch (sbinfo->huge) {
		case SHMEM_HUGE_NEVER:
			return false;
		case SHMEM_HUGE_ALWAYS:
			return true;
		case SHMEM_HUGE_WITHIN_SIZE:
			/* allow huge pages only if i_size covers a full
			 * huge page beyond this vma's starting offset */
			off = round_up(vma->vm_pgoff, HPAGE_PMD_NR);
			i_size = round_up(i_size_read(inode), PAGE_SIZE);
			if (i_size >= HPAGE_PMD_SIZE &&
					i_size >> PAGE_SHIFT >= off)
				return true;
			/* fall through */
		case SHMEM_HUGE_ADVISE:
			/* TODO: implement fadvise() hints */
			return (vma->vm_flags & VM_HUGEPAGE);
		default:
			VM_BUG_ON(1);
			return false;
	}
}
3b33719c9   Arnd Bergmann   thp: move shmem_h...
3700
  #endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */
5a6e75f81   Kirill A. Shutemov   shmem: prepare hu...
3701

853ac43ab   Matt Mackall   shmem: unify regu...
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
  #else /* !CONFIG_SHMEM */
  
  /*
   * tiny-shmem: simple shmemfs and tmpfs using ramfs code
   *
   * This is intended for small system where the benefits of the full
   * shmem code (swap-backed and resource-limited) are outweighed by
   * their complexity. On systems without swap this code should be
   * effectively equivalent, but much lighter weight.
   */
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3712
/* tiny-shmem: "tmpfs" is just ramfs under another name. */
static struct file_system_type shmem_fs_type = {
	.name		= "tmpfs",
	.mount		= ramfs_mount,
	.kill_sb	= kill_litter_super,
	.fs_flags	= FS_USERNS_MOUNT,
};

/*
 * Boot-time init for the tiny case: registration and the internal
 * mount must succeed, so failures are fatal (BUG_ON).
 */
int __init shmem_init(void)
{
	BUG_ON(register_filesystem(&shmem_fs_type) != 0);

	shm_mnt = kern_mount(&shmem_fs_type);
	BUG_ON(IS_ERR(shm_mnt));

	return 0;
}
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3727
/* No swap in the tiny case: nothing to un-swap. */
int shmem_unuse(swp_entry_t swap, struct page *page)
{
	return 0;
}

/* Locking pages in memory is a no-op without swap; report success. */
int shmem_lock(struct file *file, int lock, struct user_struct *user)
{
	return 0;
}

/* Nothing was ever moved to the unevictable list; nothing to undo. */
void shmem_unlock_mapping(struct address_space *mapping)
{
}
c01d5b300   Hugh Dickins   shmem: get_unmapp...
3738
3739
3740
3741
3742
3743
3744
3745
  #ifdef CONFIG_MMU
  unsigned long shmem_get_unmapped_area(struct file *file,
  				      unsigned long addr, unsigned long len,
  				      unsigned long pgoff, unsigned long flags)
  {
  	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
  }
  #endif
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
3746
/* Tiny case: no swap entries to clear, plain page-cache truncation does. */
void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
{
	truncate_inode_pages_range(inode->i_mapping, lstart, lend);
}
EXPORT_SYMBOL_GPL(shmem_truncate_range);
0b0a0806b   Hugh Dickins   shmem: fix shared...
3751
3752
/*
 * Map the shmem names used by the common code below onto their
 * generic/ramfs equivalents; size accounting becomes a no-op.
 */
#define shmem_vm_ops				generic_file_vm_ops
#define shmem_file_operations			ramfs_file_operations
#define shmem_get_inode(sb, dir, mode, dev, flags)	ramfs_get_inode(sb, dir, mode, dev)
#define shmem_acct_size(flags, size)		0
#define shmem_unacct_size(flags, size)		do {} while (0)
853ac43ab   Matt Mackall   shmem: unify regu...
3756
3757
3758
3759
  
  #endif /* CONFIG_SHMEM */
  
  /* common code */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3760

703321b60   Matthew Auld   mm/shmem: introdu...
3761
/*
 * Common worker for the shmem_*file_setup() helpers: create an
 * unlinked tmpfs file of @size bytes on @mnt and return an open
 * struct file for it.
 *
 * @i_flags lets callers pass S_PRIVATE to skip LSM checks on the
 * inode.  Returns an ERR_PTR on failure; on any error path after size
 * accounting succeeded, the accounting is unwound (directly or via
 * iput() evicting the inode).
 */
static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, loff_t size,
				       unsigned long flags, unsigned int i_flags)
{
	struct inode *inode;
	struct file *res;

	if (IS_ERR(mnt))
		return ERR_CAST(mnt);

	if (size < 0 || size > MAX_LFS_FILESIZE)
		return ERR_PTR(-EINVAL);

	if (shmem_acct_size(flags, size))
		return ERR_PTR(-ENOMEM);

	inode = shmem_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0,
				flags);
	if (unlikely(!inode)) {
		shmem_unacct_size(flags, size);
		return ERR_PTR(-ENOSPC);
	}
	inode->i_flags |= i_flags;
	inode->i_size = size;
	clear_nlink(inode);	/* It is unlinked */
	/* nommu builds must pre-populate the mapping; a no-op with an MMU */
	res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
	if (!IS_ERR(res))
		res = alloc_file_pseudo(inode, mnt, name, O_RDWR,
				&shmem_file_operations);
	if (IS_ERR(res))
		iput(inode);
	return res;
}
c72770909   Eric Paris   security: shmem: ...
3792
3793
3794
3795
3796
  
/**
 * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be
 * 	kernel internal.  There will be NO LSM permission checks against the
 * 	underlying inode.  So users of this interface must do LSM checks at a
 *	higher layer.  The users are the big_key and shm implementations.  LSM
 *	checks are provided at the key or shm level rather than the inode.
 * @name: name for dentry (to be seen in /proc/<pid>/maps
 * @size: size to be set for the file
 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
 *
 * Returns an open file on the internal shm_mnt, or an ERR_PTR.
 */
struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
{
	/* S_PRIVATE marks the inode as kernel-internal for the LSMs */
	return __shmem_file_setup(shm_mnt, name, size, flags, S_PRIVATE);
}
  
/**
 * shmem_file_setup - get an unlinked file living in tmpfs
 * @name: name for dentry (to be seen in /proc/<pid>/maps
 * @size: size to be set for the file
 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
 *
 * Like shmem_kernel_file_setup(), but with normal LSM checks on the inode.
 */
struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
{
	return __shmem_file_setup(shm_mnt, name, size, flags, 0);
}
EXPORT_SYMBOL_GPL(shmem_file_setup);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3819

467118102   Randy Dunlap   mm/shmem and tiny...
3820
/**
 * shmem_file_setup_with_mnt - get an unlinked file living in tmpfs
 * @mnt: the tmpfs mount where the file will be created
 * @name: name for dentry (to be seen in /proc/<pid>/maps
 * @size: size to be set for the file
 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
 *
 * Like shmem_file_setup(), but on a caller-supplied tmpfs mount
 * instead of the internal shm_mnt.
 */
struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name,
				       loff_t size, unsigned long flags)
{
	return __shmem_file_setup(mnt, name, size, flags, 0);
}
EXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt);
  
/**
 * shmem_zero_setup - setup a shared anonymous mapping
 * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
 *
 * Backs @vma with an unlinked tmpfs file ("dev/zero") so that shared
 * anonymous memory gets shmem semantics.  Returns 0 or a negative errno.
 */
int shmem_zero_setup(struct vm_area_struct *vma)
{
	struct file *file;
	loff_t size = vma->vm_end - vma->vm_start;

	/*
	 * Cloning a new file under mmap_sem leads to a lock ordering conflict
	 * between XFS directory reading and selinux: since this file is only
	 * accessible to the user through its mapping, use S_PRIVATE flag to
	 * bypass file security, in the same way as shmem_kernel_file_setup().
	 */
	file = shmem_kernel_file_setup("dev/zero", size, vma->vm_flags);
	if (IS_ERR(file))
		return PTR_ERR(file);

	/* replace any file previously backing this vma */
	if (vma->vm_file)
		fput(vma->vm_file);
	vma->vm_file = file;
	vma->vm_ops = &shmem_vm_ops;

	/*
	 * Register with khugepaged only if the vma spans at least one
	 * fully PMD-aligned huge-page-sized region.
	 */
	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) &&
			((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
			(vma->vm_end & HPAGE_PMD_MASK)) {
		khugepaged_enter(vma, vma->vm_flags);
	}

	return 0;
}
d9d90e5eb   Hugh Dickins   tmpfs: add shmem_...
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
  
/**
 * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags.
 * @mapping:	the page's address_space
 * @index:	the page index
 * @gfp:	the page allocator flags to use if allocating
 *
 * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)",
 * with any new page allocations done using the specified allocation flags.
 * But read_cache_page_gfp() uses the ->readpage() method: which does not
 * suit tmpfs, since it may have pages in swapcache, and needs to find those
 * for itself; although drivers/gpu/drm i915 and ttm rely upon this support.
 *
 * i915_gem_object_get_pages_gtt() mixes __GFP_NORETRY | __GFP_NOWARN in
 * with the mapping_gfp_mask(), to avoid OOMing the machine unnecessarily.
 *
 * Returns the (unlocked) page, or an ERR_PTR on failure.
 */
struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
					 pgoff_t index, gfp_t gfp)
{
#ifdef CONFIG_SHMEM
	struct inode *inode = mapping->host;
	struct page *page;
	int error;

	/* this helper is only valid on shmem-backed mappings */
	BUG_ON(mapping->a_ops != &shmem_aops);
	error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
				  gfp, NULL, NULL, NULL);
	if (error)
		page = ERR_PTR(error);
	else
		unlock_page(page);
	return page;
#else
	/*
	 * The tiny !SHMEM case uses ramfs without swap
	 */
	return read_cache_page_gfp(mapping, index, gfp);
#endif
}
EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);