mm/shmem.c

  /*
   * Resizable virtual memory filesystem for Linux.
   *
   * Copyright (C) 2000 Linus Torvalds.
   *		 2000 Transmeta Corp.
   *		 2000-2001 Christoph Rohland
   *		 2000-2001 SAP AG
   *		 2002 Red Hat Inc.
   * Copyright (C) 2002-2011 Hugh Dickins.
   * Copyright (C) 2011 Google Inc.
   * Copyright (C) 2002-2005 VERITAS Software Corporation.
   * Copyright (C) 2004 Andi Kleen, SuSE Labs
   *
   * Extended attribute support for tmpfs:
   * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
   *
   * tiny-shmem:
   * Copyright (c) 2004, 2008 Matt Mackall <mpm@selenic.com>
   *
   * This file is released under the GPL.
   */
  #include <linux/fs.h>
  #include <linux/init.h>
  #include <linux/vfs.h>
  #include <linux/mount.h>
  #include <linux/ramfs.h>
  #include <linux/pagemap.h>
  #include <linux/file.h>
  #include <linux/mm.h>
  #include <linux/export.h>
  #include <linux/swap.h>
  #include <linux/uio.h>
  
  static struct vfsmount *shm_mnt;
  
  #ifdef CONFIG_SHMEM
  /*
   * This virtual memory filesystem is heavily based on the ramfs. It
   * extends ramfs by the ability to use swap and honor resource limits
   * which makes it a completely usable filesystem.
   */
  #include <linux/xattr.h>
  #include <linux/exportfs.h>
  #include <linux/posix_acl.h>
  #include <linux/posix_acl_xattr.h>
  #include <linux/mman.h>
  #include <linux/string.h>
  #include <linux/slab.h>
  #include <linux/backing-dev.h>
  #include <linux/shmem_fs.h>
  #include <linux/writeback.h>
  #include <linux/blkdev.h>
  #include <linux/pagevec.h>
  #include <linux/percpu_counter.h>
  #include <linux/falloc.h>
  #include <linux/splice.h>
  #include <linux/security.h>
  #include <linux/swapops.h>
  #include <linux/mempolicy.h>
  #include <linux/namei.h>
  #include <linux/ctype.h>
  #include <linux/migrate.h>
  #include <linux/highmem.h>
  #include <linux/seq_file.h>
  #include <linux/magic.h>
  #include <linux/syscalls.h>
  #include <linux/fcntl.h>
  #include <uapi/linux/memfd.h>

  #include <asm/uaccess.h>
  #include <asm/pgtable.h>
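  /* BLOCKS_PER_PAGE is how many 512-byte units of i_blocks one page represents;
   * VM_ACCT() rounds a byte count up to whole pages for the vm overcommit
   * accounting done by the helpers below.
   */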
  #define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
  #define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
  /* Pretend that each entry is of this size in directory's i_size */
  #define BOGO_DIRENT_SIZE 20
  /* Symlink up to this size is kmalloc'ed instead of using a swappable page */
  #define SHORT_SYMLINK_LEN 128
  /*
   * shmem_fallocate communicates with shmem_fault or shmem_writepage via
   * inode->i_private (with i_mutex making sure that it has only one user at
   * a time): we would prefer not to enlarge the shmem inode just for that.
   */
  struct shmem_falloc {
  	wait_queue_head_t *waitq; /* faults into hole wait for punch to end */
  	pgoff_t start;		/* start of range currently being fallocated */
  	pgoff_t next;		/* the next page offset to be fallocated */
  	pgoff_t nr_falloced;	/* how many new pages have been fallocated */
  	pgoff_t nr_unswapped;	/* how often writepage refused to swap out */
  };
  /* Flag allocation requirements to shmem_getpage */
  enum sgp_type {
  	SGP_READ,	/* don't exceed i_size, don't allocate page */
  	SGP_CACHE,	/* don't exceed i_size, may allocate page */
  	SGP_DIRTY,	/* like SGP_CACHE, but set new page dirty */
  	SGP_WRITE,	/* may exceed i_size, may allocate !Uptodate page */
  	SGP_FALLOC,	/* like SGP_WRITE, but make existing page Uptodate */
  };
  #ifdef CONFIG_TMPFS
  static unsigned long shmem_default_max_blocks(void)
  {
  	return totalram_pages / 2;
  }
  
  static unsigned long shmem_default_max_inodes(void)
  {
  	return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
  }
  #endif

  static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
  static int shmem_replace_page(struct page **pagep, gfp_t gfp,
  				struct shmem_inode_info *info, pgoff_t index);
  static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
  	struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type);
  
  static inline int shmem_getpage(struct inode *inode, pgoff_t index,
  	struct page **pagep, enum sgp_type sgp, int *fault_type)
  {
  	return shmem_getpage_gfp(inode, index, pagep, sgp,
  			mapping_gfp_mask(inode->i_mapping), fault_type);
  }

  static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
  {
  	return sb->s_fs_info;
  }
  
  /*
   * shmem_file_setup pre-accounts the whole fixed size of a VM object,
   * for shared memory and for shared anonymous (/dev/zero) mappings
   * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1),
   * consistent with the pre-accounting of private mappings ...
   */
  static inline int shmem_acct_size(unsigned long flags, loff_t size)
  {
  	return (flags & VM_NORESERVE) ?
  		0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
  }
  
  static inline void shmem_unacct_size(unsigned long flags, loff_t size)
  {
  	if (!(flags & VM_NORESERVE))
  		vm_unacct_memory(VM_ACCT(size));
  }
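  /*
   * Called when i_size changes on a pre-accounted object: charge for any
   * growth, or give back the difference when it shrinks.
   */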
  static inline int shmem_reacct_size(unsigned long flags,
  		loff_t oldsize, loff_t newsize)
  {
  	if (!(flags & VM_NORESERVE)) {
  		if (VM_ACCT(newsize) > VM_ACCT(oldsize))
  			return security_vm_enough_memory_mm(current->mm,
  					VM_ACCT(newsize) - VM_ACCT(oldsize));
  		else if (VM_ACCT(newsize) < VM_ACCT(oldsize))
  			vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize));
  	}
  	return 0;
  }
  /*
   * ... whereas tmpfs objects are accounted incrementally as
   * pages are allocated, in order to allow huge sparse files.
   * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
   * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
   */
  static inline int shmem_acct_block(unsigned long flags)
  {
  	return (flags & VM_NORESERVE) ?
  		security_vm_enough_memory_mm(current->mm, VM_ACCT(PAGE_CACHE_SIZE)) : 0;
  }
  
  static inline void shmem_unacct_blocks(unsigned long flags, long pages)
  {
  	if (flags & VM_NORESERVE)
  		vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
  }
  static const struct super_operations shmem_ops;
  static const struct address_space_operations shmem_aops;
  static const struct file_operations shmem_file_operations;
  static const struct inode_operations shmem_inode_operations;
  static const struct inode_operations shmem_dir_inode_operations;
  static const struct inode_operations shmem_special_inode_operations;
  static const struct vm_operations_struct shmem_vm_ops;

  static LIST_HEAD(shmem_swaplist);
  static DEFINE_MUTEX(shmem_swaplist_mutex);

  static int shmem_reserve_inode(struct super_block *sb)
  {
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
  	if (sbinfo->max_inodes) {
  		spin_lock(&sbinfo->stat_lock);
  		if (!sbinfo->free_inodes) {
  			spin_unlock(&sbinfo->stat_lock);
  			return -ENOSPC;
  		}
  		sbinfo->free_inodes--;
  		spin_unlock(&sbinfo->stat_lock);
  	}
  	return 0;
  }
  
  static void shmem_free_inode(struct super_block *sb)
  {
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
  	if (sbinfo->max_inodes) {
  		spin_lock(&sbinfo->stat_lock);
  		sbinfo->free_inodes++;
  		spin_unlock(&sbinfo->stat_lock);
  	}
  }
  /**
   * shmem_recalc_inode - recalculate the block usage of an inode
   * @inode: inode to recalc
   *
   * We have to calculate the free blocks since the mm can drop
   * undirtied hole pages behind our back.
   *
   * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
   * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
   *
   * It has to be called with the spinlock held.
   */
  static void shmem_recalc_inode(struct inode *inode)
  {
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	long freed;
  
  	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
  	if (freed > 0) {
  		struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
  		if (sbinfo->max_blocks)
  			percpu_counter_add(&sbinfo->used_blocks, -freed);
  		info->alloced -= freed;
  		inode->i_blocks -= freed * BLOCKS_PER_PAGE;
  		shmem_unacct_blocks(info->flags, freed);
  	}
  }
  /*
   * Replace item expected in radix tree by a new item, while holding tree lock.
   */
  static int shmem_radix_tree_replace(struct address_space *mapping,
  			pgoff_t index, void *expected, void *replacement)
  {
  	void **pslot;
  	void *item;
  
  	VM_BUG_ON(!expected);
  	VM_BUG_ON(!replacement);
  	pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
  	if (!pslot)
  		return -ENOENT;
  	item = radix_tree_deref_slot_protected(pslot, &mapping->tree_lock);
  	if (item != expected)
  		return -ENOENT;
  	radix_tree_replace_slot(pslot, replacement);
  	return 0;
  }
  
  /*
   * Sometimes, before we decide whether to proceed or to fail, we must check
   * that an entry was not already brought back from swap by a racing thread.
   *
   * Checking page is not enough: by the time a SwapCache page is locked, it
   * might be reused, and again be SwapCache, using the same swap as before.
   */
  static bool shmem_confirm_swap(struct address_space *mapping,
  			       pgoff_t index, swp_entry_t swap)
  {
  	void *item;
  
  	rcu_read_lock();
  	item = radix_tree_lookup(&mapping->page_tree, index);
  	rcu_read_unlock();
  	return item == swp_to_radix_entry(swap);
  }
  
  /*
   * Like add_to_page_cache_locked, but error if expected item has gone.
   */
  static int shmem_add_to_page_cache(struct page *page,
  				   struct address_space *mapping,
  				   pgoff_t index, void *expected)
  {
  	int error;

  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);

  	page_cache_get(page);
  	page->mapping = mapping;
  	page->index = index;
  
  	spin_lock_irq(&mapping->tree_lock);
  	if (!expected)
  		error = radix_tree_insert(&mapping->page_tree, index, page);
  	else
  		error = shmem_radix_tree_replace(mapping, index, expected,
  								 page);
  	if (!error) {
  		mapping->nrpages++;
  		__inc_zone_page_state(page, NR_FILE_PAGES);
  		__inc_zone_page_state(page, NR_SHMEM);
  		spin_unlock_irq(&mapping->tree_lock);
  	} else {
  		page->mapping = NULL;
  		spin_unlock_irq(&mapping->tree_lock);
  		page_cache_release(page);
  	}
  	return error;
  }
  
  /*
   * Like delete_from_page_cache, but substitutes swap for page.
   */
  static void shmem_delete_from_page_cache(struct page *page, void *radswap)
  {
  	struct address_space *mapping = page->mapping;
  	int error;
  
  	spin_lock_irq(&mapping->tree_lock);
  	error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
  	page->mapping = NULL;
  	mapping->nrpages--;
  	__dec_zone_page_state(page, NR_FILE_PAGES);
  	__dec_zone_page_state(page, NR_SHMEM);
  	spin_unlock_irq(&mapping->tree_lock);
  	page_cache_release(page);
  	BUG_ON(error);
  }
  
  /*
   * Remove swap entry from radix tree, free the swap and its page cache.
   */
  static int shmem_free_swap(struct address_space *mapping,
  			   pgoff_t index, void *radswap)
  {
  	void *old;
  
  	spin_lock_irq(&mapping->tree_lock);
  	old = radix_tree_delete_item(&mapping->page_tree, index, radswap);
  	spin_unlock_irq(&mapping->tree_lock);
  	if (old != radswap)
  		return -ENOENT;
  	free_swap_and_cache(radix_to_swp_entry(radswap));
  	return 0;
  }
  
  /*
   * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
   */
  void shmem_unlock_mapping(struct address_space *mapping)
  {
  	struct pagevec pvec;
  	pgoff_t indices[PAGEVEC_SIZE];
  	pgoff_t index = 0;
  
  	pagevec_init(&pvec, 0);
  	/*
  	 * Minor point, but we might as well stop if someone else SHM_LOCKs it.
  	 */
  	while (!mapping_unevictable(mapping)) {
  		/*
  		 * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
  		 * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
  		 */
  		pvec.nr = find_get_entries(mapping, index,
  					   PAGEVEC_SIZE, pvec.pages, indices);
  		if (!pvec.nr)
  			break;
  		index = indices[pvec.nr - 1] + 1;
  		pagevec_remove_exceptionals(&pvec);
  		check_move_unevictable_pages(pvec.pages, pvec.nr);
  		pagevec_release(&pvec);
  		cond_resched();
  	}
  }
  
  /*
   * Remove range of pages and swap entries from radix tree, and free them.
   * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
   */
  static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
  								 bool unfalloc)
  {
  	struct address_space *mapping = inode->i_mapping;
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
  	pgoff_t end = (lend + 1) >> PAGE_CACHE_SHIFT;
  	unsigned int partial_start = lstart & (PAGE_CACHE_SIZE - 1);
  	unsigned int partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);
  	struct pagevec pvec;
  	pgoff_t indices[PAGEVEC_SIZE];
  	long nr_swaps_freed = 0;
  	pgoff_t index;
  	int i;
  	if (lend == -1)
  		end = -1;	/* unsigned, so actually very big */
  
  	pagevec_init(&pvec, 0);
  	index = start;
  	while (index < end) {
  		pvec.nr = find_get_entries(mapping, index,
  			min(end - index, (pgoff_t)PAGEVEC_SIZE),
  			pvec.pages, indices);
  		if (!pvec.nr)
  			break;
  		for (i = 0; i < pagevec_count(&pvec); i++) {
  			struct page *page = pvec.pages[i];
  			index = indices[i];
  			if (index >= end)
  				break;
  			if (radix_tree_exceptional_entry(page)) {
  				if (unfalloc)
  					continue;
  				nr_swaps_freed += !shmem_free_swap(mapping,
  								index, page);
  				continue;
  			}
  
  			if (!trylock_page(page))
  				continue;
  			if (!unfalloc || !PageUptodate(page)) {
  				if (page->mapping == mapping) {
  					VM_BUG_ON_PAGE(PageWriteback(page), page);
  					truncate_inode_page(mapping, page);
  				}
  			}
  			unlock_page(page);
  		}
  		pagevec_remove_exceptionals(&pvec);
  		pagevec_release(&pvec);
  		cond_resched();
  		index++;
  	}

  	if (partial_start) {
  		struct page *page = NULL;
  		shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
  		if (page) {
  			unsigned int top = PAGE_CACHE_SIZE;
  			if (start > end) {
  				top = partial_end;
  				partial_end = 0;
  			}
  			zero_user_segment(page, partial_start, top);
  			set_page_dirty(page);
  			unlock_page(page);
  			page_cache_release(page);
  		}
  	}
  	if (partial_end) {
  		struct page *page = NULL;
  		shmem_getpage(inode, end, &page, SGP_READ, NULL);
  		if (page) {
  			zero_user_segment(page, 0, partial_end);
  			set_page_dirty(page);
  			unlock_page(page);
  			page_cache_release(page);
  		}
  	}
  	if (start >= end)
  		return;
  
  	index = start;
  	while (index < end) {
  		cond_resched();
  
  		pvec.nr = find_get_entries(mapping, index,
  				min(end - index, (pgoff_t)PAGEVEC_SIZE),
  				pvec.pages, indices);
  		if (!pvec.nr) {
  			/* If all gone or hole-punch or unfalloc, we're done */
  			if (index == start || end != -1)
  				break;
  			/* But if truncating, restart to make sure all gone */
  			index = start;
  			continue;
  		}
  		for (i = 0; i < pagevec_count(&pvec); i++) {
  			struct page *page = pvec.pages[i];
  			index = indices[i];
  			if (index >= end)
  				break;
  			if (radix_tree_exceptional_entry(page)) {
  				if (unfalloc)
  					continue;
  				if (shmem_free_swap(mapping, index, page)) {
  					/* Swap was replaced by page: retry */
  					index--;
  					break;
  				}
  				nr_swaps_freed++;
  				continue;
  			}
  			lock_page(page);
  			if (!unfalloc || !PageUptodate(page)) {
  				if (page->mapping == mapping) {
  					VM_BUG_ON_PAGE(PageWriteback(page), page);
  					truncate_inode_page(mapping, page);
  				} else {
  					/* Page was replaced by swap: retry */
  					unlock_page(page);
  					index--;
  					break;
  				}
  			}
  			unlock_page(page);
  		}
  		pagevec_remove_exceptionals(&pvec);
  		pagevec_release(&pvec);
  		index++;
  	}

  	spin_lock(&info->lock);
  	info->swapped -= nr_swaps_freed;
  	shmem_recalc_inode(inode);
  	spin_unlock(&info->lock);
  }

  void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
  {
  	shmem_undo_range(inode, lstart, lend, false);
  	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
  }
  EXPORT_SYMBOL_GPL(shmem_truncate_range);

  static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
  {
  	struct inode *inode = d_inode(dentry);
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	int error;
  	error = inode_change_ok(inode, attr);
  	if (error)
  		return error;
  	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
  		loff_t oldsize = inode->i_size;
  		loff_t newsize = attr->ia_size;

  		/* protected by i_mutex */
  		if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
  		    (newsize > oldsize && (info->seals & F_SEAL_GROW)))
  			return -EPERM;
  		if (newsize != oldsize) {
  			error = shmem_reacct_size(SHMEM_I(inode)->flags,
  					oldsize, newsize);
  			if (error)
  				return error;
  			i_size_write(inode, newsize);
  			inode->i_ctime = inode->i_mtime = CURRENT_TIME;
  		}
  		if (newsize < oldsize) {
  			loff_t holebegin = round_up(newsize, PAGE_SIZE);
  			unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
  			shmem_truncate_range(inode, newsize, (loff_t)-1);
  			/* unmap again to remove racily COWed private pages */
  			unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
  		}
  	}
  	setattr_copy(inode, attr);
  	if (attr->ia_valid & ATTR_MODE)
  		error = posix_acl_chmod(inode, inode->i_mode);
  	return error;
  }
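  /*
   * Final teardown of an inode being evicted: give back its pages (or its
   * kmalloc'ed short symlink), drop it from the swaplist, and free xattrs
   * and the reserved inode count.
   */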
  static void shmem_evict_inode(struct inode *inode)
  {
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	if (inode->i_mapping->a_ops == &shmem_aops) {
  		shmem_unacct_size(info->flags, inode->i_size);
  		inode->i_size = 0;
  		shmem_truncate_range(inode, 0, (loff_t)-1);
  		if (!list_empty(&info->swaplist)) {
  			mutex_lock(&shmem_swaplist_mutex);
  			list_del_init(&info->swaplist);
  			mutex_unlock(&shmem_swaplist_mutex);
  		}
  	} else
  		kfree(info->symlink);

  	simple_xattrs_free(&info->xattrs);
  	WARN_ON(inode->i_blocks);
  	shmem_free_inode(inode->i_sb);
  	clear_inode(inode);
  }
  /*
   * If swap found in inode, free it and move page from swapcache to filecache.
   */
  static int shmem_unuse_inode(struct shmem_inode_info *info,
  			     swp_entry_t swap, struct page **pagep)
  {
  	struct address_space *mapping = info->vfs_inode.i_mapping;
  	void *radswap;
  	pgoff_t index;
  	gfp_t gfp;
  	int error = 0;

  	radswap = swp_to_radix_entry(swap);
  	index = radix_tree_locate_item(&mapping->page_tree, radswap);
  	if (index == -1)
  		return -EAGAIN;	/* tell shmem_unuse we found nothing */

  	/*
  	 * Move _head_ to start search for next from here.
  	 * But be careful: shmem_evict_inode checks list_empty without taking
  	 * mutex, and there's an instant in list_move_tail when info->swaplist
  	 * would appear empty, if it were the only one on shmem_swaplist.
  	 */
  	if (shmem_swaplist.next != &info->swaplist)
  		list_move_tail(&shmem_swaplist, &info->swaplist);

  	gfp = mapping_gfp_mask(mapping);
  	if (shmem_should_replace_page(*pagep, gfp)) {
  		mutex_unlock(&shmem_swaplist_mutex);
  		error = shmem_replace_page(pagep, gfp, info, index);
  		mutex_lock(&shmem_swaplist_mutex);
  		/*
  		 * We needed to drop mutex to make that restrictive page
  		 * allocation, but the inode might have been freed while we
  		 * dropped it: although a racing shmem_evict_inode() cannot
  		 * complete without emptying the radix_tree, our page lock
  		 * on this swapcache page is not enough to prevent that -
  		 * free_swap_and_cache() of our swap entry will only
  		 * trylock_page(), removing swap from radix_tree whatever.
  		 *
  		 * We must not proceed to shmem_add_to_page_cache() if the
  		 * inode has been freed, but of course we cannot rely on
  		 * inode or mapping or info to check that.  However, we can
  		 * safely check if our swap entry is still in use (and here
  		 * it can't have got reused for another page): if it's still
  		 * in use, then the inode cannot have been freed yet, and we
  		 * can safely proceed (if it's no longer in use, that tells
  		 * nothing about the inode, but we don't need to unuse swap).
  		 */
  		if (!page_swapcount(*pagep))
  			error = -ENOENT;
  	}
  	/*
  	 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
  	 * but also to hold up shmem_evict_inode(): so inode cannot be freed
  	 * beneath us (pagelock doesn't help until the page is in pagecache).
  	 */
  	if (!error)
  		error = shmem_add_to_page_cache(*pagep, mapping, index,
  						radswap);
  	if (error != -ENOMEM) {
  		/*
  		 * Truncation and eviction use free_swap_and_cache(), which
  		 * only does trylock page: if we raced, best clean up here.
  		 */
  		delete_from_swap_cache(*pagep);
  		set_page_dirty(*pagep);
  		if (!error) {
  			spin_lock(&info->lock);
  			info->swapped--;
  			spin_unlock(&info->lock);
  			swap_free(swap);
  		}
  	}
  	return error;
  }
  
  /*
   * Search through swapped inodes to find and replace swap by page.
   */
  int shmem_unuse(swp_entry_t swap, struct page *page)
  {
  	struct list_head *this, *next;
  	struct shmem_inode_info *info;
  	struct mem_cgroup *memcg;
  	int error = 0;
  
  	/*
  	 * There's a faint possibility that swap page was replaced before
  	 * caller locked it: caller will come back later with the right page.
  	 */
  	if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
  		goto out;
  
  	/*
  	 * Charge page using GFP_KERNEL while we can wait, before taking
  	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
  	 * Charged back to the user (not to caller) when swap account is used.
  	 */
  	error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg);
  	if (error)
  		goto out;
  	/* No radix_tree_preload: swap entry keeps a place for page in tree */
  	error = -EAGAIN;

  	mutex_lock(&shmem_swaplist_mutex);
  	list_for_each_safe(this, next, &shmem_swaplist) {
  		info = list_entry(this, struct shmem_inode_info, swaplist);
  		if (info->swapped)
  			error = shmem_unuse_inode(info, swap, &page);
  		else
  			list_del_init(&info->swaplist);
  		cond_resched();
  		if (error != -EAGAIN)
  			break;
  		/* found nothing in this: move on to search the next */
  	}
  	mutex_unlock(&shmem_swaplist_mutex);

  	if (error) {
  		if (error != -ENOMEM)
  			error = 0;
  		mem_cgroup_cancel_charge(page, memcg);
  	} else
  		mem_cgroup_commit_charge(page, memcg, true);
  out:
  	unlock_page(page);
  	page_cache_release(page);
  	return error;
  }
  
  /*
   * Move the page from the page cache to the swap cache.
   */
  static int shmem_writepage(struct page *page, struct writeback_control *wbc)
  {
  	struct shmem_inode_info *info;
  	struct address_space *mapping;
  	struct inode *inode;
  	swp_entry_t swap;
  	pgoff_t index;
  
  	BUG_ON(!PageLocked(page));
  	mapping = page->mapping;
  	index = page->index;
  	inode = mapping->host;
  	info = SHMEM_I(inode);
  	if (info->flags & VM_LOCKED)
  		goto redirty;
  	if (!total_swap_pages)
  		goto redirty;
  	/*
  	 * Our capabilities prevent regular writeback or sync from ever calling
  	 * shmem_writepage; but a stacking filesystem might use ->writepage of
  	 * its underlying filesystem, in which case tmpfs should write out to
  	 * swap only in response to memory pressure, and not for the writeback
  	 * threads or sync.
  	 */
  	if (!wbc->for_reclaim) {
  		WARN_ON_ONCE(1);	/* Still happens? Tell us about it! */
  		goto redirty;
  	}
  
  	/*
  	 * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
  	 * value into swapfile.c, the only way we can correctly account for a
  	 * fallocated page arriving here is now to initialize it and write it.
  	 *
  	 * That's okay for a page already fallocated earlier, but if we have
  	 * not yet completed the fallocation, then (a) we want to keep track
  	 * of this page in case we have to undo it, and (b) it may not be a
  	 * good idea to continue anyway, once we're pushing into swap.  So
  	 * reactivate the page, and let shmem_fallocate() quit when too many.
  	 */
  	if (!PageUptodate(page)) {
  		if (inode->i_private) {
  			struct shmem_falloc *shmem_falloc;
  			spin_lock(&inode->i_lock);
  			shmem_falloc = inode->i_private;
  			if (shmem_falloc &&
  			    !shmem_falloc->waitq &&
  			    index >= shmem_falloc->start &&
  			    index < shmem_falloc->next)
  				shmem_falloc->nr_unswapped++;
  			else
  				shmem_falloc = NULL;
  			spin_unlock(&inode->i_lock);
  			if (shmem_falloc)
  				goto redirty;
  		}
  		clear_highpage(page);
  		flush_dcache_page(page);
  		SetPageUptodate(page);
  	}
  	swap = get_swap_page();
  	if (!swap.val)
  		goto redirty;

  	/*
  	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
  	 * if it's not already there.  Do it now before the page is
  	 * moved to swap cache, when its pagelock no longer protects
  	 * the inode from eviction.  But don't unlock the mutex until
  	 * we've incremented swapped, because shmem_unuse_inode() will
  	 * prune a !swapped inode from the swaplist under this mutex.
  	 */
  	mutex_lock(&shmem_swaplist_mutex);
  	if (list_empty(&info->swaplist))
  		list_add_tail(&info->swaplist, &shmem_swaplist);

  	if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
  		swap_shmem_alloc(swap);
  		shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
  
  		spin_lock(&info->lock);
  		info->swapped++;
  		shmem_recalc_inode(inode);
  		spin_unlock(&info->lock);
  
  		mutex_unlock(&shmem_swaplist_mutex);
  		BUG_ON(page_mapped(page));
  		swap_writepage(page, wbc);
  		return 0;
  	}
  	mutex_unlock(&shmem_swaplist_mutex);
  	swapcache_free(swap);
  redirty:
  	set_page_dirty(page);
  	if (wbc->for_reclaim)
  		return AOP_WRITEPAGE_ACTIVATE;	/* Return with page locked */
  	unlock_page(page);
  	return 0;
  }
  
  #ifdef CONFIG_NUMA
  #ifdef CONFIG_TMPFS
  static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
  {
  	char buffer[64];

  	if (!mpol || mpol->mode == MPOL_DEFAULT)
  		return;		/* show nothing */

  	mpol_to_str(buffer, sizeof(buffer), mpol);
  
  	seq_printf(seq, ",mpol=%s", buffer);
  }
  
  static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
  {
  	struct mempolicy *mpol = NULL;
  	if (sbinfo->mpol) {
  		spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
  		mpol = sbinfo->mpol;
  		mpol_get(mpol);
  		spin_unlock(&sbinfo->stat_lock);
  	}
  	return mpol;
  }
  #endif /* CONFIG_TMPFS */
  static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
  			struct shmem_inode_info *info, pgoff_t index)
  {
  	struct vm_area_struct pvma;
  	struct page *page;

  	/* Create a pseudo vma that just contains the policy */
  	pvma.vm_start = 0;
  	/* Bias interleave by inode number to distribute better across nodes */
  	pvma.vm_pgoff = index + info->vfs_inode.i_ino;
  	pvma.vm_ops = NULL;
  	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
  
  	page = swapin_readahead(swap, gfp, &pvma, 0);
  
  	/* Drop reference taken by mpol_shared_policy_lookup() */
  	mpol_cond_put(pvma.vm_policy);
  
  	return page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
862
  }
02098feaa   Hugh Dickins   swapin needs gfp_...
863
  static struct page *shmem_alloc_page(gfp_t gfp,
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
864
  			struct shmem_inode_info *info, pgoff_t index)
  {
  	struct vm_area_struct pvma;
  	struct page *page;

  	/* Create a pseudo vma that just contains the policy */
  	pvma.vm_start = 0;
  	/* Bias interleave by inode number to distribute better across nodes */
  	pvma.vm_pgoff = index + info->vfs_inode.i_ino;
  	pvma.vm_ops = NULL;
  	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);

  	page = alloc_page_vma(gfp, &pvma, 0);
  
  	/* Drop reference taken by mpol_shared_policy_lookup() */
  	mpol_cond_put(pvma.vm_policy);
  
  	return page;
  }
  #else /* !CONFIG_NUMA */
  #ifdef CONFIG_TMPFS
  static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
  {
  }
  #endif /* CONFIG_TMPFS */
  static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
  			struct shmem_inode_info *info, pgoff_t index)
  {
  	return swapin_readahead(swap, gfp, NULL, 0);
  }
  static inline struct page *shmem_alloc_page(gfp_t gfp,
  			struct shmem_inode_info *info, pgoff_t index)
  {
  	return alloc_page(gfp);
  }
  #endif /* CONFIG_NUMA */

  #if !defined(CONFIG_NUMA) || !defined(CONFIG_TMPFS)
  static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
  {
  	return NULL;
  }
  #endif
  /*
   * When a page is moved from swapcache to shmem filecache (either by the
   * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of
   * shmem_unuse_inode()), it may have been read in earlier from swap, in
   * ignorance of the mapping it belongs to.  If that mapping has special
   * constraints (like the gma500 GEM driver, which requires RAM below 4GB),
   * we may need to copy to a suitable page before moving to filecache.
   *
   * In a future release, this may well be extended to respect cpuset and
   * NUMA mempolicy, and applied also to anonymous pages in do_swap_page();
   * but for now it is a simple matter of zone.
   */
  static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
  {
  	return page_zonenum(page) > gfp_zone(gfp);
  }
  
  static int shmem_replace_page(struct page **pagep, gfp_t gfp,
  				struct shmem_inode_info *info, pgoff_t index)
  {
  	struct page *oldpage, *newpage;
  	struct address_space *swap_mapping;
  	pgoff_t swap_index;
  	int error;
  
  	oldpage = *pagep;
  	swap_index = page_private(oldpage);
  	swap_mapping = page_mapping(oldpage);
  
  	/*
  	 * We have arrived here because our zones are constrained, so don't
  	 * limit chance of success by further cpuset and node constraints.
  	 */
  	gfp &= ~GFP_CONSTRAINT_MASK;
  	newpage = shmem_alloc_page(gfp, info, index);
  	if (!newpage)
  		return -ENOMEM;

  	page_cache_get(newpage);
  	copy_highpage(newpage, oldpage);
  	flush_dcache_page(newpage);

  	__set_page_locked(newpage);
  	SetPageUptodate(newpage);
  	SetPageSwapBacked(newpage);
  	set_page_private(newpage, swap_index);
  	SetPageSwapCache(newpage);
  
  	/*
  	 * Our caller will very soon move newpage out of swapcache, but it's
  	 * a nice clean interface for us to replace oldpage by newpage there.
  	 */
  	spin_lock_irq(&swap_mapping->tree_lock);
  	error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
  								   newpage);
  	if (!error) {
  		__inc_zone_page_state(newpage, NR_FILE_PAGES);
  		__dec_zone_page_state(oldpage, NR_FILE_PAGES);
  	}
  	spin_unlock_irq(&swap_mapping->tree_lock);

  	if (unlikely(error)) {
  		/*
  		 * Is this possible?  I think not, now that our callers check
  		 * both PageSwapCache and page_private after getting page lock;
  		 * but be defensive.  Reverse old to newpage for clear and free.
  		 */
  		oldpage = newpage;
  	} else {
  		mem_cgroup_migrate(oldpage, newpage, true);
  		lru_cache_add_anon(newpage);
  		*pagep = newpage;
  	}
  
  	ClearPageSwapCache(oldpage);
  	set_page_private(oldpage, 0);
  
  	unlock_page(oldpage);
  	page_cache_release(oldpage);
  	page_cache_release(oldpage);
  	return error;
  }
  
  /*
   * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
   *
   * If we allocate a new one we do not mark it dirty. That's up to the
   * vm. If we swap it in we mark it dirty since we also free the swap
   * entry since a page cannot live in both the swap and page cache
   */
  static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
  	struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type)
  {
  	struct address_space *mapping = inode->i_mapping;
  	struct shmem_inode_info *info;
  	struct shmem_sb_info *sbinfo;
  	struct mem_cgroup *memcg;
27ab70062   Hugh Dickins   tmpfs: simplify f...
1004
  	struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1005
1006
  	swp_entry_t swap;
  	int error;
54af60421   Hugh Dickins   tmpfs: convert sh...
1007
  	int once = 0;
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1008
  	int alloced = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1009

41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1010
  	if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1011
  		return -EFBIG;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1012
  repeat:
54af60421   Hugh Dickins   tmpfs: convert sh...
1013
  	swap.val = 0;
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
1014
  	page = find_lock_entry(mapping, index);
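  	/*
  	 * find_lock_entry() may return an exceptional radix-tree entry
  	 * instead of a page: for shmem that encodes a swap entry, so decode
  	 * it and treat the slot as having no page in core.
  	 */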
  	if (radix_tree_exceptional_entry(page)) {
  		swap = radix_to_swp_entry(page);
  		page = NULL;
  	}
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1019
  	if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
  	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
  		error = -EINVAL;
  		goto failed;
  	}
66d2f4d28   Hugh Dickins   shmem: fix init_p...
1024
1025
  	if (page && sgp == SGP_WRITE)
  		mark_page_accessed(page);
  	/* fallocated page? */
  	if (page && !PageUptodate(page)) {
  		if (sgp != SGP_READ)
  			goto clear;
  		unlock_page(page);
  		page_cache_release(page);
  		page = NULL;
  	}
54af60421   Hugh Dickins   tmpfs: convert sh...
1034
  	if (page || (sgp == SGP_READ && !swap.val)) {
54af60421   Hugh Dickins   tmpfs: convert sh...
1035
1036
  		*pagep = page;
  		return 0;
27ab70062   Hugh Dickins   tmpfs: simplify f...
1037
1038
1039
  	}
  
  	/*
54af60421   Hugh Dickins   tmpfs: convert sh...
1040
1041
  	 * Fast cache lookup did not find it:
  	 * bring it back from swap or allocate.
27ab70062   Hugh Dickins   tmpfs: simplify f...
1042
  	 */
54af60421   Hugh Dickins   tmpfs: convert sh...
1043
1044
  	info = SHMEM_I(inode);
  	sbinfo = SHMEM_SB(inode->i_sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1045

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1046
1047
  	if (swap.val) {
  		/* Look it up and read it in.. */
27ab70062   Hugh Dickins   tmpfs: simplify f...
1048
1049
  		page = lookup_swap_cache(swap);
  		if (!page) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1050
  			/* here we actually do the io */
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
1051
1052
  			if (fault_type)
  				*fault_type |= VM_FAULT_MAJOR;
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1053
  			page = shmem_swapin(swap, gfp, info, index);
27ab70062   Hugh Dickins   tmpfs: simplify f...
1054
  			if (!page) {
54af60421   Hugh Dickins   tmpfs: convert sh...
1055
1056
  				error = -ENOMEM;
  				goto failed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1057
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1058
1059
1060
  		}
  
  		/* We have to do this with page locked to prevent races */
54af60421   Hugh Dickins   tmpfs: convert sh...
1061
  		lock_page(page);
0142ef6cd   Hugh Dickins   shmem: replace_pa...
1062
  		if (!PageSwapCache(page) || page_private(page) != swap.val ||
d18992286   Hugh Dickins   shmem: fix negati...
1063
  		    !shmem_confirm_swap(mapping, index, swap)) {
bde05d1cc   Hugh Dickins   shmem: replace pa...
1064
  			error = -EEXIST;	/* try again */
d18992286   Hugh Dickins   shmem: fix negati...
1065
  			goto unlock;
bde05d1cc   Hugh Dickins   shmem: replace pa...
1066
  		}
27ab70062   Hugh Dickins   tmpfs: simplify f...
1067
  		if (!PageUptodate(page)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1068
  			error = -EIO;
54af60421   Hugh Dickins   tmpfs: convert sh...
1069
  			goto failed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1070
  		}
54af60421   Hugh Dickins   tmpfs: convert sh...
1071
  		wait_on_page_writeback(page);
  		if (shmem_should_replace_page(page, gfp)) {
  			error = shmem_replace_page(&page, gfp, info, index);
  			if (error)
  				goto failed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1076
  		}
27ab70062   Hugh Dickins   tmpfs: simplify f...
1077

00501b531   Johannes Weiner   mm: memcontrol: r...
1078
  		error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
d18992286   Hugh Dickins   shmem: fix negati...
1079
  		if (!error) {
aa3b18955   Hugh Dickins   tmpfs: convert me...
1080
  			error = shmem_add_to_page_cache(page, mapping, index,
fed400a18   Wang Sheng-Hui   mm/shmem.c: remov...
1081
  						swp_to_radix_entry(swap));
  			/*
  			 * We already confirmed swap under page lock, and make
  			 * no memory allocation here, so usually no possibility
  			 * of error; but free_swap_and_cache() only trylocks a
  			 * page, so it is just possible that the entry has been
  			 * truncated or holepunched since swap was confirmed.
  			 * shmem_undo_range() will have done some of the
  			 * unaccounting, now delete_from_swap_cache() will do
93aa7d952   Vladimir Davydov   swap: remove unus...
1090
  			 * the rest.
215c02bc3   Hugh Dickins   tmpfs: fix shmem_...
1091
1092
1093
  			 * Reset swap.val? No, leave it so "failed" goes back to
  			 * "repeat": reading a hole and writing should succeed.
  			 */
00501b531   Johannes Weiner   mm: memcontrol: r...
1094
1095
  			if (error) {
  				mem_cgroup_cancel_charge(page, memcg);
215c02bc3   Hugh Dickins   tmpfs: fix shmem_...
1096
  				delete_from_swap_cache(page);
00501b531   Johannes Weiner   mm: memcontrol: r...
1097
  			}
d18992286   Hugh Dickins   shmem: fix negati...
1098
  		}
54af60421   Hugh Dickins   tmpfs: convert sh...
1099
1100
  		if (error)
  			goto failed;
00501b531   Johannes Weiner   mm: memcontrol: r...
1101
  		mem_cgroup_commit_charge(page, memcg, true);
54af60421   Hugh Dickins   tmpfs: convert sh...
1102
  		spin_lock(&info->lock);
285b2c4fd   Hugh Dickins   tmpfs: demolish o...
1103
  		info->swapped--;
54af60421   Hugh Dickins   tmpfs: convert sh...
1104
  		shmem_recalc_inode(inode);
27ab70062   Hugh Dickins   tmpfs: simplify f...
1105
  		spin_unlock(&info->lock);
54af60421   Hugh Dickins   tmpfs: convert sh...
1106

66d2f4d28   Hugh Dickins   shmem: fix init_p...
1107
1108
  		if (sgp == SGP_WRITE)
  			mark_page_accessed(page);
54af60421   Hugh Dickins   tmpfs: convert sh...
1109
  		delete_from_swap_cache(page);
27ab70062   Hugh Dickins   tmpfs: simplify f...
1110
1111
  		set_page_dirty(page);
  		swap_free(swap);
  	} else {
  		if (shmem_acct_block(info->flags)) {
  			error = -ENOSPC;
  			goto failed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1116
  		}
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
1117
  		if (sbinfo->max_blocks) {
fc5da22ae   Hugh Dickins   tmpfs: fix off-by...
1118
  			if (percpu_counter_compare(&sbinfo->used_blocks,
  						sbinfo->max_blocks) >= 0) {
  				error = -ENOSPC;
  				goto unacct;
  			}
7e496299d   Tim Chen   tmpfs: make tmpfs...
1123
  			percpu_counter_inc(&sbinfo->used_blocks);
54af60421   Hugh Dickins   tmpfs: convert sh...
1124
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1125

  		page = shmem_alloc_page(gfp, info, index);
  		if (!page) {
  			error = -ENOMEM;
  			goto decused;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1130
  		}
07a427884   Mel Gorman   mm: shmem: avoid ...
1131
  		__SetPageSwapBacked(page);
54af60421   Hugh Dickins   tmpfs: convert sh...
1132
  		__set_page_locked(page);
66d2f4d28   Hugh Dickins   shmem: fix init_p...
1133
  		if (sgp == SGP_WRITE)
eb39d618f   Hugh Dickins   mm: replace init_...
1134
  			__SetPageReferenced(page);
66d2f4d28   Hugh Dickins   shmem: fix init_p...
1135

00501b531   Johannes Weiner   mm: memcontrol: r...
1136
  		error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
54af60421   Hugh Dickins   tmpfs: convert sh...
1137
1138
  		if (error)
  			goto decused;
5e4c0d974   Jan Kara   lib/radix-tree.c:...
1139
  		error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
b065b4321   Hugh Dickins   shmem: cleanup sh...
1140
1141
  		if (!error) {
  			error = shmem_add_to_page_cache(page, mapping, index,
fed400a18   Wang Sheng-Hui   mm/shmem.c: remov...
1142
  							NULL);
b065b4321   Hugh Dickins   shmem: cleanup sh...
1143
1144
1145
  			radix_tree_preload_end();
  		}
  		if (error) {
00501b531   Johannes Weiner   mm: memcontrol: r...
1146
  			mem_cgroup_cancel_charge(page, memcg);
b065b4321   Hugh Dickins   shmem: cleanup sh...
1147
1148
  			goto decused;
  		}
00501b531   Johannes Weiner   mm: memcontrol: r...
1149
  		mem_cgroup_commit_charge(page, memcg, false);
54af60421   Hugh Dickins   tmpfs: convert sh...
1150
1151
1152
  		lru_cache_add_anon(page);
  
  		spin_lock(&info->lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1153
  		info->alloced++;
54af60421   Hugh Dickins   tmpfs: convert sh...
1154
1155
  		inode->i_blocks += BLOCKS_PER_PAGE;
  		shmem_recalc_inode(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1156
  		spin_unlock(&info->lock);
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1157
  		alloced = true;
54af60421   Hugh Dickins   tmpfs: convert sh...
1158

ec9516fbc   Hugh Dickins   tmpfs: optimize c...
1159
  		/*
  		 * Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
  		 */
  		if (sgp == SGP_FALLOC)
  			sgp = SGP_WRITE;
  clear:
  		/*
  		 * Let SGP_WRITE caller clear ends if write does not fill page;
  		 * but SGP_FALLOC on a page fallocated earlier must initialize
  		 * it now, lest undo on failure cancel our earlier guarantee.
  		 */
  		if (sgp != SGP_WRITE) {
  			clear_highpage(page);
  			flush_dcache_page(page);
  			SetPageUptodate(page);
  		}
a0ee5ec52   Hugh Dickins   tmpfs: allocate o...
1175
  		if (sgp == SGP_DIRTY)
27ab70062   Hugh Dickins   tmpfs: simplify f...
1176
  			set_page_dirty(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1177
  	}
bde05d1cc   Hugh Dickins   shmem: replace pa...
1178

54af60421   Hugh Dickins   tmpfs: convert sh...
1179
  	/* Perhaps the file has been truncated since we checked */
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1180
  	if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
54af60421   Hugh Dickins   tmpfs: convert sh...
1181
1182
  	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
  		error = -EINVAL;
  		if (alloced)
  			goto trunc;
  		else
  			goto failed;
e83c32e8f   Hugh Dickins   tmpfs: simplify p...
1187
  	}
54af60421   Hugh Dickins   tmpfs: convert sh...
1188
1189
  	*pagep = page;
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1190

59a16ead5   Hugh Dickins   tmpfs: fix spurio...
1191
  	/*
54af60421   Hugh Dickins   tmpfs: convert sh...
1192
  	 * Error recovery.
59a16ead5   Hugh Dickins   tmpfs: fix spurio...
1193
  	 */
54af60421   Hugh Dickins   tmpfs: convert sh...
1194
  trunc:
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1195
  	info = SHMEM_I(inode);
  	ClearPageDirty(page);
  	delete_from_page_cache(page);
  	spin_lock(&info->lock);
  	info->alloced--;
  	inode->i_blocks -= BLOCKS_PER_PAGE;
59a16ead5   Hugh Dickins   tmpfs: fix spurio...
1201
  	spin_unlock(&info->lock);
54af60421   Hugh Dickins   tmpfs: convert sh...
1202
  decused:
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
1203
  	sbinfo = SHMEM_SB(inode->i_sb);
  	if (sbinfo->max_blocks)
  		percpu_counter_add(&sbinfo->used_blocks, -1);
  unacct:
  	shmem_unacct_blocks(info->flags, 1);
  failed:
  	if (swap.val && error != -EINVAL &&
  	    !shmem_confirm_swap(mapping, index, swap))
  		error = -EEXIST;
  unlock:
27ab70062   Hugh Dickins   tmpfs: simplify f...
1213
  	if (page) {
54af60421   Hugh Dickins   tmpfs: convert sh...
1214
  		unlock_page(page);
27ab70062   Hugh Dickins   tmpfs: simplify f...
1215
  		page_cache_release(page);
  	}
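  	/*
  	 * -ENOSPC may be transient: shmem_recalc_inode() can release block
  	 * accounting for pages freed behind our back, so retry just once.
  	 */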
  	if (error == -ENOSPC && !once++) {
  		info = SHMEM_I(inode);
  		spin_lock(&info->lock);
  		shmem_recalc_inode(inode);
  		spin_unlock(&info->lock);
27ab70062   Hugh Dickins   tmpfs: simplify f...
1222
  		goto repeat;
ff36b8016   Shaohua Li   shmem: reduce pag...
1223
  	}
d18992286   Hugh Dickins   shmem: fix negati...
1224
  	if (error == -EEXIST)	/* from above or from radix_tree_insert */
54af60421   Hugh Dickins   tmpfs: convert sh...
1225
1226
  		goto repeat;
  	return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1227
  }
d0217ac04   Nick Piggin   mm: fault feedbac...
1228
  static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1229
  {
496ad9aa8   Al Viro   new helper: file_...
1230
  	struct inode *inode = file_inode(vma->vm_file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1231
  	int error;
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
1232
  	int ret = VM_FAULT_LOCKED;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1233

  	/*
  	 * Trinity finds that probing a hole which tmpfs is punching can
  	 * prevent the hole-punch from ever completing: which in turn
  	 * locks writers out with its hold on i_mutex.  So refrain from
  	 * faulting pages into the hole while it's being punched.  Although
  	 * shmem_undo_range() does remove the additions, it may be unable to
  	 * keep up, as each new page needs its own unmap_mapping_range() call,
  	 * and the i_mmap tree grows ever slower to scan if new vmas are added.
  	 *
  	 * It does not matter if we sometimes reach this check just before the
  	 * hole-punch begins, so that one fault then races with the punch:
  	 * we just need to make racing faults a rare case.
  	 *
  	 * The implementation below would be much simpler if we just used a
  	 * standard mutex or completion: but we cannot take i_mutex in fault,
  	 * and bloating every shmem inode for this unlikely case would be sad.
  	 */
  	if (unlikely(inode->i_private)) {
  		struct shmem_falloc *shmem_falloc;
  
  		spin_lock(&inode->i_lock);
  		shmem_falloc = inode->i_private;
  		if (shmem_falloc &&
  		    shmem_falloc->waitq &&
  		    vmf->pgoff >= shmem_falloc->start &&
  		    vmf->pgoff < shmem_falloc->next) {
  			wait_queue_head_t *shmem_falloc_waitq;
  			DEFINE_WAIT(shmem_fault_wait);
  
  			ret = VM_FAULT_NOPAGE;
f00cdc6df   Hugh Dickins   shmem: fix faulti...
1264
1265
  			if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
  			   !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
8e205f779   Hugh Dickins   shmem: fix faulti...
1266
  				/* It's polite to up mmap_sem if we can */
f00cdc6df   Hugh Dickins   shmem: fix faulti...
1267
  				up_read(&vma->vm_mm->mmap_sem);
8e205f779   Hugh Dickins   shmem: fix faulti...
1268
  				ret = VM_FAULT_RETRY;
f00cdc6df   Hugh Dickins   shmem: fix faulti...
1269
  			}
  
  			shmem_falloc_waitq = shmem_falloc->waitq;
  			prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
  					TASK_UNINTERRUPTIBLE);
  			spin_unlock(&inode->i_lock);
  			schedule();
  
  			/*
  			 * shmem_falloc_waitq points into the shmem_fallocate()
  			 * stack of the hole-punching task: shmem_falloc_waitq
  			 * is usually invalid by the time we reach here, but
  			 * finish_wait() does not dereference it in that case;
  			 * though i_lock needed lest racing with wake_up_all().
  			 */
  			spin_lock(&inode->i_lock);
  			finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
  			spin_unlock(&inode->i_lock);
  			return ret;
f00cdc6df   Hugh Dickins   shmem: fix faulti...
1288
  		}
8e205f779   Hugh Dickins   shmem: fix faulti...
1289
  		spin_unlock(&inode->i_lock);
f00cdc6df   Hugh Dickins   shmem: fix faulti...
1290
  	}
27d54b398   Hugh Dickins   shmem: SGP_QUICK ...
1291
  	error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
d0217ac04   Nick Piggin   mm: fault feedbac...
1292
1293
  	if (error)
  		return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
1294

  	if (ret & VM_FAULT_MAJOR) {
  		count_vm_event(PGMAJFAULT);
  		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
  	}
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
1299
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1300
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1301
  #ifdef CONFIG_NUMA
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1302
  static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1303
  {
496ad9aa8   Al Viro   new helper: file_...
1304
  	struct inode *inode = file_inode(vma->vm_file);
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1305
  	return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1306
  }
d8dc74f21   Adrian Bunk   mm/shmem.c: make ...
1307
1308
  static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
  					  unsigned long addr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1309
  {
496ad9aa8   Al Viro   new helper: file_...
1310
  	struct inode *inode = file_inode(vma->vm_file);
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1311
  	pgoff_t index;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1312

41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1313
1314
  	index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
  	return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
  }
  #endif
  
  int shmem_lock(struct file *file, int lock, struct user_struct *user)
  {
496ad9aa8   Al Viro   new helper: file_...
1320
  	struct inode *inode = file_inode(file);
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	int retval = -ENOMEM;
  
  	spin_lock(&info->lock);
  	if (lock && !(info->flags & VM_LOCKED)) {
  		if (!user_shm_lock(inode->i_size, user))
  			goto out_nomem;
  		info->flags |= VM_LOCKED;
89e004ea5   Lee Schermerhorn   SHM_LOCKED pages ...
1329
  		mapping_set_unevictable(file->f_mapping);
  	}
  	if (!lock && (info->flags & VM_LOCKED) && user) {
  		user_shm_unlock(inode->i_size, user);
  		info->flags &= ~VM_LOCKED;
89e004ea5   Lee Schermerhorn   SHM_LOCKED pages ...
1334
  		mapping_clear_unevictable(file->f_mapping);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1335
1336
  	}
  	retval = 0;
89e004ea5   Lee Schermerhorn   SHM_LOCKED pages ...
1337

  out_nomem:
  	spin_unlock(&info->lock);
  	return retval;
  }
9b83a6a85   Adrian Bunk   [PATCH] mm/{,tiny...
1342
  static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
  {
  	file_accessed(file);
  	vma->vm_ops = &shmem_vm_ops;
  	return 0;
  }
454abafe9   Dmitry Monakhov   ramfs: replace in...
1348
  static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
09208d150   Al Viro   shmem, ramfs: pro...
1349
  				     umode_t mode, dev_t dev, unsigned long flags)
  {
  	struct inode *inode;
  	struct shmem_inode_info *info;
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
5b04c6890   Pavel Emelyanov   shmem: factor out...
1354
1355
  	if (shmem_reserve_inode(sb))
  		return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1356
1357
1358
  
  	inode = new_inode(sb);
  	if (inode) {
85fe4025c   Christoph Hellwig   fs: do not assign...
1359
  		inode->i_ino = get_next_ino();
454abafe9   Dmitry Monakhov   ramfs: replace in...
1360
  		inode_init_owner(inode, dir, mode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1361
  		inode->i_blocks = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1362
  		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
91828a405   David M. Grimes   [PATCH] knfsd: ad...
1363
  		inode->i_generation = get_seconds();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1364
1365
1366
  		info = SHMEM_I(inode);
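  		/*
  		 * The shmem-private fields sit in front of the embedded vfs
  		 * inode (see SHMEM_I()), so this zeroes only the private part.
  		 */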
  		memset(info, 0, (char *)inode - (char *)info);
  		spin_lock_init(&info->lock);
40e041a2c   David Herrmann   shm: add sealing API
1367
  		info->seals = F_SEAL_SEAL;
0b0a0806b   Hugh Dickins   shmem: fix shared...
1368
  		info->flags = flags & VM_NORESERVE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1369
  		INIT_LIST_HEAD(&info->swaplist);
38f386574   Aristeu Rozanski   xattr: extract si...
1370
  		simple_xattrs_init(&info->xattrs);
72c04902d   Al Viro   Get "no acls for ...
1371
  		cache_no_acl(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1372
1373
1374
  
  		switch (mode & S_IFMT) {
  		default:
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
1375
  			inode->i_op = &shmem_special_inode_operations;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1376
1377
1378
  			init_special_inode(inode, mode, dev);
  			break;
  		case S_IFREG:
14fcc23fd   Hugh Dickins   tmpfs: fix kernel...
1379
  			inode->i_mapping->a_ops = &shmem_aops;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1380
1381
  			inode->i_op = &shmem_inode_operations;
  			inode->i_fop = &shmem_file_operations;
71fe804b6   Lee Schermerhorn   mempolicy: use st...
1382
1383
  			mpol_shared_policy_init(&info->policy,
  						 shmem_get_sbmpol(sbinfo));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1384
1385
  			break;
  		case S_IFDIR:
d8c76e6f4   Dave Hansen   [PATCH] r/o bind ...
1386
  			inc_nlink(inode);
  			/* Some things misbehave if size == 0 on a directory */
  			inode->i_size = 2 * BOGO_DIRENT_SIZE;
  			inode->i_op = &shmem_dir_inode_operations;
  			inode->i_fop = &simple_dir_operations;
  			break;
  		case S_IFLNK:
  			/*
  			 * Must not load anything in the rbtree,
  			 * mpol_free_shared_policy will not be called.
  			 */
71fe804b6   Lee Schermerhorn   mempolicy: use st...
1397
  			mpol_shared_policy_init(&info->policy, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1398
1399
  			break;
  		}
5b04c6890   Pavel Emelyanov   shmem: factor out...
1400
1401
  	} else
  		shmem_free_inode(sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1402
1403
  	return inode;
  }
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
1404
1405
  bool shmem_mapping(struct address_space *mapping)
  {
f0774d884   Sasha Levin   mm: shmem: check ...
1406
1407
  	if (!mapping->host)
  		return false;
97b713ba3   Christoph Hellwig   fs: kill BDI_CAP_...
1408
  	return mapping->host->i_sb->s_op == &shmem_ops;
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
1409
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1410
  #ifdef CONFIG_TMPFS
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
1411
  static const struct inode_operations shmem_symlink_inode_operations;
69f07ec93   Hugh Dickins   tmpfs: use kmemdu...
1412
  static const struct inode_operations shmem_short_symlink_operations;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1413

  #ifdef CONFIG_TMPFS_XATTR
  static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
  #else
  #define shmem_initxattrs NULL
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1419
  static int
800d15a53   Nick Piggin   implement simple ...
1420
1421
1422
  shmem_write_begin(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned flags,
  			struct page **pagep, void **fsdata)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1423
  {
800d15a53   Nick Piggin   implement simple ...
1424
  	struct inode *inode = mapping->host;
40e041a2c   David Herrmann   shm: add sealing API
1425
  	struct shmem_inode_info *info = SHMEM_I(inode);
800d15a53   Nick Piggin   implement simple ...
1426
  	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
  
  	/* i_mutex is held by caller */
  	if (unlikely(info->seals)) {
  		if (info->seals & F_SEAL_WRITE)
  			return -EPERM;
  		if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
  			return -EPERM;
  	}
66d2f4d28   Hugh Dickins   shmem: fix init_p...
1435
  	return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
  }
  
  static int
  shmem_write_end(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned copied,
  			struct page *page, void *fsdata)
  {
  	struct inode *inode = mapping->host;
d3602444e   Hugh Dickins   shmem_getpage ret...
1444
1445
  	if (pos + copied > inode->i_size)
  		i_size_write(inode, pos + copied);
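  	/*
  	 * If the write did not cover a whole !Uptodate page, zero the bytes
  	 * outside the copied range before marking the page Uptodate.
  	 */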
  	if (!PageUptodate(page)) {
  		if (copied < PAGE_CACHE_SIZE) {
  			unsigned from = pos & (PAGE_CACHE_SIZE - 1);
  			zero_user_segments(page, 0, from,
  					from + copied, PAGE_CACHE_SIZE);
  		}
  		SetPageUptodate(page);
  	}
800d15a53   Nick Piggin   implement simple ...
1454
  	set_page_dirty(page);
6746aff74   Wu Fengguang   HWPOISON: shmem: ...
1455
  	unlock_page(page);
800d15a53   Nick Piggin   implement simple ...
1456
  	page_cache_release(page);
800d15a53   Nick Piggin   implement simple ...
1457
  	return copied;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1458
  }
2ba5bbed0   Al Viro   shmem: switch to ...
1459
  static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1460
  {
6e58e79db   Al Viro   introduce copy_pa...
1461
1462
  	struct file *file = iocb->ki_filp;
  	struct inode *inode = file_inode(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1463
  	struct address_space *mapping = inode->i_mapping;
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1464
1465
  	pgoff_t index;
  	unsigned long offset;
a0ee5ec52   Hugh Dickins   tmpfs: allocate o...
1466
  	enum sgp_type sgp = SGP_READ;
f7c1d0742   Geert Uytterhoeven   mm: Initialize er...
1467
  	int error = 0;
cb66a7a1f   Al Viro   kill generic_segm...
1468
  	ssize_t retval = 0;
6e58e79db   Al Viro   introduce copy_pa...
1469
  	loff_t *ppos = &iocb->ki_pos;
  
  	/*
  	 * Might this read be for a stacking filesystem?  Then when reading
  	 * holes of a sparse file, we actually need to allocate those pages,
  	 * and even mark them dirty, so it cannot exceed the max_blocks limit.
  	 */
777eda2c5   Al Viro   new helper: iter_...
1476
  	if (!iter_is_iovec(to))
a0ee5ec52   Hugh Dickins   tmpfs: allocate o...
1477
  		sgp = SGP_DIRTY;
  
  	index = *ppos >> PAGE_CACHE_SHIFT;
  	offset = *ppos & ~PAGE_CACHE_MASK;
  
  	for (;;) {
  		struct page *page = NULL;
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
1484
1485
  		pgoff_t end_index;
  		unsigned long nr, ret;
  		loff_t i_size = i_size_read(inode);
  
  		end_index = i_size >> PAGE_CACHE_SHIFT;
  		if (index > end_index)
  			break;
  		if (index == end_index) {
  			nr = i_size & ~PAGE_CACHE_MASK;
  			if (nr <= offset)
  				break;
  		}
  		error = shmem_getpage(inode, index, &page, sgp, NULL);
  		if (error) {
  			if (error == -EINVAL)
  				error = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1500
1501
  			break;
  		}
d3602444e   Hugh Dickins   shmem_getpage ret...
1502
1503
  		if (page)
  			unlock_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1504
1505
1506
  
  		/*
  		 * We must evaluate after, since reads (unlike writes)
1b1dcc1b5   Jes Sorensen   [PATCH] mutex sub...
1507
  		 * are called without i_mutex protection against truncate
  		 */
  		nr = PAGE_CACHE_SIZE;
  		i_size = i_size_read(inode);
  		end_index = i_size >> PAGE_CACHE_SHIFT;
  		if (index == end_index) {
  			nr = i_size & ~PAGE_CACHE_MASK;
  			if (nr <= offset) {
  				if (page)
  					page_cache_release(page);
  				break;
  			}
  		}
  		nr -= offset;
  
  		if (page) {
  			/*
  			 * If users can be writing to this page using arbitrary
  			 * virtual addresses, take care about potential aliasing
  			 * before reading the page on the kernel side.
  			 */
  			if (mapping_writably_mapped(mapping))
  				flush_dcache_page(page);
  			/*
  			 * Mark the page accessed if we read the beginning.
  			 */
  			if (!offset)
  				mark_page_accessed(page);
b5810039a   Nick Piggin   [PATCH] core remo...
1535
  		} else {
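  			/* hole which SGP_READ did not allocate: copy from the shared zero page */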
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1536
  			page = ZERO_PAGE(0);
b5810039a   Nick Piggin   [PATCH] core remo...
1537
1538
  			page_cache_get(page);
  		}
  
  		/*
  		 * Ok, we have the page, and it's up-to-date, so
  		 * now we can copy it to user space...
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1543
  		 */
2ba5bbed0   Al Viro   shmem: switch to ...
1544
  		ret = copy_page_to_iter(page, offset, nr, to);
6e58e79db   Al Viro   introduce copy_pa...
1545
  		retval += ret;
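  		/* advance the file position, carrying whole pages from offset into index */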
  		offset += ret;
  		index += offset >> PAGE_CACHE_SHIFT;
  		offset &= ~PAGE_CACHE_MASK;
  
  		page_cache_release(page);
2ba5bbed0   Al Viro   shmem: switch to ...
1551
  		if (!iov_iter_count(to))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1552
  			break;
  		if (ret < nr) {
  			error = -EFAULT;
  			break;
  		}
  		cond_resched();
  	}
  
  	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
6e58e79db   Al Viro   introduce copy_pa...
1561
1562
  	file_accessed(file);
  	return retval ? retval : error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1563
  }
  static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
  				struct pipe_inode_info *pipe, size_t len,
  				unsigned int flags)
  {
  	struct address_space *mapping = in->f_mapping;
71f0e07a6   Hugh Dickins   tmpfs: refine shm...
1569
  	struct inode *inode = mapping->host;
  	unsigned int loff, nr_pages, req_pages;
  	struct page *pages[PIPE_DEF_BUFFERS];
  	struct partial_page partial[PIPE_DEF_BUFFERS];
  	struct page *page;
  	pgoff_t index, end_index;
  	loff_t isize, left;
  	int error, page_nr;
  	struct splice_pipe_desc spd = {
  		.pages = pages,
  		.partial = partial,
047fe3605   Eric Dumazet   splice: fix racy ...
1580
  		.nr_pages_max = PIPE_DEF_BUFFERS,
  		.flags = flags,
  		.ops = &page_cache_pipe_buf_ops,
  		.spd_release = spd_release_page,
  	};
71f0e07a6   Hugh Dickins   tmpfs: refine shm...
1585
  	isize = i_size_read(inode);
  	if (unlikely(*ppos >= isize))
  		return 0;
  
  	left = isize - *ppos;
  	if (unlikely(left < len))
  		len = left;
  
  	if (splice_grow_spd(pipe, &spd))
  		return -ENOMEM;
  
  	index = *ppos >> PAGE_CACHE_SHIFT;
  	loff = *ppos & ~PAGE_CACHE_MASK;
  	req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
a786c06d9   Al Viro   missing bits of "...
1599
  	nr_pages = min(req_pages, spd.nr_pages_max);
708e3508c   Hugh Dickins   tmpfs: clone shme...
1600

708e3508c   Hugh Dickins   tmpfs: clone shme...
1601
1602
1603
  	spd.nr_pages = find_get_pages_contig(mapping, index,
  						nr_pages, spd.pages);
  	index += spd.nr_pages;
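  	/*
  	 * First take whatever is already resident in the page cache; any
  	 * pages still missing from the range are brought in one by one below.
  	 */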
  	error = 0;

  	while (spd.nr_pages < nr_pages) {
  		error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
  		if (error)
  			break;
  		unlock_page(page);
708e3508c   Hugh Dickins   tmpfs: clone shme...
1611
1612
1613
  		spd.pages[spd.nr_pages++] = page;
  		index++;
  	}
708e3508c   Hugh Dickins   tmpfs: clone shme...
1614
1615
1616
  	index = *ppos >> PAGE_CACHE_SHIFT;
  	nr_pages = spd.nr_pages;
  	spd.nr_pages = 0;
71f0e07a6   Hugh Dickins   tmpfs: refine shm...
1617

  	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
  		unsigned int this_len;
  
  		if (!len)
  			break;
708e3508c   Hugh Dickins   tmpfs: clone shme...
1623
1624
  		this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
  		page = spd.pages[page_nr];
71f0e07a6   Hugh Dickins   tmpfs: refine shm...
1625
  		if (!PageUptodate(page) || page->mapping != mapping) {
71f0e07a6   Hugh Dickins   tmpfs: refine shm...
1626
1627
1628
  			error = shmem_getpage(inode, index, &page,
  							SGP_CACHE, NULL);
  			if (error)
708e3508c   Hugh Dickins   tmpfs: clone shme...
1629
  				break;
71f0e07a6   Hugh Dickins   tmpfs: refine shm...
1630
1631
1632
  			unlock_page(page);
  			page_cache_release(spd.pages[page_nr]);
  			spd.pages[page_nr] = page;
708e3508c   Hugh Dickins   tmpfs: clone shme...
1633
  		}
71f0e07a6   Hugh Dickins   tmpfs: refine shm...
1634
1635
  
  		isize = i_size_read(inode);
708e3508c   Hugh Dickins   tmpfs: clone shme...
1636
1637
1638
  		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
  		if (unlikely(!isize || index > end_index))
  			break;
708e3508c   Hugh Dickins   tmpfs: clone shme...
1639
1640
  		if (end_index == index) {
  			unsigned int plen;
708e3508c   Hugh Dickins   tmpfs: clone shme...
1641
1642
1643
  			plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
  			if (plen <= loff)
  				break;
  			this_len = min(this_len, plen - loff);
  			len = this_len;
  		}
  
  		spd.partial[page_nr].offset = loff;
  		spd.partial[page_nr].len = this_len;
  		len -= this_len;
  		loff = 0;
  		spd.nr_pages++;
  		index++;
  	}
708e3508c   Hugh Dickins   tmpfs: clone shme...
1655
1656
  	while (page_nr < nr_pages)
  		page_cache_release(spd.pages[page_nr++]);
708e3508c   Hugh Dickins   tmpfs: clone shme...
1657
1658
1659
  
  	if (spd.nr_pages)
  		error = splice_to_pipe(pipe, &spd);
047fe3605   Eric Dumazet   splice: fix racy ...
1660
  	splice_shrink_spd(&spd);
  
  	if (error > 0) {
  		*ppos += error;
  		file_accessed(in);
  	}
  	return error;
  }
  /*
   * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
   */
  static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
965c8e59c   Andrew Morton   lseek: the "whenc...
1672
  				    pgoff_t index, pgoff_t end, int whence)
  {
  	struct page *page;
  	struct pagevec pvec;
  	pgoff_t indices[PAGEVEC_SIZE];
  	bool done = false;
  	int i;
  
  	pagevec_init(&pvec, 0);
  	pvec.nr = 1;		/* start small: we may be there already */
  	while (!done) {
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
1683
  		pvec.nr = find_get_entries(mapping, index,
220f2ac91   Hugh Dickins   tmpfs: support SE...
1684
1685
  					pvec.nr, pvec.pages, indices);
  		if (!pvec.nr) {
965c8e59c   Andrew Morton   lseek: the "whenc...
1686
  			if (whence == SEEK_DATA)
  				index = end;
  			break;
  		}
  		for (i = 0; i < pvec.nr; i++, index++) {
  			if (index < indices[i]) {
965c8e59c   Andrew Morton   lseek: the "whenc...
1692
  				if (whence == SEEK_HOLE) {
  					done = true;
  					break;
  				}
  				index = indices[i];
  			}
  			page = pvec.pages[i];
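  			/*
  			 * A swap entry (exceptional) still counts as data;
  			 * a !Uptodate page (fallocated but never written)
  			 * is treated as a hole.
  			 */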
  			if (page && !radix_tree_exceptional_entry(page)) {
  				if (!PageUptodate(page))
  					page = NULL;
  			}
  			if (index >= end ||
965c8e59c   Andrew Morton   lseek: the "whenc...
1704
1705
  			    (page && whence == SEEK_DATA) ||
  			    (!page && whence == SEEK_HOLE)) {
  				done = true;
  				break;
  			}
  		}
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
1710
  		pagevec_remove_exceptionals(&pvec);
  		pagevec_release(&pvec);
  		pvec.nr = PAGEVEC_SIZE;
  		cond_resched();
  	}
  	return index;
  }
965c8e59c   Andrew Morton   lseek: the "whenc...
1717
  static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
  {
  	struct address_space *mapping = file->f_mapping;
  	struct inode *inode = mapping->host;
  	pgoff_t start, end;
  	loff_t new_offset;
965c8e59c   Andrew Morton   lseek: the "whenc...
1723
1724
  	if (whence != SEEK_DATA && whence != SEEK_HOLE)
  		return generic_file_llseek_size(file, offset, whence,
  					MAX_LFS_FILESIZE, i_size_read(inode));
  	mutex_lock(&inode->i_mutex);
  	/* We're holding i_mutex so we can access i_size directly */
  
  	if (offset < 0)
  		offset = -EINVAL;
  	else if (offset >= inode->i_size)
  		offset = -ENXIO;
  	else {
  		start = offset >> PAGE_CACHE_SHIFT;
  		end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
965c8e59c   Andrew Morton   lseek: the "whenc...
1736
  		new_offset = shmem_seek_hole_data(mapping, start, end, whence);
  		new_offset <<= PAGE_CACHE_SHIFT;
  		if (new_offset > offset) {
  			if (new_offset < inode->i_size)
  				offset = new_offset;
965c8e59c   Andrew Morton   lseek: the "whenc...
1741
  			else if (whence == SEEK_DATA)
  				offset = -ENXIO;
  			else
  				offset = inode->i_size;
  		}
  	}
387aae6fd   Hugh Dickins   tmpfs: fix SEEK_D...
1747
1748
  	if (offset >= 0)
  		offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
220f2ac91   Hugh Dickins   tmpfs: support SE...
1749
1750
1751
  	mutex_unlock(&inode->i_mutex);
  	return offset;
  }
  /*
   * We need a tag: a new tag would expand every radix_tree_node by 8 bytes,
   * so reuse a tag which we firmly believe is never set or cleared on shmem.
   */
  #define SHMEM_TAG_PINNED        PAGECACHE_TAG_TOWRITE
  #define LAST_SCAN               4       /* about 150ms max */
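
  /*
   * shmem_tag_pins(): a page whose refcount exceeds its mapcount by more
   * than one (the extra one being the page cache's own reference) is pinned
   * by something else, e.g. get_user_pages(); tag such pages so that
   * shmem_wait_for_pins() can wait for those references to be dropped.
   */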
  
  static void shmem_tag_pins(struct address_space *mapping)
  {
  	struct radix_tree_iter iter;
  	void **slot;
  	pgoff_t start;
  	struct page *page;
  
  	lru_add_drain();
  	start = 0;
  	rcu_read_lock();
  
  restart:
  	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
  		page = radix_tree_deref_slot(slot);
  		if (!page || radix_tree_exception(page)) {
  			if (radix_tree_deref_retry(page))
  				goto restart;
  		} else if (page_count(page) - page_mapcount(page) > 1) {
  			spin_lock_irq(&mapping->tree_lock);
  			radix_tree_tag_set(&mapping->page_tree, iter.index,
  					   SHMEM_TAG_PINNED);
  			spin_unlock_irq(&mapping->tree_lock);
  		}
  
  		if (need_resched()) {
  			cond_resched_rcu();
  			start = iter.index + 1;
  			goto restart;
  		}
  	}
  	rcu_read_unlock();
  }
  
  /*
   * Setting SEAL_WRITE requires us to verify there's no pending writer. However,
   * via get_user_pages(), drivers might have some pending I/O without any active
   * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all pages
   * and see whether it has an elevated ref-count. If so, we tag them and wait for
   * them to be dropped.
   * The caller must guarantee that no new user will acquire writable references
   * to those pages to avoid races.
   */
40e041a2c   David Herrmann   shm: add sealing API
1801
1802
  static int shmem_wait_for_pins(struct address_space *mapping)
  {
  	struct radix_tree_iter iter;
  	void **slot;
  	pgoff_t start;
  	struct page *page;
  	int error, scan;
  
  	shmem_tag_pins(mapping);
  
  	error = 0;
  	for (scan = 0; scan <= LAST_SCAN; scan++) {
  		if (!radix_tree_tagged(&mapping->page_tree, SHMEM_TAG_PINNED))
  			break;
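  		/*
  		 * The first pass only drains the per-cpu pagevecs; later
  		 * passes back off roughly 10, 20, 40, 80ms between rescans
  		 * (about 150ms in total, as LAST_SCAN's comment says).
  		 */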
  
  		if (!scan)
  			lru_add_drain_all();
  		else if (schedule_timeout_killable((HZ << scan) / 200))
  			scan = LAST_SCAN;
  
  		start = 0;
  		rcu_read_lock();
  restart:
  		radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter,
  					   start, SHMEM_TAG_PINNED) {
  
  			page = radix_tree_deref_slot(slot);
  			if (radix_tree_exception(page)) {
  				if (radix_tree_deref_retry(page))
  					goto restart;
  
  				page = NULL;
  			}
  
  			if (page &&
  			    page_count(page) - page_mapcount(page) != 1) {
  				if (scan < LAST_SCAN)
  					goto continue_resched;
  
  				/*
  				 * On the last scan, we clean up all those tags
  				 * we inserted; but make a note that we still
  				 * found pages pinned.
  				 */
  				error = -EBUSY;
  			}
  
  			spin_lock_irq(&mapping->tree_lock);
  			radix_tree_tag_clear(&mapping->page_tree,
  					     iter.index, SHMEM_TAG_PINNED);
  			spin_unlock_irq(&mapping->tree_lock);
  continue_resched:
  			if (need_resched()) {
  				cond_resched_rcu();
  				start = iter.index + 1;
  				goto restart;
  			}
  		}
  		rcu_read_unlock();
  	}
  
  	return error;
  }
  
  #define F_ALL_SEALS (F_SEAL_SEAL | \
  		     F_SEAL_SHRINK | \
  		     F_SEAL_GROW | \
  		     F_SEAL_WRITE)
  
  int shmem_add_seals(struct file *file, unsigned int seals)
  {
  	struct inode *inode = file_inode(file);
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	int error;
  
  	/*
  	 * SEALING
  	 * Sealing allows multiple parties to share a shmem-file but restrict
  	 * access to a specific subset of file operations. Seals can only be
  	 * added, but never removed. This way, mutually untrusted parties can
  	 * share common memory regions with a well-defined policy. A malicious
  	 * peer can thus never perform unwanted operations on a shared object.
  	 *
  	 * Seals are only supported on special shmem-files and always affect
  	 * the whole underlying inode. Once a seal is set, it may prevent some
  	 * kinds of access to the file. Currently, the following seals are
  	 * defined:
  	 *   SEAL_SEAL: Prevent further seals from being set on this file
  	 *   SEAL_SHRINK: Prevent the file from shrinking
  	 *   SEAL_GROW: Prevent the file from growing
  	 *   SEAL_WRITE: Prevent write access to the file
  	 *
  	 * As we don't require any trust relationship between two parties, we
  	 * must prevent seals from being removed. Therefore, sealing a file
  	 * only adds a given set of seals to the file, it never touches
  	 * existing seals. Furthermore, the "setting seals"-operation can be
  	 * sealed itself, which basically prevents any further seal from being
  	 * added.
  	 *
  	 * Semantics of sealing are only defined on volatile files. Only
  	 * anonymous shmem files support sealing. More importantly, seals are
  	 * never written to disk. Therefore, there's no plan to support it on
  	 * other file types.
  	 */
  
  	if (file->f_op != &shmem_file_operations)
  		return -EINVAL;
  	if (!(file->f_mode & FMODE_WRITE))
  		return -EPERM;
  	if (seals & ~(unsigned int)F_ALL_SEALS)
  		return -EINVAL;
  
  	mutex_lock(&inode->i_mutex);
  
  	if (info->seals & F_SEAL_SEAL) {
  		error = -EPERM;
  		goto unlock;
  	}
  
  	if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
  		error = mapping_deny_writable(file->f_mapping);
  		if (error)
  			goto unlock;
  
  		error = shmem_wait_for_pins(file->f_mapping);
  		if (error) {
  			mapping_allow_writable(file->f_mapping);
  			goto unlock;
  		}
  	}
  
  	info->seals |= seals;
  	error = 0;
  
  unlock:
  	mutex_unlock(&inode->i_mutex);
  	return error;
  }
  EXPORT_SYMBOL_GPL(shmem_add_seals);
  
  int shmem_get_seals(struct file *file)
  {
  	if (file->f_op != &shmem_file_operations)
  		return -EINVAL;
  
  	return SHMEM_I(file_inode(file))->seals;
  }
  EXPORT_SYMBOL_GPL(shmem_get_seals);
  
  long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
  {
  	long error;
  
  	switch (cmd) {
  	case F_ADD_SEALS:
  		/* disallow upper 32bit */
  		if (arg > UINT_MAX)
  			return -EINVAL;
  
  		error = shmem_add_seals(file, arg);
  		break;
  	case F_GET_SEALS:
  		error = shmem_get_seals(file);
  		break;
  	default:
  		error = -EINVAL;
  		break;
  	}
  
  	return error;
  }
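
  /*
   * Illustrative userspace sequence for the sealing interface above (a
   * sketch, not part of this file; "frame" and size are placeholder names,
   * while the calls and F_* constants are the usual memfd/fcntl ones):
   *
   *	int fd = memfd_create("frame", MFD_ALLOW_SEALING);
   *	ftruncate(fd, size);
   *	...producer fills the buffer, then locks it down...
   *	fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE);
   *
   *	...consumer checks the seals before trusting a shared mapping...
   *	if (fcntl(fd, F_GET_SEALS) & F_SEAL_WRITE)
   *		addr = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
   */
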
  static long shmem_fallocate(struct file *file, int mode, loff_t offset,
  							 loff_t len)
  {
496ad9aa8   Al Viro   new helper: file_...
1975
  	struct inode *inode = file_inode(file);
e2d12e22c   Hugh Dickins   tmpfs: support fa...
1976
  	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
40e041a2c   David Herrmann   shm: add sealing API
1977
  	struct shmem_inode_info *info = SHMEM_I(inode);
1aac14003   Hugh Dickins   tmpfs: quit when ...
1978
  	struct shmem_falloc shmem_falloc;
e2d12e22c   Hugh Dickins   tmpfs: support fa...
1979
1980
  	pgoff_t start, index, end;
  	int error;
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
1981

13ace4d0d   Hugh Dickins   tmpfs: ZERO_RANGE...
1982
1983
  	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
  		return -EOPNOTSUPP;
  	mutex_lock(&inode->i_mutex);
  
  	if (mode & FALLOC_FL_PUNCH_HOLE) {
  		struct address_space *mapping = file->f_mapping;
  		loff_t unmap_start = round_up(offset, PAGE_SIZE);
  		loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
8e205f779   Hugh Dickins   shmem: fix faulti...
1990
  		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
1991

  		/* protected by i_mutex */
  		if (info->seals & F_SEAL_WRITE) {
  			error = -EPERM;
  			goto out;
  		}
8e205f779   Hugh Dickins   shmem: fix faulti...
1997
  		shmem_falloc.waitq = &shmem_falloc_waitq;
  		shmem_falloc.start = unmap_start >> PAGE_SHIFT;
  		shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
  		spin_lock(&inode->i_lock);
  		inode->i_private = &shmem_falloc;
  		spin_unlock(&inode->i_lock);
  		if ((u64)unmap_end > (u64)unmap_start)
  			unmap_mapping_range(mapping, unmap_start,
  					    1 + unmap_end - unmap_start, 0);
  		shmem_truncate_range(inode, offset, offset + len - 1);
  		/* No need to unmap again: hole-punching leaves COWed pages */
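  		/* Wake any faults that shmem_fault() parked on our waitq. */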
  
  		spin_lock(&inode->i_lock);
  		inode->i_private = NULL;
  		wake_up_all(&shmem_falloc_waitq);
  		spin_unlock(&inode->i_lock);
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
2013
  		error = 0;
8e205f779   Hugh Dickins   shmem: fix faulti...
2014
  		goto out;
  	}
  
  	/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
  	error = inode_newsize_ok(inode, offset + len);
  	if (error)
  		goto out;
  	if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
  		error = -EPERM;
  		goto out;
  	}
  	start = offset >> PAGE_CACHE_SHIFT;
  	end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
  	/* Try to avoid a swapstorm if len is impossible to satisfy */
  	if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) {
  		error = -ENOSPC;
  		goto out;
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
2031
  	}
8e205f779   Hugh Dickins   shmem: fix faulti...
2032
  	shmem_falloc.waitq = NULL;
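  	/*
  	 * waitq is only used for hole-punching.  For preallocation we still
  	 * publish the range in i_private so shmem_writepage() can bump
  	 * nr_unswapped when reclaim keeps meeting freshly fallocated pages,
  	 * which the loop below takes as its cue to give up with -ENOMEM.
  	 */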
  	shmem_falloc.start = start;
  	shmem_falloc.next  = start;
  	shmem_falloc.nr_falloced = 0;
  	shmem_falloc.nr_unswapped = 0;
  	spin_lock(&inode->i_lock);
  	inode->i_private = &shmem_falloc;
  	spin_unlock(&inode->i_lock);
  	for (index = start; index < end; index++) {
  		struct page *page;
  
  		/*
  		 * Good, the fallocate(2) manpage permits EINTR: we may have
  		 * been interrupted because we are using up too much memory.
  		 */
  		if (signal_pending(current))
  			error = -EINTR;
1aac14003   Hugh Dickins   tmpfs: quit when ...
2049
2050
  		else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
  			error = -ENOMEM;
e2d12e22c   Hugh Dickins   tmpfs: support fa...
2051
  		else
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
2052
  			error = shmem_getpage(inode, index, &page, SGP_FALLOC,
e2d12e22c   Hugh Dickins   tmpfs: support fa...
2053
2054
  									NULL);
  		if (error) {
  			/* Remove the !PageUptodate pages we added */
  			shmem_undo_range(inode,
  				(loff_t)start << PAGE_CACHE_SHIFT,
  				(loff_t)index << PAGE_CACHE_SHIFT, true);
1aac14003   Hugh Dickins   tmpfs: quit when ...
2059
  			goto undone;
e2d12e22c   Hugh Dickins   tmpfs: support fa...
2060
  		}
e2d12e22c   Hugh Dickins   tmpfs: support fa...
2061
  		/*
  		 * Inform shmem_writepage() how far we have reached.
  		 * No need for lock or barrier: we have the page lock.
  		 */
  		shmem_falloc.next++;
  		if (!PageUptodate(page))
  			shmem_falloc.nr_falloced++;
  
  		/*
1635f6a74   Hugh Dickins   tmpfs: undo fallo...
2070
2071
2072
  		 * If !PageUptodate, leave it that way so that freeable pages
  		 * can be recognized if we need to rollback on error later.
  		 * But set_page_dirty so that memory pressure will swap rather
  		 * than free the pages we are allocating (and SGP_CACHE pages
  		 * might still be clean: we now need to mark those dirty too).
  		 */
  		set_page_dirty(page);
  		unlock_page(page);
  		page_cache_release(page);
  		cond_resched();
  	}
  
  	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
  		i_size_write(inode, offset + len);
e2d12e22c   Hugh Dickins   tmpfs: support fa...
2084
  	inode->i_ctime = CURRENT_TIME;
  undone:
  	spin_lock(&inode->i_lock);
  	inode->i_private = NULL;
  	spin_unlock(&inode->i_lock);
e2d12e22c   Hugh Dickins   tmpfs: support fa...
2089
  out:
83e4fa9c1   Hugh Dickins   tmpfs: support fa...
2090
2091
2092
  	mutex_unlock(&inode->i_mutex);
  	return error;
  }
726c33422   David Howells   [PATCH] VFS: Perm...
2093
  static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2094
  {
726c33422   David Howells   [PATCH] VFS: Perm...
2095
  	struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
  
  	buf->f_type = TMPFS_MAGIC;
  	buf->f_bsize = PAGE_CACHE_SIZE;
  	buf->f_namelen = NAME_MAX;
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2100
  	if (sbinfo->max_blocks) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2101
  		buf->f_blocks = sbinfo->max_blocks;
41ffe5d5c   Hugh Dickins   tmpfs: miscellane...
2102
2103
2104
  		buf->f_bavail =
  		buf->f_bfree  = sbinfo->max_blocks -
  				percpu_counter_sum(&sbinfo->used_blocks);
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2105
2106
  	}
  	if (sbinfo->max_inodes) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2107
2108
  		buf->f_files = sbinfo->max_inodes;
  		buf->f_ffree = sbinfo->free_inodes;
  	}
  	/* else leave those fields 0 like simple_statfs */
  	return 0;
  }
  
  /*
   * File creation. Allocate an inode, and we're done..
   */
  static int
1a67aafb5   Al Viro   switch ->mknod() ...
2118
  shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2119
  {
0b0a0806b   Hugh Dickins   shmem: fix shared...
2120
  	struct inode *inode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2121
  	int error = -ENOSPC;
454abafe9   Dmitry Monakhov   ramfs: replace in...
2122
  	inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2123
  	if (inode) {
feda821e7   Christoph Hellwig   fs: remove generi...
2124
2125
2126
  		error = simple_acl_create(dir, inode);
  		if (error)
  			goto out_iput;
2a7dba391   Eric Paris   fs/vfs/security: ...
2127
  		error = security_inode_init_security(inode, dir,
9d8f13ba3   Mimi Zohar   security: new sec...
2128
  						     &dentry->d_name,
6d9d88d07   Jarkko Sakkinen   tmpfs: security x...
2129
  						     shmem_initxattrs, NULL);
feda821e7   Christoph Hellwig   fs: remove generi...
2130
2131
  		if (error && error != -EOPNOTSUPP)
  			goto out_iput;
37ec43cdc   Mimi Zohar   evm: calculate HM...
2132

718deb6b6   Al Viro   Fix breakage in s...
2133
  		error = 0;
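  		/*
  		 * tmpfs keeps no real directory entries; BOGO_DIRENT_SIZE just
  		 * keeps the parent's i_size looking plausible to userspace.
  		 */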
  		dir->i_size += BOGO_DIRENT_SIZE;
  		dir->i_ctime = dir->i_mtime = CURRENT_TIME;
  		d_instantiate(dentry, inode);
  		dget(dentry); /* Extra count - pin the dentry in core */
  	}
  	return error;
  out_iput:
  	iput(inode);
  	return error;
  }
  static int
  shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
  {
  	struct inode *inode;
  	int error = -ENOSPC;
  
  	inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE);
  	if (inode) {
  		error = security_inode_init_security(inode, dir,
  						     NULL,
  						     shmem_initxattrs, NULL);
  		if (error && error != -EOPNOTSUPP)
  			goto out_iput;
  		error = simple_acl_create(dir, inode);
  		if (error)
  			goto out_iput;
  		d_tmpfile(dentry, inode);
  	}
  	return error;
  out_iput:
  	iput(inode);
  	return error;
  }
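
/*
 * Illustrative sketch (not part of the original file): shmem_tmpfile() is
 * what backs O_TMPFILE opens on a tmpfs directory.  A minimal userspace
 * example, assuming a libc that exposes O_TMPFILE; the mount point is
 * hypothetical.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/dev/shm", O_TMPFILE | O_RDWR, 0600);
 *
 *		if (fd < 0) {
 *			perror("O_TMPFILE");
 *			return 1;
 *		}
 *		write(fd, "scratch", 7);	// data lives only as long as the fd
 *		close(fd);
 *		return 0;
 *	}
 */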
  static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
  {
  	int error;
  
  	if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
  		return error;
  	inc_nlink(dir);
  	return 0;
  }
  static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
  		bool excl)
  {
  	return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
  }
  
  /*
   * Link a file..
   */
  static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
  {
  	struct inode *inode = d_inode(old_dentry);
  	int ret;
  
  	/*
  	 * No ordinary (disk based) filesystem counts links as inodes;
  	 * but each new link needs a new dentry, pinning lowmem, and
  	 * tmpfs dentries cannot be pruned until they are unlinked.
  	 */
  	ret = shmem_reserve_inode(inode->i_sb);
  	if (ret)
  		goto out;
  
  	dir->i_size += BOGO_DIRENT_SIZE;
  	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
  	inc_nlink(inode);
  	ihold(inode);	/* New dentry reference */
  	dget(dentry);		/* Extra pinning count for the created dentry */
  	d_instantiate(dentry, inode);
  out:
  	return ret;
  }
  
  static int shmem_unlink(struct inode *dir, struct dentry *dentry)
  {
  	struct inode *inode = d_inode(dentry);

  	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
  		shmem_free_inode(inode->i_sb);
  
  	dir->i_size -= BOGO_DIRENT_SIZE;
  	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
  	drop_nlink(inode);
  	dput(dentry);	/* Undo the count from "create" - this does all the work */
  	return 0;
  }
  
  static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
  {
  	if (!simple_empty(dentry))
  		return -ENOTEMPTY;
  	drop_nlink(d_inode(dentry));
  	drop_nlink(dir);
  	return shmem_unlink(dir, dentry);
  }
  static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
  {
  	bool old_is_dir = d_is_dir(old_dentry);
  	bool new_is_dir = d_is_dir(new_dentry);
  
  	if (old_dir != new_dir && old_is_dir != new_is_dir) {
  		if (old_is_dir) {
  			drop_nlink(old_dir);
  			inc_nlink(new_dir);
  		} else {
  			drop_nlink(new_dir);
  			inc_nlink(old_dir);
  		}
  	}
  	old_dir->i_ctime = old_dir->i_mtime =
  	new_dir->i_ctime = new_dir->i_mtime =
  	d_inode(old_dentry)->i_ctime =
  	d_inode(new_dentry)->i_ctime = CURRENT_TIME;
  
  	return 0;
  }
  static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
  {
  	struct dentry *whiteout;
  	int error;
  
  	whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name);
  	if (!whiteout)
  		return -ENOMEM;
  
  	error = shmem_mknod(old_dir, whiteout,
  			    S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
  	dput(whiteout);
  	if (error)
  		return error;
  
  	/*
  	 * Cheat and hash the whiteout while the old dentry is still in
  	 * place, instead of playing games with FS_RENAME_DOES_D_MOVE.
  	 *
  	 * d_lookup() will consistently find one of them at this point,
  	 * not sure which one, but that isn't even important.
  	 */
  	d_rehash(whiteout);
  	return 0;
  }
  /*
   * The VFS layer already does all the dentry stuff for rename,
   * we just have to decrement the usage count for the target if
 * it exists so that the VFS layer correctly frees it when it
   * gets overwritten.
   */
  static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags)
  {
  	struct inode *inode = d_inode(old_dentry);
  	int they_are_dirs = S_ISDIR(inode->i_mode);
  	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
  		return -EINVAL;
  	if (flags & RENAME_EXCHANGE)
  		return shmem_exchange(old_dir, old_dentry, new_dir, new_dentry);
  	if (!simple_empty(new_dentry))
  		return -ENOTEMPTY;
  	if (flags & RENAME_WHITEOUT) {
  		int error;
  
  		error = shmem_whiteout(old_dir, old_dentry);
  		if (error)
  			return error;
  	}
  	if (d_really_is_positive(new_dentry)) {
  		(void) shmem_unlink(new_dir, new_dentry);
  		if (they_are_dirs) {
  			drop_nlink(d_inode(new_dentry));
  			drop_nlink(old_dir);
  		}
  	} else if (they_are_dirs) {
  		drop_nlink(old_dir);
  		inc_nlink(new_dir);
  	}
  
  	old_dir->i_size -= BOGO_DIRENT_SIZE;
  	new_dir->i_size += BOGO_DIRENT_SIZE;
  	old_dir->i_ctime = old_dir->i_mtime =
  	new_dir->i_ctime = new_dir->i_mtime =
  	inode->i_ctime = CURRENT_TIME;
  	return 0;
  }
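
/*
 * Illustrative sketch (not part of the original file): the rename2 flags
 * accepted above, driven from userspace.  Assumes renameat2() has to be
 * reached via syscall(2) (no libc wrapper); both paths are hypothetical
 * names on the same tmpfs mount.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *	#include <linux/fs.h>		// RENAME_NOREPLACE, RENAME_EXCHANGE
 *
 *	int main(void)
 *	{
 *		// Atomically swap the two names (handled by shmem_exchange()).
 *		if (syscall(SYS_renameat2, AT_FDCWD, "/dev/shm/a",
 *			    AT_FDCWD, "/dev/shm/b", RENAME_EXCHANGE) < 0)
 *			perror("RENAME_EXCHANGE");
 *
 *		// Refuse to clobber an existing target.
 *		if (syscall(SYS_renameat2, AT_FDCWD, "/dev/shm/a",
 *			    AT_FDCWD, "/dev/shm/b", RENAME_NOREPLACE) < 0)
 *			perror("RENAME_NOREPLACE");
 *		return 0;
 *	}
 */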
  
  static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
  {
  	int error;
  	int len;
  	struct inode *inode;
  	struct page *page;
  	char *kaddr;
  	struct shmem_inode_info *info;
  
  	len = strlen(symname) + 1;
  	if (len > PAGE_CACHE_SIZE)
  		return -ENAMETOOLONG;
  	inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
  	if (!inode)
  		return -ENOSPC;
  	error = security_inode_init_security(inode, dir, &dentry->d_name,
  					     shmem_initxattrs, NULL);
  	if (error) {
  		if (error != -EOPNOTSUPP) {
  			iput(inode);
  			return error;
  		}
  		error = 0;
  	}
  	info = SHMEM_I(inode);
  	inode->i_size = len-1;
  	if (len <= SHORT_SYMLINK_LEN) {
  		info->symlink = kmemdup(symname, len, GFP_KERNEL);
  		if (!info->symlink) {
  			iput(inode);
  			return -ENOMEM;
  		}
  		inode->i_op = &shmem_short_symlink_operations;
  	} else {
  		error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
  		if (error) {
  			iput(inode);
  			return error;
  		}
  		inode->i_mapping->a_ops = &shmem_aops;
  		inode->i_op = &shmem_symlink_inode_operations;
  		kaddr = kmap_atomic(page);
  		memcpy(kaddr, symname, len);
  		kunmap_atomic(kaddr);
  		SetPageUptodate(page);
  		set_page_dirty(page);
  		unlock_page(page);
  		page_cache_release(page);
  	}
  	dir->i_size += BOGO_DIRENT_SIZE;
  	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
  	d_instantiate(dentry, inode);
  	dget(dentry);
  	return 0;
  }
  static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd)
  {
  	nd_set_link(nd, SHMEM_I(d_inode(dentry))->symlink);
  	return NULL;
  }
  static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
  	struct page *page = NULL;
  	int error = shmem_getpage(d_inode(dentry), 0, &page, SGP_READ, NULL);
  	nd_set_link(nd, error ? ERR_PTR(error) : kmap(page));
  	if (page)
  		unlock_page(page);
  	return page;
  }
  static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
  {
  	if (!IS_ERR(nd_get_link(nd))) {
  		struct page *page = cookie;
  		kunmap(page);
  		mark_page_accessed(page);
  		page_cache_release(page);
  	}
  }
  #ifdef CONFIG_TMPFS_XATTR
  /*
   * Superblocks without xattr inode operations may get some security.* xattr
   * support from the LSM "for free". As soon as we have any other xattrs
   * like ACLs, we also need to implement the security.* handlers at
   * filesystem level, though.
   */
  /*
   * Callback for security_inode_init_security() for acquiring xattrs.
   */
  static int shmem_initxattrs(struct inode *inode,
  			    const struct xattr *xattr_array,
  			    void *fs_info)
  {
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	const struct xattr *xattr;
  	struct simple_xattr *new_xattr;
  	size_t len;
  
  	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
  		new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
  		if (!new_xattr)
  			return -ENOMEM;
  
  		len = strlen(xattr->name) + 1;
  		new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
  					  GFP_KERNEL);
  		if (!new_xattr->name) {
  			kfree(new_xattr);
  			return -ENOMEM;
  		}
  
  		memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
  		       XATTR_SECURITY_PREFIX_LEN);
  		memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
  		       xattr->name, len);
  		simple_xattr_list_add(&info->xattrs, new_xattr);
  	}
  
  	return 0;
  }
  static const struct xattr_handler *shmem_xattr_handlers[] = {
  #ifdef CONFIG_TMPFS_POSIX_ACL
  	&posix_acl_access_xattr_handler,
  	&posix_acl_default_xattr_handler,
  #endif
  	NULL
  };
  
  static int shmem_xattr_validate(const char *name)
  {
  	struct { const char *prefix; size_t len; } arr[] = {
  		{ XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN },
  		{ XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN }
  	};
  	int i;
  
  	for (i = 0; i < ARRAY_SIZE(arr); i++) {
  		size_t preflen = arr[i].len;
  		if (strncmp(name, arr[i].prefix, preflen) == 0) {
  			if (!name[preflen])
  				return -EINVAL;
  			return 0;
  		}
  	}
  	return -EOPNOTSUPP;
  }
  
  static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
  			      void *buffer, size_t size)
  {
  	struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
  	int err;
  
  	/*
  	 * If this is a request for a synthetic attribute in the system.*
  	 * namespace use the generic infrastructure to resolve a handler
  	 * for it via sb->s_xattr.
  	 */
  	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
  		return generic_getxattr(dentry, name, buffer, size);
  
  	err = shmem_xattr_validate(name);
  	if (err)
  		return err;
  	return simple_xattr_get(&info->xattrs, name, buffer, size);
  }
  
  static int shmem_setxattr(struct dentry *dentry, const char *name,
  			  const void *value, size_t size, int flags)
  {
  	struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
  	int err;
  
  	/*
  	 * If this is a request for a synthetic attribute in the system.*
  	 * namespace use the generic infrastructure to resolve a handler
  	 * for it via sb->s_xattr.
  	 */
  	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
  		return generic_setxattr(dentry, name, value, size, flags);
  
  	err = shmem_xattr_validate(name);
  	if (err)
  		return err;
  	return simple_xattr_set(&info->xattrs, name, value, size, flags);
  }
  
  static int shmem_removexattr(struct dentry *dentry, const char *name)
  {
  	struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
  	int err;
  
  	/*
  	 * If this is a request for a synthetic attribute in the system.*
  	 * namespace use the generic infrastructure to resolve a handler
  	 * for it via sb->s_xattr.
  	 */
  	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
  		return generic_removexattr(dentry, name);
  
  	err = shmem_xattr_validate(name);
  	if (err)
  		return err;
  	return simple_xattr_remove(&info->xattrs, name);
  }
  
  static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
  {
  	struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
  	return simple_xattr_list(&info->xattrs, buffer, size);
  }
  #endif /* CONFIG_TMPFS_XATTR */
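
/*
 * Illustrative sketch (not part of the original file): exercising the
 * handlers above from userspace.  shmem_xattr_validate() only admits the
 * security.* and trusted.* namespaces (system.* is routed to the generic
 * sb->s_xattr handlers for ACLs), and trusted.* needs CAP_SYS_ADMIN; the
 * path below is hypothetical.
 *
 *	#include <stdio.h>
 *	#include <sys/xattr.h>
 *
 *	int main(void)
 *	{
 *		const char *path = "/dev/shm/example";	// file on tmpfs
 *		char buf[64];
 *		ssize_t n;
 *
 *		if (setxattr(path, "trusted.note", "hello", 5, 0) < 0)
 *			perror("setxattr");	// EPERM without CAP_SYS_ADMIN
 *
 *		n = getxattr(path, "trusted.note", buf, sizeof(buf));
 *		if (n >= 0)
 *			printf("trusted.note = %.*s\n", (int)n, buf);
 *		return 0;
 *	}
 */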
  static const struct inode_operations shmem_short_symlink_operations = {
  	.readlink	= generic_readlink,
  	.follow_link	= shmem_follow_short_symlink,
  #ifdef CONFIG_TMPFS_XATTR
  	.setxattr	= shmem_setxattr,
  	.getxattr	= shmem_getxattr,
  	.listxattr	= shmem_listxattr,
  	.removexattr	= shmem_removexattr,
  #endif
  };
  
  static const struct inode_operations shmem_symlink_inode_operations = {
  	.readlink	= generic_readlink,
  	.follow_link	= shmem_follow_link,
  	.put_link	= shmem_put_link,
  #ifdef CONFIG_TMPFS_XATTR
  	.setxattr	= shmem_setxattr,
  	.getxattr	= shmem_getxattr,
  	.listxattr	= shmem_listxattr,
  	.removexattr	= shmem_removexattr,
  #endif
  };

  static struct dentry *shmem_get_parent(struct dentry *child)
  {
  	return ERR_PTR(-ESTALE);
  }
  
  static int shmem_match(struct inode *ino, void *vfh)
  {
  	__u32 *fh = vfh;
  	__u64 inum = fh[2];
  	inum = (inum << 32) | fh[1];
  	return ino->i_ino == inum && fh[0] == ino->i_generation;
  }
  static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
  		struct fid *fid, int fh_len, int fh_type)
  {
  	struct inode *inode;
  	struct dentry *dentry = NULL;
  	u64 inum;
  
  	if (fh_len < 3)
  		return NULL;

  	inum = fid->raw[2];
  	inum = (inum << 32) | fid->raw[1];
  	inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
  			shmem_match, fid->raw);
  	if (inode) {
  		dentry = d_find_alias(inode);
  		iput(inode);
  	}
  	return dentry;
  }
  static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len,
  				struct inode *parent)
  {
  	if (*len < 3) {
  		*len = 3;
  		return FILEID_INVALID;
  	}

  	if (inode_unhashed(inode)) {
  		/* Unfortunately insert_inode_hash is not idempotent,
  		 * so as we hash inodes here rather than at creation
  		 * time, we need a lock to ensure we only try
  		 * to do it once
  		 */
  		static DEFINE_SPINLOCK(lock);
  		spin_lock(&lock);
  		if (inode_unhashed(inode))
  			__insert_inode_hash(inode,
  					    inode->i_ino + inode->i_generation);
  		spin_unlock(&lock);
  	}
  
  	fh[0] = inode->i_generation;
  	fh[1] = inode->i_ino;
  	fh[2] = ((__u64)inode->i_ino) >> 32;
  
  	*len = 3;
  	return 1;
  }
  static const struct export_operations shmem_export_ops = {
  	.get_parent     = shmem_get_parent,
  	.encode_fh      = shmem_encode_fh,
  	.fh_to_dentry	= shmem_fh_to_dentry,
  };
  static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
  			       bool remount)
  {
  	char *this_char, *value, *rest;
  	struct mempolicy *mpol = NULL;
  	uid_t uid;
  	gid_t gid;

  	while (options != NULL) {
  		this_char = options;
  		for (;;) {
  			/*
  			 * NUL-terminate this option: unfortunately,
  			 * mount options form a comma-separated list,
  			 * but mpol's nodelist may also contain commas.
  			 */
  			options = strchr(options, ',');
  			if (options == NULL)
  				break;
  			options++;
  			if (!isdigit(*options)) {
  				options[-1] = '\0';
  				break;
  			}
  		}
  		if (!*this_char)
  			continue;
  		if ((value = strchr(this_char,'=')) != NULL) {
  			*value++ = 0;
  		} else {
  			printk(KERN_ERR
  			    "tmpfs: No value for mount option '%s'
  ",
  			    this_char);
  			goto error;
  		}
  
  		if (!strcmp(this_char,"size")) {
  			unsigned long long size;
  			size = memparse(value,&rest);
  			if (*rest == '%') {
  				size <<= PAGE_SHIFT;
  				size *= totalram_pages;
  				do_div(size, 100);
  				rest++;
  			}
  			if (*rest)
  				goto bad_val;
  			sbinfo->max_blocks =
  				DIV_ROUND_UP(size, PAGE_CACHE_SIZE);
  		} else if (!strcmp(this_char,"nr_blocks")) {
  			sbinfo->max_blocks = memparse(value, &rest);
  			if (*rest)
  				goto bad_val;
  		} else if (!strcmp(this_char,"nr_inodes")) {
  			sbinfo->max_inodes = memparse(value, &rest);
  			if (*rest)
  				goto bad_val;
  		} else if (!strcmp(this_char,"mode")) {
  			if (remount)
  				continue;
  			sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777;
  			if (*rest)
  				goto bad_val;
  		} else if (!strcmp(this_char,"uid")) {
  			if (remount)
  				continue;
  			uid = simple_strtoul(value, &rest, 0);
  			if (*rest)
  				goto bad_val;
  			sbinfo->uid = make_kuid(current_user_ns(), uid);
  			if (!uid_valid(sbinfo->uid))
  				goto bad_val;
  		} else if (!strcmp(this_char,"gid")) {
  			if (remount)
  				continue;
  			gid = simple_strtoul(value, &rest, 0);
  			if (*rest)
  				goto bad_val;
  			sbinfo->gid = make_kgid(current_user_ns(), gid);
  			if (!gid_valid(sbinfo->gid))
  				goto bad_val;
  		} else if (!strcmp(this_char,"mpol")) {
  			mpol_put(mpol);
  			mpol = NULL;
  			if (mpol_parse_str(value, &mpol))
  				goto bad_val;
  		} else {
  			printk(KERN_ERR "tmpfs: Bad mount option %s
  ",
  			       this_char);
  			goto error;
  		}
  	}
  	sbinfo->mpol = mpol;
  	return 0;
  
  bad_val:
  	printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'
  ",
  	       value, this_char);
  error:
  	mpol_put(mpol);
  	return 1;
  
  }
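
/*
 * Illustrative sketch (not part of the original file): the option string
 * parsed above, as typically supplied via mount(8),
 * e.g. "mount -t tmpfs -o size=512m,nr_inodes=10k,mode=1777 tmpfs /mnt/tmp",
 * or directly through mount(2).  Mount point and limits are hypothetical;
 * "size" may also be given as a percentage of RAM, e.g. "size=50%".
 *
 *	#include <stdio.h>
 *	#include <sys/mount.h>
 *
 *	int main(void)
 *	{
 *		if (mount("tmpfs", "/mnt/tmp", "tmpfs", 0,
 *			  "size=512m,nr_inodes=10k,mode=1777,uid=1000,gid=1000") < 0)
 *			perror("mount");
 *		return 0;
 *	}
 */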
  
  static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
  {
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
  	struct shmem_sb_info config = *sbinfo;
  	unsigned long inodes;
  	int error = -EINVAL;
  	config.mpol = NULL;
  	if (shmem_parse_options(data, &config, true))
  		return error;

  	spin_lock(&sbinfo->stat_lock);
  	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
  	if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0)
  		goto out;
  	if (config.max_inodes < inodes)
  		goto out;
  	/*
  	 * Those tests disallow limited->unlimited while any are in use;
  	 * but we must separately disallow unlimited->limited, because
  	 * in that case we have no record of how much is already in use.
  	 */
  	if (config.max_blocks && !sbinfo->max_blocks)
  		goto out;
  	if (config.max_inodes && !sbinfo->max_inodes)
  		goto out;
  
  	error = 0;
  	sbinfo->max_blocks  = config.max_blocks;
  	sbinfo->max_inodes  = config.max_inodes;
  	sbinfo->free_inodes = config.max_inodes - inodes;

  	/*
  	 * Preserve previous mempolicy unless mpol remount option was specified.
  	 */
  	if (config.mpol) {
  		mpol_put(sbinfo->mpol);
  		sbinfo->mpol = config.mpol;	/* transfers initial ref */
  	}
  out:
  	spin_unlock(&sbinfo->stat_lock);
  	return error;
  }

  static int shmem_show_options(struct seq_file *seq, struct dentry *root)
  {
  	struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb);
  
  	if (sbinfo->max_blocks != shmem_default_max_blocks())
  		seq_printf(seq, ",size=%luk",
  			sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10));
  	if (sbinfo->max_inodes != shmem_default_max_inodes())
  		seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
  	if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
  		seq_printf(seq, ",mode=%03ho", sbinfo->mode);
  	if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
  		seq_printf(seq, ",uid=%u",
  				from_kuid_munged(&init_user_ns, sbinfo->uid));
  	if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
  		seq_printf(seq, ",gid=%u",
  				from_kgid_munged(&init_user_ns, sbinfo->gid));
  	shmem_show_mpol(seq, sbinfo->mpol);
  	return 0;
  }
  
  #define MFD_NAME_PREFIX "memfd:"
  #define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
  #define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
  
  #define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING)
  
  SYSCALL_DEFINE2(memfd_create,
  		const char __user *, uname,
  		unsigned int, flags)
  {
  	struct shmem_inode_info *info;
  	struct file *file;
  	int fd, error;
  	char *name;
  	long len;
  
  	if (flags & ~(unsigned int)MFD_ALL_FLAGS)
  		return -EINVAL;
  
  	/* length includes terminating zero */
  	len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
  	if (len <= 0)
  		return -EFAULT;
  	if (len > MFD_NAME_MAX_LEN + 1)
  		return -EINVAL;
  
  	name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY);
  	if (!name)
  		return -ENOMEM;
  
  	strcpy(name, MFD_NAME_PREFIX);
  	if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
  		error = -EFAULT;
  		goto err_name;
  	}
  
  	/* terminating-zero may have changed after strnlen_user() returned */
  	if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
  		error = -EFAULT;
  		goto err_name;
  	}
  
  	fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
  	if (fd < 0) {
  		error = fd;
  		goto err_name;
  	}
  
  	file = shmem_file_setup(name, 0, VM_NORESERVE);
  	if (IS_ERR(file)) {
  		error = PTR_ERR(file);
  		goto err_fd;
  	}
  	info = SHMEM_I(file_inode(file));
  	file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
  	file->f_flags |= O_RDWR | O_LARGEFILE;
  	if (flags & MFD_ALLOW_SEALING)
  		info->seals &= ~F_SEAL_SEAL;
  
  	fd_install(fd, file);
  	kfree(name);
  	return fd;
  
  err_fd:
  	put_unused_fd(fd);
  err_name:
  	kfree(name);
  	return error;
  }
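
/*
 * Illustrative sketch (not part of the original file): typical userspace
 * use of the syscall above, creating a sealable memfd and then forbidding
 * size changes.  Assumes a libc without a memfd_create() wrapper, hence
 * the raw syscall; sealing constants come from the uapi headers and the
 * exact includes needed vary by libc.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *	#include <linux/memfd.h>
 *
 *	int main(void)
 *	{
 *		int fd = syscall(__NR_memfd_create, "example",
 *				 MFD_CLOEXEC | MFD_ALLOW_SEALING);
 *
 *		if (fd < 0) {
 *			perror("memfd_create");
 *			return 1;
 *		}
 *		ftruncate(fd, 4096);			// size the file
 *		// Readers may now rely on the size never changing.
 *		if (fcntl(fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK) < 0)
 *			perror("F_ADD_SEALS");
 *		close(fd);
 *		return 0;
 *	}
 */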
  #endif /* CONFIG_TMPFS */
  
  static void shmem_put_super(struct super_block *sb)
  {
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
  
  	percpu_counter_destroy(&sbinfo->used_blocks);
  	mpol_put(sbinfo->mpol);
  	kfree(sbinfo);
  	sb->s_fs_info = NULL;
  }
  int shmem_fill_super(struct super_block *sb, void *data, int silent)
  {
  	struct inode *inode;
  	struct shmem_sb_info *sbinfo;
  	int err = -ENOMEM;
  
  	/* Round up to L1_CACHE_BYTES to resist false sharing */
  	sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
  				L1_CACHE_BYTES), GFP_KERNEL);
  	if (!sbinfo)
  		return -ENOMEM;
  	sbinfo->mode = S_IRWXUGO | S_ISVTX;
  	sbinfo->uid = current_fsuid();
  	sbinfo->gid = current_fsgid();
  	sb->s_fs_info = sbinfo;

  #ifdef CONFIG_TMPFS
  	/*
  	 * Per default we only allow half of the physical ram per
  	 * tmpfs instance, limiting inodes to one per page of lowmem;
  	 * but the internal instance is left unlimited.
  	 */
  	if (!(sb->s_flags & MS_KERNMOUNT)) {
  		sbinfo->max_blocks = shmem_default_max_blocks();
  		sbinfo->max_inodes = shmem_default_max_inodes();
  		if (shmem_parse_options(data, sbinfo, false)) {
  			err = -EINVAL;
  			goto failed;
  		}
  	} else {
  		sb->s_flags |= MS_NOUSER;
  	}
  	sb->s_export_op = &shmem_export_ops;
  	sb->s_flags |= MS_NOSEC;
  #else
  	sb->s_flags |= MS_NOUSER;
  #endif
  	spin_lock_init(&sbinfo->stat_lock);
  	if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
  		goto failed;
  	sbinfo->free_inodes = sbinfo->max_inodes;

  	sb->s_maxbytes = MAX_LFS_FILESIZE;
  	sb->s_blocksize = PAGE_CACHE_SIZE;
  	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
  	sb->s_magic = TMPFS_MAGIC;
  	sb->s_op = &shmem_ops;
  	sb->s_time_gran = 1;
  #ifdef CONFIG_TMPFS_XATTR
  	sb->s_xattr = shmem_xattr_handlers;
  #endif
  #ifdef CONFIG_TMPFS_POSIX_ACL
  	sb->s_flags |= MS_POSIXACL;
  #endif

  	inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
  	if (!inode)
  		goto failed;
  	inode->i_uid = sbinfo->uid;
  	inode->i_gid = sbinfo->gid;
  	sb->s_root = d_make_root(inode);
  	if (!sb->s_root)
  		goto failed;
  	return 0;
  failed:
  	shmem_put_super(sb);
  	return err;
  }
  static struct kmem_cache *shmem_inode_cachep;
  
  static struct inode *shmem_alloc_inode(struct super_block *sb)
  {
  	struct shmem_inode_info *info;
  	info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
  	if (!info)
  		return NULL;
  	return &info->vfs_inode;
  }
  static void shmem_destroy_callback(struct rcu_head *head)
  {
  	struct inode *inode = container_of(head, struct inode, i_rcu);
  	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
  }
  static void shmem_destroy_inode(struct inode *inode)
  {
  	if (S_ISREG(inode->i_mode))
  		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
  	call_rcu(&inode->i_rcu, shmem_destroy_callback);
  }
  static void shmem_init_inode(void *foo)
  {
  	struct shmem_inode_info *info = foo;
  	inode_init_once(&info->vfs_inode);
  }
  static int shmem_init_inodecache(void)
  {
  	shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
  				sizeof(struct shmem_inode_info),
  				0, SLAB_PANIC, shmem_init_inode);
  	return 0;
  }
  static void shmem_destroy_inodecache(void)
  {
  	kmem_cache_destroy(shmem_inode_cachep);
  }
  static const struct address_space_operations shmem_aops = {
  	.writepage	= shmem_writepage,
  	.set_page_dirty	= __set_page_dirty_no_writeback,
  #ifdef CONFIG_TMPFS
  	.write_begin	= shmem_write_begin,
  	.write_end	= shmem_write_end,
  #endif
  #ifdef CONFIG_MIGRATION
  	.migratepage	= migrate_page,
  #endif
  	.error_remove_page = generic_error_remove_page,
  };
  static const struct file_operations shmem_file_operations = {
  	.mmap		= shmem_mmap,
  #ifdef CONFIG_TMPFS
  	.llseek		= shmem_file_llseek,
  	.read_iter	= shmem_file_read_iter,
  	.write_iter	= generic_file_write_iter,
  	.fsync		= noop_fsync,
  	.splice_read	= shmem_file_splice_read,
  	.splice_write	= iter_file_splice_write,
  	.fallocate	= shmem_fallocate,
  #endif
  };
  static const struct inode_operations shmem_inode_operations = {
  	.setattr	= shmem_setattr,
  #ifdef CONFIG_TMPFS_XATTR
  	.setxattr	= shmem_setxattr,
  	.getxattr	= shmem_getxattr,
  	.listxattr	= shmem_listxattr,
  	.removexattr	= shmem_removexattr,
  	.set_acl	= simple_set_acl,
  #endif
  };
  static const struct inode_operations shmem_dir_inode_operations = {
  #ifdef CONFIG_TMPFS
  	.create		= shmem_create,
  	.lookup		= simple_lookup,
  	.link		= shmem_link,
  	.unlink		= shmem_unlink,
  	.symlink	= shmem_symlink,
  	.mkdir		= shmem_mkdir,
  	.rmdir		= shmem_rmdir,
  	.mknod		= shmem_mknod,
  	.rename2	= shmem_rename2,
  	.tmpfile	= shmem_tmpfile,
  #endif
  #ifdef CONFIG_TMPFS_XATTR
  	.setxattr	= shmem_setxattr,
  	.getxattr	= shmem_getxattr,
  	.listxattr	= shmem_listxattr,
  	.removexattr	= shmem_removexattr,
  #endif
  #ifdef CONFIG_TMPFS_POSIX_ACL
  	.setattr	= shmem_setattr,
  	.set_acl	= simple_set_acl,
  #endif
  };
  static const struct inode_operations shmem_special_inode_operations = {
  #ifdef CONFIG_TMPFS_XATTR
  	.setxattr	= shmem_setxattr,
  	.getxattr	= shmem_getxattr,
  	.listxattr	= shmem_listxattr,
  	.removexattr	= shmem_removexattr,
  #endif
  #ifdef CONFIG_TMPFS_POSIX_ACL
  	.setattr	= shmem_setattr,
  	.set_acl	= simple_set_acl,
  #endif
  };
  static const struct super_operations shmem_ops = {
  	.alloc_inode	= shmem_alloc_inode,
  	.destroy_inode	= shmem_destroy_inode,
  #ifdef CONFIG_TMPFS
  	.statfs		= shmem_statfs,
  	.remount_fs	= shmem_remount_fs,
  	.show_options	= shmem_show_options,
  #endif
  	.evict_inode	= shmem_evict_inode,
  	.drop_inode	= generic_delete_inode,
  	.put_super	= shmem_put_super,
  };
  static const struct vm_operations_struct shmem_vm_ops = {
  	.fault		= shmem_fault,
  	.map_pages	= filemap_map_pages,
  #ifdef CONFIG_NUMA
  	.set_policy     = shmem_set_policy,
  	.get_policy     = shmem_get_policy,
  #endif
  };
  static struct dentry *shmem_mount(struct file_system_type *fs_type,
  	int flags, const char *dev_name, void *data)
  {
  	return mount_nodev(fs_type, flags, data, shmem_fill_super);
  }
  static struct file_system_type shmem_fs_type = {
  	.owner		= THIS_MODULE,
  	.name		= "tmpfs",
  	.mount		= shmem_mount,
  	.kill_sb	= kill_litter_super,
  	.fs_flags	= FS_USERNS_MOUNT,
  };

  int __init shmem_init(void)
  {
  	int error;
  	/* If rootfs called this, don't re-init */
  	if (shmem_inode_cachep)
  		return 0;
  	error = shmem_init_inodecache();
  	if (error)
  		goto out3;
  	error = register_filesystem(&shmem_fs_type);
  	if (error) {
  		printk(KERN_ERR "Could not register tmpfs
  ");
  		goto out2;
  	}

  	shm_mnt = kern_mount(&shmem_fs_type);
  	if (IS_ERR(shm_mnt)) {
  		error = PTR_ERR(shm_mnt);
  		printk(KERN_ERR "Could not kern_mount tmpfs
  ");
  		goto out1;
  	}
  	return 0;
  
  out1:
  	unregister_filesystem(&shmem_fs_type);
  out2:
  	shmem_destroy_inodecache();
  out3:
  	shm_mnt = ERR_PTR(error);
  	return error;
  }
  
  #else /* !CONFIG_SHMEM */
  
  /*
   * tiny-shmem: simple shmemfs and tmpfs using ramfs code
   *
 * This is intended for small systems where the benefits of the full
   * shmem code (swap-backed and resource-limited) are outweighed by
   * their complexity. On systems without swap this code should be
   * effectively equivalent, but much lighter weight.
   */
  static struct file_system_type shmem_fs_type = {
  	.name		= "tmpfs",
  	.mount		= ramfs_mount,
  	.kill_sb	= kill_litter_super,
  	.fs_flags	= FS_USERNS_MOUNT,
  };
  int __init shmem_init(void)
  {
  	BUG_ON(register_filesystem(&shmem_fs_type) != 0);

  	shm_mnt = kern_mount(&shmem_fs_type);
  	BUG_ON(IS_ERR(shm_mnt));
  
  	return 0;
  }
  int shmem_unuse(swp_entry_t swap, struct page *page)
  {
  	return 0;
  }
  int shmem_lock(struct file *file, int lock, struct user_struct *user)
  {
  	return 0;
  }
  void shmem_unlock_mapping(struct address_space *mapping)
  {
  }
  void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
  {
  	truncate_inode_pages_range(inode->i_mapping, lstart, lend);
  }
  EXPORT_SYMBOL_GPL(shmem_truncate_range);
  #define shmem_vm_ops				generic_file_vm_ops
  #define shmem_file_operations			ramfs_file_operations
  #define shmem_get_inode(sb, dir, mode, dev, flags)	ramfs_get_inode(sb, dir, mode, dev)
  #define shmem_acct_size(flags, size)		0
  #define shmem_unacct_size(flags, size)		do {} while (0)
  
  #endif /* CONFIG_SHMEM */
  
  /* common code */

  static struct dentry_operations anon_ops = {
  	.d_dname = simple_dname
  };
  static struct file *__shmem_file_setup(const char *name, loff_t size,
  				       unsigned long flags, unsigned int i_flags)
  {
  	struct file *res;
  	struct inode *inode;
  	struct path path;
  	struct super_block *sb;
  	struct qstr this;
  
  	if (IS_ERR(shm_mnt))
  		return ERR_CAST(shm_mnt);

  	if (size < 0 || size > MAX_LFS_FILESIZE)
  		return ERR_PTR(-EINVAL);
  
  	if (shmem_acct_size(flags, size))
  		return ERR_PTR(-ENOMEM);
  	res = ERR_PTR(-ENOMEM);
  	this.name = name;
  	this.len = strlen(name);
  	this.hash = 0; /* will go */
  	sb = shm_mnt->mnt_sb;
  	path.mnt = mntget(shm_mnt);
  	path.dentry = d_alloc_pseudo(sb, &this);
  	if (!path.dentry)
  		goto put_memory;
  	d_set_d_op(path.dentry, &anon_ops);

  	res = ERR_PTR(-ENOSPC);
  	inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
  	if (!inode)
  		goto put_memory;

  	inode->i_flags |= i_flags;
  	d_instantiate(path.dentry, inode);
  	inode->i_size = size;
6d6b77f16   Miklos Szeredi   filesystems: add ...
3202
  	clear_nlink(inode);	/* It is unlinked */
26567cdbb   Al Viro   fix nommu breakag...
3203
3204
  	res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
  	if (IS_ERR(res))
66ee4b888   Konstantin Khlebnikov   shmem: fix double...
3205
  		goto put_path;
4b42af81f   Al Viro   switch shmem_file...
3206

6b4d0b279   Al Viro   clean shmem_file_...
3207
  	res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
4b42af81f   Al Viro   switch shmem_file...
3208
  		  &shmem_file_operations);
6b4d0b279   Al Viro   clean shmem_file_...
3209
  	if (IS_ERR(res))
66ee4b888   Konstantin Khlebnikov   shmem: fix double...
3210
  		goto put_path;
4b42af81f   Al Viro   switch shmem_file...
3211

6b4d0b279   Al Viro   clean shmem_file_...
3212
  	return res;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3213

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3214
3215
  put_memory:
  	shmem_unacct_size(flags, size);
66ee4b888   Konstantin Khlebnikov   shmem: fix double...
3216
3217
  put_path:
  	path_put(&path);
6b4d0b279   Al Viro   clean shmem_file_...
3218
  	return res;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3219
  }
c72770909   Eric Paris   security: shmem: ...
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
  
  /**
   * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be
   * 	kernel internal.  There will be NO LSM permission checks against the
   * 	underlying inode, so users of this interface must do LSM checks at a
   * 	higher layer.  The one user is the big_key implementation, which does
   * 	its LSM checks at the key level rather than the inode level.
   * @name: name for dentry (to be seen in /proc/<pid>/maps)
   * @size: size to be set for the file
   * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
   */
  struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
  {
  	return __shmem_file_setup(name, size, flags, S_PRIVATE);
  }
  
  /**
   * shmem_file_setup - get an unlinked file living in tmpfs
   * @name: name for dentry (to be seen in /proc/<pid>/maps)
   * @size: size to be set for the file
   * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
   */
  struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
  {
  	return __shmem_file_setup(name, size, flags, 0);
  }
395e0ddc4   Keith Packard   Export shmem_file...
3246
  EXPORT_SYMBOL_GPL(shmem_file_setup);
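  /*
   * Illustrative sketch (not part of mm/shmem.c): how an in-kernel user
   * might obtain an unlinked tmpfs file with shmem_file_setup().  The
   * function name and error handling below are hypothetical caller code.
   */
  static struct file *example_create_shmem_buffer(loff_t bytes)
  {
  	struct file *filp;
  
  	/* The name shows up in /proc/<pid>/maps once the file is mapped;
  	 * pass VM_NORESERVE instead of 0 to skip size pre-accounting. */
  	filp = shmem_file_setup("example-buffer", bytes, 0);
  	if (IS_ERR(filp))
  		return filp;
  
  	/* The file is already unlinked: it goes away on the final fput(). */
  	return filp;
  }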
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3247

cc03d1f37   John Stultz   ashmem: Add shmem...
3248
3249
3250
3251
3252
3253
3254
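  /**
   * shmem_set_file - back a vma with a shmem file
   * @vma: the vma to receive the new backing file
   * @file: unlinked shmem file; the vma takes over its reference
   *
   * Drops any reference the vma already holds on a backing file, then
   * installs @file and the shmem vm_ops.  Used by shmem_zero_setup()
   * below and by the ashmem driver.
   */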
  void shmem_set_file(struct vm_area_struct *vma, struct file *file)
  {
  	if (vma->vm_file)
  		fput(vma->vm_file);
  	vma->vm_file = file;
  	vma->vm_ops = &shmem_vm_ops;
  }
467118102   Randy Dunlap   mm/shmem and tiny...
3255
  /**
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3256
   * shmem_zero_setup - setup a shared anonymous mapping
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3257
3258
3259
3260
3261
3262
   * @vma: the vma to be mmapped, as prepared by do_mmap_pgoff
   */
  int shmem_zero_setup(struct vm_area_struct *vma)
  {
  	struct file *file;
  	loff_t size = vma->vm_end - vma->vm_start;
66fc13039   Hugh Dickins   mm: shmem_zero_se...
3263
3264
3265
3266
3267
3268
3269
  	/*
  	 * Cloning a new file under mmap_sem leads to a lock ordering conflict
  	 * between XFS directory reading and selinux: since this file is only
  	 * accessible to the user through its mapping, use S_PRIVATE flag to
  	 * bypass file security, in the same way as shmem_kernel_file_setup().
  	 */
  	file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3270
3271
  	if (IS_ERR(file))
  		return PTR_ERR(file);
cc03d1f37   John Stultz   ashmem: Add shmem...
3272
  	shmem_set_file(vma, file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3273
3274
  	return 0;
  }
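  /*
   * Illustrative sketch (not part of mm/shmem.c): roughly what the mmap
   * path (mmap_region()) does for an anonymous MAP_SHARED mapping, which
   * is where shmem_zero_setup() gets called.  The helper name below is
   * hypothetical.
   */
  static int example_back_shared_anon(struct vm_area_struct *vma)
  {
  	/* Shared anonymous memory must be backed by an (unlinked) tmpfs file */
  	if (vma->vm_flags & VM_SHARED)
  		return shmem_zero_setup(vma);
  	return 0;
  }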
d9d90e5eb   Hugh Dickins   tmpfs: add shmem_...
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
  
  /**
   * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags.
   * @mapping:	the page's address_space
   * @index:	the page index
   * @gfp:	the page allocator flags to use if allocating
   *
   * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)",
   * with any new page allocations done using the specified allocation flags.
   * But read_cache_page_gfp() uses the ->readpage() method, which does not
   * suit tmpfs: tmpfs may have pages in swapcache, and needs to find those
   * for itself.  The drivers/gpu/drm i915 and ttm drivers rely upon this
   * support, which this function provides.
   *
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
3288
3289
   * i915_gem_object_get_pages_gtt() mixes __GFP_NORETRY | __GFP_NOWARN in
   * with the mapping_gfp_mask(), to avoid OOMing the machine unnecessarily.
d9d90e5eb   Hugh Dickins   tmpfs: add shmem_...
3290
3291
3292
3293
   */
  struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
  					 pgoff_t index, gfp_t gfp)
  {
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
3294
3295
  #ifdef CONFIG_SHMEM
  	struct inode *inode = mapping->host;
9276aad6c   Hugh Dickins   tmpfs: remove_shm...
3296
  	struct page *page;
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
  	int error;
  
  	BUG_ON(mapping->a_ops != &shmem_aops);
  	error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, gfp, NULL);
  	if (error)
  		page = ERR_PTR(error);
  	else
  		unlock_page(page);
  	return page;
  #else
  	/*
  	 * The tiny !SHMEM case uses ramfs without swap
  	 */
d9d90e5eb   Hugh Dickins   tmpfs: add shmem_...
3310
  	return read_cache_page_gfp(mapping, index, gfp);
68da9f055   Hugh Dickins   tmpfs: pass gfp t...
3311
  #endif
d9d90e5eb   Hugh Dickins   tmpfs: add shmem_...
3312
3313
  }
  EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);
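  /*
   * Illustrative sketch (not part of mm/shmem.c): reading a page of a
   * shmem-backed object the way a GPU driver might, mixing __GFP_NORETRY
   * and __GFP_NOWARN into the mapping's gfp mask as described above.
   * The function name below is hypothetical caller code.
   */
  static struct page *example_get_object_page(struct address_space *mapping,
  					    pgoff_t index)
  {
  	gfp_t gfp = mapping_gfp_mask(mapping) | __GFP_NORETRY | __GFP_NOWARN;
  	struct page *page;
  
  	page = shmem_read_mapping_page_gfp(mapping, index, gfp);
  	if (IS_ERR(page))
  		return page;
  
  	/* The caller now holds a reference: drop it with put_page() later. */
  	return page;
  }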