Blame view

mm/shmem.c 71.6 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
  /*
   * Resizable virtual memory filesystem for Linux.
   *
   * Copyright (C) 2000 Linus Torvalds.
   *		 2000 Transmeta Corp.
   *		 2000-2001 Christoph Rohland
   *		 2000-2001 SAP AG
   *		 2002 Red Hat Inc.
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
9
10
   * Copyright (C) 2002-2005 Hugh Dickins.
   * Copyright (C) 2002-2005 VERITAS Software Corporation.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
11
12
13
14
15
16
   * Copyright (C) 2004 Andi Kleen, SuSE Labs
   *
   * Extended attribute support for tmpfs:
   * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
   *
853ac43ab   Matt Mackall   shmem: unify regu...
17
18
19
   * tiny-shmem:
   * Copyright (c) 2004, 2008 Matt Mackall <mpm@selenic.com>
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
21
   * This file is released under the GPL.
   */
853ac43ab   Matt Mackall   shmem: unify regu...
22
23
24
25
  #include <linux/fs.h>
  #include <linux/init.h>
  #include <linux/vfs.h>
  #include <linux/mount.h>
caefba174   Hugh Dickins   shmem: respect MA...
26
  #include <linux/pagemap.h>
853ac43ab   Matt Mackall   shmem: unify regu...
27
28
29
  #include <linux/file.h>
  #include <linux/mm.h>
  #include <linux/module.h>
7e496299d   Tim Chen   tmpfs: make tmpfs...
30
  #include <linux/percpu_counter.h>
853ac43ab   Matt Mackall   shmem: unify regu...
31
32
33
34
35
  #include <linux/swap.h>
  
  static struct vfsmount *shm_mnt;
  
  #ifdef CONFIG_SHMEM
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
36
37
38
39
40
  /*
   * This virtual memory filesystem is heavily based on the ramfs. It
   * extends ramfs by the ability to use swap and honor resource limits
   * which makes it a completely usable filesystem.
   */
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
41
  #include <linux/xattr.h>
a56942551   Christoph Hellwig   knfsd: exportfs: ...
42
  #include <linux/exportfs.h>
1c7c474c3   Christoph Hellwig   make generic_acl ...
43
  #include <linux/posix_acl.h>
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
44
  #include <linux/generic_acl.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45
  #include <linux/mman.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
46
47
48
49
  #include <linux/string.h>
  #include <linux/slab.h>
  #include <linux/backing-dev.h>
  #include <linux/shmem_fs.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
50
  #include <linux/writeback.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
51
52
53
54
55
  #include <linux/blkdev.h>
  #include <linux/security.h>
  #include <linux/swapops.h>
  #include <linux/mempolicy.h>
  #include <linux/namei.h>
b00dc3ad7   Hugh Dickins   [PATCH] tmpfs: fi...
56
  #include <linux/ctype.h>
304dbdb7a   Lee Schermerhorn   [PATCH] add migra...
57
  #include <linux/migrate.h>
c1f60a5a4   Christoph Lameter   [PATCH] reduce MA...
58
  #include <linux/highmem.h>
680d794ba   akpm@linux-foundation.org   mount options: fi...
59
  #include <linux/seq_file.h>
925629278   Mimi Zohar   integrity: specia...
60
  #include <linux/magic.h>
304dbdb7a   Lee Schermerhorn   [PATCH] add migra...
61

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
62
63
64
  #include <asm/uaccess.h>
  #include <asm/div64.h>
  #include <asm/pgtable.h>
caefba174   Hugh Dickins   shmem: respect MA...
65
66
67
68
69
70
71
72
73
74
75
76
  /*
   * The maximum size of a shmem/tmpfs file is limited by the maximum size of
   * its triple-indirect swap vector - see illustration at shmem_swp_entry().
   *
   * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel,
   * but one eighth of that on a 64-bit kernel.  With 8kB page size, maximum
   * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel,
   * MAX_LFS_FILESIZE being then more restrictive than swap vector layout.
   *
   * We use / and * instead of shifts in the definitions below, so that the swap
   * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE.
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
77
  #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
61609d01c   Yuri Tikhonov   shmem: fix divisi...
78
  #define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
79

caefba174   Hugh Dickins   shmem: respect MA...
80
81
  #define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
  #define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
82

caefba174   Hugh Dickins   shmem: respect MA...
83
84
85
86
  #define SHMEM_MAX_BYTES  min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE)
  #define SHMEM_MAX_INDEX  ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT))
  
  #define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
87
88
89
90
91
92
93
94
95
96
97
  #define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
  
  /* info->flags needs VM_flags to handle pagein/truncate races efficiently */
  #define SHMEM_PAGEIN	 VM_READ
  #define SHMEM_TRUNCATE	 VM_WRITE
  
  /* Definition to limit shmem_truncate's steps between cond_rescheds */
  #define LATENCY_LIMIT	 64
  
  /* Pretend that each entry is of this size in directory's i_size */
  #define BOGO_DIRENT_SIZE 20
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
98
99
  /*
   * Flag allocation requirements to shmem_getpage and shmem_swp_alloc.
   * Each value states whether the lookup may go beyond i_size and whether
   * it may allocate a page.
   */
  enum sgp_type {
  	SGP_READ,	/* don't exceed i_size, don't allocate page */
  	SGP_CACHE,	/* don't exceed i_size, may allocate page */
  	SGP_DIRTY,	/* like SGP_CACHE, but set new page dirty */
  	SGP_WRITE,	/* may exceed i_size, may allocate page */
  };
b76db7354   Andrew Morton   mount-options-fix...
105
  #ifdef CONFIG_TMPFS
680d794ba   akpm@linux-foundation.org   mount options: fi...
106
107
108
109
110
111
112
113
114
  /* Default block limit: half of totalram_pages. */
  static unsigned long shmem_default_max_blocks(void)
  {
  	unsigned long half_ram = totalram_pages / 2;

  	return half_ram;
  }
  
  /*
   * Default inode limit: the smaller of (totalram_pages - totalhigh_pages)
   * and half of totalram_pages.
   */
  static unsigned long shmem_default_max_inodes(void)
  {
  	unsigned long non_high = totalram_pages - totalhigh_pages;
  	unsigned long half_ram = totalram_pages / 2;

  	return min(non_high, half_ram);
  }
b76db7354   Andrew Morton   mount-options-fix...
115
  #endif
680d794ba   akpm@linux-foundation.org   mount options: fi...
116

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
117
118
  static int shmem_getpage(struct inode *inode, unsigned long idx,
  			 struct page **pagep, enum sgp_type sgp, int *type);
6daa0e286   Al Viro   [PATCH] gfp_t: mm...
119
  /*
   * Allocate one zeroed page-cache-sized page for use as a swap vector
   * (index) page.  Returns whatever alloc_pages() returns; callers in
   * this file treat NULL as allocation failure.
   */
  static inline struct page *shmem_dir_alloc(gfp_t gfp_mask)
  {
  	/*
  	 * The above definition of ENTRIES_PER_PAGE, and the use of
  	 * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
  	 * might be reconsidered if it ever diverges from PAGE_SIZE.
  	 *
  	 * Mobility flags are masked out as swap vectors cannot move
  	 */
  	return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO,
  				PAGE_CACHE_SHIFT-PAGE_SHIFT);
  }
  
  /* Release a swap vector page obtained from shmem_dir_alloc(). */
  static inline void shmem_dir_free(struct page *page)
  {
  	/* Same allocation order that shmem_dir_alloc() passes to alloc_pages() */
  	const unsigned int order = PAGE_CACHE_SHIFT - PAGE_SHIFT;

  	__free_pages(page, order);
  }
  
  /*
   * Atomically map a directory page (slot KM_USER0) and return it viewed
   * as an array of page pointers.  Pair with shmem_dir_unmap().
   */
  static struct page **shmem_dir_map(struct page *page)
  {
  	struct page **dir = (struct page **)kmap_atomic(page, KM_USER0);

  	return dir;
  }
  
  /* Undo shmem_dir_map(): drop the KM_USER0 atomic mapping of @dir. */
  static inline void shmem_dir_unmap(struct page **dir)
  {
  	void *kaddr = dir;

  	kunmap_atomic(kaddr, KM_USER0);
  }
  
  /*
   * Atomically map a page of swap entries (slot KM_USER1) and return it
   * viewed as an array of swp_entry_t.  Pair with shmem_swp_unmap().
   */
  static swp_entry_t *shmem_swp_map(struct page *page)
  {
  	swp_entry_t *entries = (swp_entry_t *)kmap_atomic(page, KM_USER1);

  	return entries;
  }
  
  /*
   * Take a dummy KM_USER1 atomic mapping so that a later shmem_swp_unmap()
   * on a pointer into i_direct stays balanced.
   */
  static inline void shmem_swp_balance_unmap(void)
  {
  	struct page *lowmem_page = ZERO_PAGE(0);

  	/*
  	 * Code handed a pointer to an i_direct entry also handles indirect
  	 * entries, and so will call shmem_swp_unmap: the preempt count has
  	 * to stay in balance.  What kmap_atomic does for a lowmem page
  	 * depends on config and architecture, so simply pretend to
  	 * kmap_atomic some lowmem page; the returned address is not needed.
  	 */
  	(void) kmap_atomic(lowmem_page, KM_USER1);
  }
  
  /* Undo shmem_swp_map(): drop the KM_USER1 atomic mapping of @entry. */
  static inline void shmem_swp_unmap(swp_entry_t *entry)
  {
  	void *kaddr = entry;

  	kunmap_atomic(kaddr, KM_USER1);
  }
  
  /* Return the shmem-private superblock info stored in sb->s_fs_info. */
  static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
  {
  	struct shmem_sb_info *sbinfo = sb->s_fs_info;

  	return sbinfo;
  }
  
  /*
   * shmem_file_setup pre-accounts the whole fixed size of a VM object,
   * for shared memory and for shared anonymous (/dev/zero) mappings
   * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1),
   * consistent with the pre-accounting of private mappings ...
   */
  static inline int shmem_acct_size(unsigned long flags, loff_t size)
  {
  	/*
  	 * VM_NORESERVE objects are not pre-accounted: report success (0).
  	 * Otherwise charge VM_ACCT(size) pages and return the result of
  	 * security_vm_enough_memory_kern().
  	 */
  	return (flags & VM_NORESERVE) ?
  		0 : security_vm_enough_memory_kern(VM_ACCT(size));
  }
  
  static inline void shmem_unacct_size(unsigned long flags, loff_t size)
  {
  	/* Counterpart of shmem_acct_size(): nothing was charged if VM_NORESERVE */
  	if (!(flags & VM_NORESERVE))
  		vm_unacct_memory(VM_ACCT(size));
  }
  
  /*
   * ... whereas tmpfs objects are accounted incrementally as
   * pages are allocated, in order to allow huge sparse files.
   * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
   * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
   */
  static inline int shmem_acct_block(unsigned long flags)
  {
0b0a0806b   Hugh Dickins   shmem: fix shared...
200
201
  	return (flags & VM_NORESERVE) ?
  		security_vm_enough_memory_kern(VM_ACCT(PAGE_CACHE_SIZE)) : 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
202
203
204
205
  }
  
  static inline void shmem_unacct_blocks(unsigned long flags, long pages)
  {
0b0a0806b   Hugh Dickins   shmem: fix shared...
206
  	if (flags & VM_NORESERVE)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
207
208
  		vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
  }
759b9775c   Hugh Dickins   [PATCH] shmem and...
209
  static const struct super_operations shmem_ops;
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
210
  static const struct address_space_operations shmem_aops;
15ad7cdcf   Helge Deller   [PATCH] struct se...
211
  static const struct file_operations shmem_file_operations;
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
212
213
214
  static const struct inode_operations shmem_inode_operations;
  static const struct inode_operations shmem_dir_inode_operations;
  static const struct inode_operations shmem_special_inode_operations;
f0f37e2f7   Alexey Dobriyan   const: mark struc...
215
  static const struct vm_operations_struct shmem_vm_ops;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
216

6c231b7ba   Ravikiran G Thirumalai   [PATCH] Additions...
217
  /* Backing device description for shmem: readahead is disabled. */
  static struct backing_dev_info shmem_backing_dev_info  __read_mostly = {
  	.ra_pages	= 0,	/* No readahead */
  	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
  	.unplug_io_fn	= default_unplug_io_fn,
  };
  
  static LIST_HEAD(shmem_swaplist);
cb5f7b9a4   Hugh Dickins   tmpfs: make shmem...
224
  static DEFINE_MUTEX(shmem_swaplist_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
225
226
227
228
  
  static void shmem_free_blocks(struct inode *inode, long pages)
  {
  	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
229
  	if (sbinfo->max_blocks) {
7e496299d   Tim Chen   tmpfs: make tmpfs...
230
231
  		percpu_counter_add(&sbinfo->used_blocks, -pages);
  		spin_lock(&inode->i_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
232
  		inode->i_blocks -= pages*BLOCKS_PER_PAGE;
7e496299d   Tim Chen   tmpfs: make tmpfs...
233
  		spin_unlock(&inode->i_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
234
235
  	}
  }
5b04c6890   Pavel Emelyanov   shmem: factor out...
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
  /*
   * Claim one inode from the superblock's quota.  Returns 0 on success
   * (or when no inode limit is configured), -ENOSPC when none are free.
   */
  static int shmem_reserve_inode(struct super_block *sb)
  {
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
  	int err = 0;

  	/* max_inodes == 0: no limit, nothing to account */
  	if (!sbinfo->max_inodes)
  		return 0;

  	spin_lock(&sbinfo->stat_lock);
  	if (sbinfo->free_inodes)
  		sbinfo->free_inodes--;
  	else
  		err = -ENOSPC;
  	spin_unlock(&sbinfo->stat_lock);

  	return err;
  }
  
  /* Return one inode to the superblock's quota, if a limit is configured. */
  static void shmem_free_inode(struct super_block *sb)
  {
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);

  	/* max_inodes == 0: no limit, nothing to account */
  	if (!sbinfo->max_inodes)
  		return;

  	spin_lock(&sbinfo->stat_lock);
  	sbinfo->free_inodes++;
  	spin_unlock(&sbinfo->stat_lock);
  }
467118102   Randy Dunlap   mm/shmem and tiny...
260
  /**
   * shmem_recalc_inode - recalculate the size of an inode
   * @inode: inode to recalc
   *
   * We have to calculate the free blocks since the mm can drop
   * undirtied hole pages behind our back.
   *
   * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
   * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
   *
   * It has to be called with the spinlock held.
   */
  static void shmem_recalc_inode(struct inode *inode)
  {
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	long freed;
  
  	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
  	if (freed > 0) {
  		/* Pages vanished behind our back: fix up all three accountings */
  		info->alloced -= freed;
  		shmem_unacct_blocks(info->flags, freed);
  		shmem_free_blocks(inode, freed);
  	}
  }
467118102   Randy Dunlap   mm/shmem and tiny...
284
  /**
   * shmem_swp_entry - find the swap vector position in the info structure
   * @info:  info structure for the inode
   * @index: index of the page to find
   * @page:  optional page to add to the structure. Has to be preset to
   *         all zeros
   *
   * If there is no space allocated yet it will return NULL when
   * page is NULL, else it will use the page for the needed block,
   * setting it to NULL on return to indicate that it has been used.
   *
   * On a non-NULL return the caller is expected to shmem_swp_unmap() the
   * entry: indirect entries are returned kmap_atomic'ed, and for direct
   * entries shmem_swp_balance_unmap() keeps that unmap in balance.
   *
   * The swap vector is organized the following way:
   *
   * There are SHMEM_NR_DIRECT entries directly stored in the
   * shmem_inode_info structure. So small files do not need an additional
   * allocation.
   *
   * For pages with index >= SHMEM_NR_DIRECT there is the pointer
   * i_indirect which points to a page which holds in the first half
   * doubly indirect blocks, in the second half triple indirect blocks:
   *
   * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
   * following layout (for SHMEM_NR_DIRECT == 16):
   *
   * i_indirect -> dir --> 16-19
   * 	      |	     +-> 20-23
   * 	      |
   * 	      +-->dir2 --> 24-27
   * 	      |	       +-> 28-31
   * 	      |	       +-> 32-35
   * 	      |	       +-> 36-39
   * 	      |
   * 	      +-->dir3 --> 40-43
   * 	       	       +-> 44-47
   * 	      	       +-> 48-51
   * 	      	       +-> 52-55
   */
  static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page)
  {
  	unsigned long offset;
  	struct page **dir;
  	struct page *subdir;
  
  	/* Direct entries live in the inode info itself: nothing to map */
  	if (index < SHMEM_NR_DIRECT) {
  		shmem_swp_balance_unmap();
  		return info->i_direct+index;
  	}
  	/* No top-level directory yet: consume the caller's page for it */
  	if (!info->i_indirect) {
  		if (page) {
  			info->i_indirect = *page;
  			*page = NULL;
  		}
  		return NULL;			/* need another page */
  	}
  
  	/* offset: slot within the leaf page; index: which leaf page */
  	index -= SHMEM_NR_DIRECT;
  	offset = index % ENTRIES_PER_PAGE;
  	index /= ENTRIES_PER_PAGE;
  	dir = shmem_dir_map(info->i_indirect);
  
  	/* Second half of the top directory points at middle directories */
  	if (index >= ENTRIES_PER_PAGE/2) {
  		index -= ENTRIES_PER_PAGE/2;
  		dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
  		index %= ENTRIES_PER_PAGE;
  		subdir = *dir;
  		if (!subdir) {
  			if (page) {
  				*dir = *page;
  				*page = NULL;
  			}
  			shmem_dir_unmap(dir);
  			return NULL;		/* need another page */
  		}
  		/* Descend: swap the top-level mapping for the middle one */
  		shmem_dir_unmap(dir);
  		dir = shmem_dir_map(subdir);
  	}
  
  	/* dir now maps the directory whose slot 'index' holds the leaf page */
  	dir += index;
  	subdir = *dir;
  	if (!subdir) {
  		if (!page || !(subdir = *page)) {
  			shmem_dir_unmap(dir);
  			return NULL;		/* need a page */
  		}
  		*dir = subdir;
  		*page = NULL;
  	}
  	shmem_dir_unmap(dir);
  	return shmem_swp_map(subdir) + offset;
  }
  
  /*
   * Store @value in the swap vector slot @entry and keep the counters in
   * step: info->swapped goes up by one for a non-zero @value and down by
   * one for zero.  For a slot outside info->i_direct, the same adjustment
   * is applied to page_private() of the page containing the slot.
   *
   * NOTE(review): the +/-1 is unconditional, so this presumably relies on
   * callers only setting an empty slot or clearing a used one - confirm.
   */
  static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value)
  {
  	long incdec = value? 1: -1;
  
  	entry->val = value;
  	info->swapped += incdec;
  	/* Unsigned compare: true for any entry not inside the i_direct array */
  	if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) {
  		struct page *page = kmap_atomic_to_page(entry);
  		set_page_private(page, page_private(page) + incdec);
  	}
  }
467118102   Randy Dunlap   mm/shmem and tiny...
386
  /**
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
387
   * shmem_swp_alloc - get the position of the swap entry for the page.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
388
389
390
   * @info:	info structure for the inode
   * @index:	index of the page to find
   * @sgp:	check and recheck i_size? skip allocation?
467118102   Randy Dunlap   mm/shmem and tiny...
391
392
   *
   * If the entry does not exist, allocate it.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
   */
  static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
  {
  	struct inode *inode = &info->vfs_inode;
  	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
  	struct page *page = NULL;
  	swp_entry_t *entry;
  
  	if (sgp != SGP_WRITE &&
  	    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
  		return ERR_PTR(-EINVAL);
  
  	while (!(entry = shmem_swp_entry(info, index, &page))) {
  		if (sgp == SGP_READ)
  			return shmem_swp_map(ZERO_PAGE(0));
  		/*
7e496299d   Tim Chen   tmpfs: make tmpfs...
409
  		 * Test used_blocks against 1 less max_blocks, since we have 1 data
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
410
411
412
  		 * page (and perhaps indirect index pages) yet to allocate:
  		 * a waste to allocate index if we cannot allocate data.
  		 */
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
413
  		if (sbinfo->max_blocks) {
7e496299d   Tim Chen   tmpfs: make tmpfs...
414
  			if (percpu_counter_compare(&sbinfo->used_blocks, (sbinfo->max_blocks - 1)) > 0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
415
  				return ERR_PTR(-ENOSPC);
7e496299d   Tim Chen   tmpfs: make tmpfs...
416
417
  			percpu_counter_inc(&sbinfo->used_blocks);
  			spin_lock(&inode->i_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
418
  			inode->i_blocks += BLOCKS_PER_PAGE;
7e496299d   Tim Chen   tmpfs: make tmpfs...
419
  			spin_unlock(&inode->i_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
420
421
422
  		}
  
  		spin_unlock(&info->lock);
769848c03   Mel Gorman   Add __GFP_MOVABLE...
423
  		page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
  		spin_lock(&info->lock);
  
  		if (!page) {
  			shmem_free_blocks(inode, 1);
  			return ERR_PTR(-ENOMEM);
  		}
  		if (sgp != SGP_WRITE &&
  		    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
  			entry = ERR_PTR(-EINVAL);
  			break;
  		}
  		if (info->next_index <= index)
  			info->next_index = index + 1;
  	}
  	if (page) {
  		/* another task gave its page, or truncated the file */
  		shmem_free_blocks(inode, 1);
  		shmem_dir_free(page);
  	}
  	if (info->next_index <= index && !IS_ERR(entry))
  		info->next_index = index + 1;
  	return entry;
  }
467118102   Randy Dunlap   mm/shmem and tiny...
447
  /**
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
448
   * shmem_free_swp - free some swap entries in a directory
1ae700063   Hugh Dickins   [PATCH] holepunch...
449
450
451
   * @dir:        pointer to the directory
   * @edir:       pointer after last entry of the directory
   * @punch_lock: pointer to spinlock when needed for the holepunch case
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
452
   */
1ae700063   Hugh Dickins   [PATCH] holepunch...
453
454
  static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir,
  						spinlock_t *punch_lock)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
455
  {
1ae700063   Hugh Dickins   [PATCH] holepunch...
456
  	spinlock_t *punch_unlock = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
457
458
459
460
461
  	swp_entry_t *ptr;
  	int freed = 0;
  
  	for (ptr = dir; ptr < edir; ptr++) {
  		if (ptr->val) {
1ae700063   Hugh Dickins   [PATCH] holepunch...
462
463
464
465
466
467
468
  			if (unlikely(punch_lock)) {
  				punch_unlock = punch_lock;
  				punch_lock = NULL;
  				spin_lock(punch_unlock);
  				if (!ptr->val)
  					continue;
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
469
470
471
472
473
  			free_swap_and_cache(*ptr);
  			*ptr = (swp_entry_t){0};
  			freed++;
  		}
  	}
1ae700063   Hugh Dickins   [PATCH] holepunch...
474
475
  	if (punch_unlock)
  		spin_unlock(punch_unlock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
476
477
  	return freed;
  }
1ae700063   Hugh Dickins   [PATCH] holepunch...
478
479
  /*
   * Map the swap entry page @subdir and free its entries in the range
   * [@offset, @limit), working in chunks of at most LATENCY_LIMIT entries.
   * @punch_lock is passed through to shmem_free_swp() for each chunk.
   *
   * If a reschedule is needed between chunks, both atomic mappings are
   * dropped first: the swap page mapping and, if *@dir is non-NULL, the
   * directory mapping, in which case *@dir is set to NULL so the caller
   * knows it must map the directory again.
   *
   * Returns the total number of entries freed.
   */
  static int shmem_map_and_free_swp(struct page *subdir, int offset,
  		int limit, struct page ***dir, spinlock_t *punch_lock)
  {
  	swp_entry_t *ptr;
  	int freed = 0;
  
  	ptr = shmem_swp_map(subdir);
  	for (; offset < limit; offset += LATENCY_LIMIT) {
  		int size = limit - offset;
  		if (size > LATENCY_LIMIT)
  			size = LATENCY_LIMIT;
  		freed += shmem_free_swp(ptr+offset, ptr+offset+size,
  							punch_lock);
  		if (need_resched()) {
  			/* Drop every atomic mapping before cond_resched() */
  			shmem_swp_unmap(ptr);
  			if (*dir) {
  				shmem_dir_unmap(*dir);
  				*dir = NULL;
  			}
  			cond_resched();
  			ptr = shmem_swp_map(subdir);
  		}
  	}
  	shmem_swp_unmap(ptr);
  	return freed;
  }
  
  /*
   * Free a chain of directory pages linked through page->lru.  @next is
   * the first list node; the walk stops at a NULL ->next pointer.  At
   * least one page is always freed, and cond_resched() is called after
   * every LATENCY_LIMIT pages.
   */
  static void shmem_free_pages(struct list_head *next)
  {
  	int batch = 0;

  	for (;;) {
  		struct page *victim = container_of(next, struct page, lru);

  		/* Step forward first: the list node lives inside the page */
  		next = next->next;
  		shmem_dir_free(victim);

  		if (++batch >= LATENCY_LIMIT) {
  			cond_resched();
  			batch = 0;
  		}

  		if (!next)
  			break;
  	}
  }
f6b3ec238   Badari Pulavarty   [PATCH] madvise(M...
521
  static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
  {
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	unsigned long idx;
  	unsigned long size;
  	unsigned long limit;
  	unsigned long stage;
  	unsigned long diroff;
  	struct page **dir;
  	struct page *topdir;
  	struct page *middir;
  	struct page *subdir;
  	swp_entry_t *ptr;
  	LIST_HEAD(pages_to_free);
  	long nr_pages_to_free = 0;
  	long nr_swaps_freed = 0;
  	int offset;
  	int freed;
a2646d1e6   Hugh Dickins   [PATCH] holepunch...
539
  	int punch_hole;
1ae700063   Hugh Dickins   [PATCH] holepunch...
540
541
  	spinlock_t *needs_lock;
  	spinlock_t *punch_lock;
a2646d1e6   Hugh Dickins   [PATCH] holepunch...
542
  	unsigned long upper_limit;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
543
544
  
  	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
f6b3ec238   Badari Pulavarty   [PATCH] madvise(M...
545
  	idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
546
547
548
549
550
  	if (idx >= info->next_index)
  		return;
  
  	spin_lock(&info->lock);
  	info->flags |= SHMEM_TRUNCATE;
f6b3ec238   Badari Pulavarty   [PATCH] madvise(M...
551
552
  	if (likely(end == (loff_t) -1)) {
  		limit = info->next_index;
a2646d1e6   Hugh Dickins   [PATCH] holepunch...
553
  		upper_limit = SHMEM_MAX_INDEX;
f6b3ec238   Badari Pulavarty   [PATCH] madvise(M...
554
  		info->next_index = idx;
1ae700063   Hugh Dickins   [PATCH] holepunch...
555
  		needs_lock = NULL;
a2646d1e6   Hugh Dickins   [PATCH] holepunch...
556
  		punch_hole = 0;
f6b3ec238   Badari Pulavarty   [PATCH] madvise(M...
557
  	} else {
a2646d1e6   Hugh Dickins   [PATCH] holepunch...
558
559
560
561
562
563
564
565
  		if (end + 1 >= inode->i_size) {	/* we may free a little more */
  			limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
  							PAGE_CACHE_SHIFT;
  			upper_limit = SHMEM_MAX_INDEX;
  		} else {
  			limit = (end + 1) >> PAGE_CACHE_SHIFT;
  			upper_limit = limit;
  		}
1ae700063   Hugh Dickins   [PATCH] holepunch...
566
  		needs_lock = &info->lock;
f6b3ec238   Badari Pulavarty   [PATCH] madvise(M...
567
568
  		punch_hole = 1;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
569
  	topdir = info->i_indirect;
f6b3ec238   Badari Pulavarty   [PATCH] madvise(M...
570
  	if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
571
572
573
574
575
576
577
578
579
580
581
  		info->i_indirect = NULL;
  		nr_pages_to_free++;
  		list_add(&topdir->lru, &pages_to_free);
  	}
  	spin_unlock(&info->lock);
  
  	if (info->swapped && idx < SHMEM_NR_DIRECT) {
  		ptr = info->i_direct;
  		size = limit;
  		if (size > SHMEM_NR_DIRECT)
  			size = SHMEM_NR_DIRECT;
1ae700063   Hugh Dickins   [PATCH] holepunch...
582
  		nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
583
  	}
92a3d03aa   Badari Pulavarty   [PATCH] Fix for s...
584
585
586
587
588
  
  	/*
  	 * If there are no indirect blocks or we are punching a hole
  	 * below indirect blocks, nothing to be done.
  	 */
a2646d1e6   Hugh Dickins   [PATCH] holepunch...
589
  	if (!topdir || limit <= SHMEM_NR_DIRECT)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
590
  		goto done2;
1ae700063   Hugh Dickins   [PATCH] holepunch...
591
592
593
594
595
596
597
598
599
600
601
602
  	/*
  	 * The truncation case has already dropped info->lock, and we're safe
  	 * because i_size and next_index have already been lowered, preventing
  	 * access beyond.  But in the punch_hole case, we still need to take
  	 * the lock when updating the swap directory, because there might be
  	 * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
  	 * shmem_writepage.  However, whenever we find we can remove a whole
  	 * directory page (not at the misaligned start or end of the range),
  	 * we first NULLify its pointer in the level above, and then have no
  	 * need to take the lock when updating its contents: needs_lock and
  	 * punch_lock (either pointing to info->lock or NULL) manage this.
  	 */
a2646d1e6   Hugh Dickins   [PATCH] holepunch...
603
  	upper_limit -= SHMEM_NR_DIRECT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
  	limit -= SHMEM_NR_DIRECT;
  	idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
  	offset = idx % ENTRIES_PER_PAGE;
  	idx -= offset;
  
  	dir = shmem_dir_map(topdir);
  	stage = ENTRIES_PER_PAGEPAGE/2;
  	if (idx < ENTRIES_PER_PAGEPAGE/2) {
  		middir = topdir;
  		diroff = idx/ENTRIES_PER_PAGE;
  	} else {
  		dir += ENTRIES_PER_PAGE/2;
  		dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE;
  		while (stage <= idx)
  			stage += ENTRIES_PER_PAGEPAGE;
  		middir = *dir;
  		if (*dir) {
  			diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
  				ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
a2646d1e6   Hugh Dickins   [PATCH] holepunch...
623
  			if (!diroff && !offset && upper_limit >= stage) {
1ae700063   Hugh Dickins   [PATCH] holepunch...
624
625
626
627
628
629
630
  				if (needs_lock) {
  					spin_lock(needs_lock);
  					*dir = NULL;
  					spin_unlock(needs_lock);
  					needs_lock = NULL;
  				} else
  					*dir = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
  				nr_pages_to_free++;
  				list_add(&middir->lru, &pages_to_free);
  			}
  			shmem_dir_unmap(dir);
  			dir = shmem_dir_map(middir);
  		} else {
  			diroff = 0;
  			offset = 0;
  			idx = stage;
  		}
  	}
  
  	for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) {
  		if (unlikely(idx == stage)) {
  			shmem_dir_unmap(dir);
  			dir = shmem_dir_map(topdir) +
  			    ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
  			while (!*dir) {
  				dir++;
  				idx += ENTRIES_PER_PAGEPAGE;
  				if (idx >= limit)
  					goto done1;
  			}
  			stage = idx + ENTRIES_PER_PAGEPAGE;
  			middir = *dir;
1ae700063   Hugh Dickins   [PATCH] holepunch...
656
657
  			if (punch_hole)
  				needs_lock = &info->lock;
a2646d1e6   Hugh Dickins   [PATCH] holepunch...
658
  			if (upper_limit >= stage) {
1ae700063   Hugh Dickins   [PATCH] holepunch...
659
660
661
662
663
664
665
  				if (needs_lock) {
  					spin_lock(needs_lock);
  					*dir = NULL;
  					spin_unlock(needs_lock);
  					needs_lock = NULL;
  				} else
  					*dir = NULL;
a2646d1e6   Hugh Dickins   [PATCH] holepunch...
666
667
668
  				nr_pages_to_free++;
  				list_add(&middir->lru, &pages_to_free);
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
669
670
671
672
673
  			shmem_dir_unmap(dir);
  			cond_resched();
  			dir = shmem_dir_map(middir);
  			diroff = 0;
  		}
1ae700063   Hugh Dickins   [PATCH] holepunch...
674
  		punch_lock = needs_lock;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
675
  		subdir = dir[diroff];
1ae700063   Hugh Dickins   [PATCH] holepunch...
676
677
678
679
680
681
682
683
684
685
686
687
  		if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
  			if (needs_lock) {
  				spin_lock(needs_lock);
  				dir[diroff] = NULL;
  				spin_unlock(needs_lock);
  				punch_lock = NULL;
  			} else
  				dir[diroff] = NULL;
  			nr_pages_to_free++;
  			list_add(&subdir->lru, &pages_to_free);
  		}
  		if (subdir && page_private(subdir) /* has swap entries */) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
688
689
690
691
  			size = limit - idx;
  			if (size > ENTRIES_PER_PAGE)
  				size = ENTRIES_PER_PAGE;
  			freed = shmem_map_and_free_swp(subdir,
1ae700063   Hugh Dickins   [PATCH] holepunch...
692
  					offset, size, &dir, punch_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
693
694
695
  			if (!dir)
  				dir = shmem_dir_map(middir);
  			nr_swaps_freed += freed;
1ae700063   Hugh Dickins   [PATCH] holepunch...
696
  			if (offset || punch_lock) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
697
  				spin_lock(&info->lock);
1ae700063   Hugh Dickins   [PATCH] holepunch...
698
699
  				set_page_private(subdir,
  					page_private(subdir) - freed);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
700
  				spin_unlock(&info->lock);
1ae700063   Hugh Dickins   [PATCH] holepunch...
701
702
  			} else
  				BUG_ON(page_private(subdir) != freed);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
703
  		}
1ae700063   Hugh Dickins   [PATCH] holepunch...
704
  		offset = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
705
706
707
708
709
710
711
  	}
  done1:
  	shmem_dir_unmap(dir);
  done2:
  	if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
  		/*
  		 * Call truncate_inode_pages again: racing shmem_unuse_inode
3889e6e76   npiggin@suse.de   tmpfs: convert to...
712
713
714
715
716
  		 * may have swizzled a page in from swap since
  		 * truncate_pagecache or generic_delete_inode did it, before we
  		 * lowered next_index.  Also, though shmem_getpage checks
  		 * i_size before adding to cache, no recheck after: so fix the
  		 * narrow window there too.
16a100190   Hugh Dickins   [PATCH] holepunch...
717
718
719
720
721
  		 *
  		 * Recalling truncate_inode_pages_range and unmap_mapping_range
  		 * every time for punch_hole (which never got a chance to clear
  		 * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive,
  		 * yet hardly ever necessary: try to optimize them out later.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
722
  		 */
f6b3ec238   Badari Pulavarty   [PATCH] madvise(M...
723
  		truncate_inode_pages_range(inode->i_mapping, start, end);
16a100190   Hugh Dickins   [PATCH] holepunch...
724
725
726
  		if (punch_hole)
  			unmap_mapping_range(inode->i_mapping, start,
  							end - start, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
  	}
  
  	spin_lock(&info->lock);
  	info->flags &= ~SHMEM_TRUNCATE;
  	info->swapped -= nr_swaps_freed;
  	if (nr_pages_to_free)
  		shmem_free_blocks(inode, nr_pages_to_free);
  	shmem_recalc_inode(inode);
  	spin_unlock(&info->lock);
  
  	/*
  	 * Empty swap vector directory pages to be freed?
  	 */
  	if (!list_empty(&pages_to_free)) {
  		pages_to_free.prev->next = NULL;
  		shmem_free_pages(pages_to_free.next);
  	}
  }
  
  /*
   * shmem_notify_change - apply the attribute changes in @attr to the inode
   * behind @dentry.
   *
   * The request is first validated with inode_change_ok(); a non-zero result
   * is returned unchanged.  For a regular file whose ATTR_SIZE request
   * differs from the current i_size, the new size is installed with
   * truncate_setsize() and shmem_truncate_range() is called from newsize to
   * the end of the file.  When shrinking, the partial last page is pinned
   * first and SHMEM_PAGEIN is cleared (see the comments in the body).  The
   * attributes are then copied into the inode with setattr_copy().
   *
   * Returns 0, or, when CONFIG_TMPFS_POSIX_ACL is set and ATTR_MODE was
   * requested, whatever generic_acl_chmod() returns.
   */
  static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
  {
  	struct inode *inode = dentry->d_inode;
af5a30d8c   Nick Piggin   fix truncate inod...
749
  	loff_t newsize = attr->ia_size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
750
  	int error;
db78b877f   Christoph Hellwig   always call inode...
751
752
753
  	error = inode_change_ok(inode, attr);
  	if (error)
  		return error;
af5a30d8c   Nick Piggin   fix truncate inod...
754
755
  	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)
  					&& newsize != inode->i_size) {
3889e6e76   npiggin@suse.de   tmpfs: convert to...
756
757
758
  		struct page *page = NULL;
  
  		if (newsize < inode->i_size) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
759
760
761
762
763
764
765
  			/*
  			 * If truncating down to a partial page, then
  			 * if that page is already allocated, hold it
  			 * in memory until the truncation is over, so
  			 * truncate_partial_page cannot miss it were
  			 * it assigned to swap.
  			 */
3889e6e76   npiggin@suse.de   tmpfs: convert to...
766
  			if (newsize & (PAGE_CACHE_SIZE-1)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
767
  				(void) shmem_getpage(inode,
3889e6e76   npiggin@suse.de   tmpfs: convert to...
768
  					newsize >> PAGE_CACHE_SHIFT,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
769
  						&page, SGP_READ, NULL);
d3602444e   Hugh Dickins   shmem_getpage ret...
770
771
  				if (page)
  					unlock_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
772
773
774
775
776
777
778
779
  			}
  			/*
  			 * Reset SHMEM_PAGEIN flag so that shmem_truncate can
  			 * detect if any pages might have been added to cache
  			 * after truncate_inode_pages.  But we needn't bother
  			 * if it's being fully truncated to zero-length: the
  			 * nrpages check is efficient enough in that case.
  			 */
3889e6e76   npiggin@suse.de   tmpfs: convert to...
780
  			if (newsize) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
781
782
783
784
785
786
  				struct shmem_inode_info *info = SHMEM_I(inode);
  				spin_lock(&info->lock);
  				info->flags &= ~SHMEM_PAGEIN;
  				spin_unlock(&info->lock);
  			}
  		}
3889e6e76   npiggin@suse.de   tmpfs: convert to...
787

2c27c65ed   Christoph Hellwig   check ATTR_SIZE c...
788
789
  		/* XXX(truncate): truncate_setsize should be called last */
  		truncate_setsize(inode, newsize);
3889e6e76   npiggin@suse.de   tmpfs: convert to...
790
791
  		/* Drop the reference on the partial page pinned above, if any. */
  		if (page)
  			page_cache_release(page);
3889e6e76   npiggin@suse.de   tmpfs: convert to...
792
  		shmem_truncate_range(inode, newsize, (loff_t)-1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
793
  	}
db78b877f   Christoph Hellwig   always call inode...
794
  	setattr_copy(inode, attr);
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
795
  #ifdef CONFIG_TMPFS_POSIX_ACL
db78b877f   Christoph Hellwig   always call inode...
796
  	if (attr->ia_valid & ATTR_MODE)
1c7c474c3   Christoph Hellwig   make generic_acl ...
797
  		error = generic_acl_chmod(inode);
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
798
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
799
800
  	return error;
  }
1f895f75d   Al Viro   switch shmem.c to...
801
  /*
   * shmem_evict_inode - tear down @inode as it is being evicted.
   *
   * Only when the mapping uses shmem_aops: drop the page cache, unaccount
   * the file size via shmem_unacct_size(), zero i_size, truncate the whole
   * range and unlink the inode from the swaplist if it is on it.
   * In all cases i_blocks must then be zero (BUG_ON otherwise), after which
   * shmem_free_inode() is called on the superblock, then end_writeback().
   */
  static void shmem_evict_inode(struct inode *inode)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
802
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
803
  	struct shmem_inode_info *info = SHMEM_I(inode);
3889e6e76   npiggin@suse.de   tmpfs: convert to...
804
  	if (inode->i_mapping->a_ops == &shmem_aops) {
fef266580   Mark Fasheh   [PATCH] update fi...
805
  		truncate_inode_pages(inode->i_mapping, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
806
807
  		shmem_unacct_size(info->flags, inode->i_size);
  		inode->i_size = 0;
3889e6e76   npiggin@suse.de   tmpfs: convert to...
808
  		shmem_truncate_range(inode, 0, (loff_t)-1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
809
  		/*
  		 * list_empty() is tested without shmem_swaplist_mutex; the
  		 * mutex is taken only when there is an entry to unlink.
  		 */
  		if (!list_empty(&info->swaplist)) {
cb5f7b9a4   Hugh Dickins   tmpfs: make shmem...
810
  			mutex_lock(&shmem_swaplist_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
811
  			list_del_init(&info->swaplist);
cb5f7b9a4   Hugh Dickins   tmpfs: make shmem...
812
  			mutex_unlock(&shmem_swaplist_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
813
814
  		}
  	}
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
815
  	BUG_ON(inode->i_blocks);
5b04c6890   Pavel Emelyanov   shmem: factor out...
816
  	shmem_free_inode(inode->i_sb);
1f895f75d   Al Viro   switch shmem.c to...
817
  	end_writeback(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
  }
  
  /*
   * Scan the swap entries in [dir, edir) for the first one whose value
   * equals that of @entry.  Returns that slot's index relative to @dir,
   * or -1 if no slot in the range matches.
   */
  static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
  {
  	swp_entry_t *slot = dir;
  
  	while (slot < edir) {
  		if (slot->val == entry.val)
  			return slot - dir;
  		slot++;
  	}
  	return -1;
  }
  
  /*
   * shmem_unuse_inode - look for swap @entry in one inode's swap vector and,
   * if it is there, move @page into that inode's page cache.
   *
   * The direct entries (info->i_direct) are searched first, then the
   * indirect directory pages, all under info->lock.  The caller in this
   * file holds shmem_swaplist_mutex and the page lock across the call.
   *
   * Returns 0 when @entry is not found in this inode; in that case neither
   * shmem_swaplist_mutex nor @page is touched here.  Otherwise the mutex is
   * unlocked, @page is unlocked and released, and the return value is
   * non-zero: 1 once the entry has been dealt with (including when the inode
   * could not be grabbed or the slot no longer holds @entry), or a negative
   * error from mem_cgroup_cache_charge(), radix_tree_preload() or
   * add_to_page_cache_locked().
   */
  static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
  {
  	struct inode *inode;
  	unsigned long idx;
  	unsigned long size;
  	unsigned long limit;
  	unsigned long stage;
  	struct page **dir;
  	struct page *subdir;
  	swp_entry_t *ptr;
  	int offset;
d9fe526a8   Hugh Dickins   tmpfs: allow file...
842
  	int error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
843
844
845
846
  
  	idx = 0;
  	ptr = info->i_direct;
  	spin_lock(&info->lock);
1b1b32f2c   Hugh Dickins   tmpfs: fix shmem_...
847
848
849
850
  	if (!info->swapped) {
  		list_del_init(&info->swaplist);
  		goto lost2;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
851
852
853
854
855
  	limit = info->next_index;
  	size = limit;
  	if (size > SHMEM_NR_DIRECT)
  		size = SHMEM_NR_DIRECT;
  	offset = shmem_find_swp(entry, ptr, ptr+size);
2e0e26c76   Hugh Dickins   tmpfs: open a win...
856
  	if (offset >= 0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
857
  		goto found;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
858
859
860
861
862
863
864
865
866
  	if (!info->i_indirect)
  		goto lost2;
  
  	dir = shmem_dir_map(info->i_indirect);
  	stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2;
  
  	for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
  		if (unlikely(idx == stage)) {
  			shmem_dir_unmap(dir-1);
cb5f7b9a4   Hugh Dickins   tmpfs: make shmem...
867
868
869
870
871
872
873
874
  			if (cond_resched_lock(&info->lock)) {
  				/* check it has not been truncated */
  				if (limit > info->next_index) {
  					limit = info->next_index;
  					if (idx >= limit)
  						goto lost2;
  				}
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
875
876
877
878
879
880
881
882
883
884
885
886
887
888
  			dir = shmem_dir_map(info->i_indirect) +
  			    ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
  			while (!*dir) {
  				dir++;
  				idx += ENTRIES_PER_PAGEPAGE;
  				if (idx >= limit)
  					goto lost1;
  			}
  			stage = idx + ENTRIES_PER_PAGEPAGE;
  			subdir = *dir;
  			shmem_dir_unmap(dir);
  			dir = shmem_dir_map(subdir);
  		}
  		subdir = *dir;
4c21e2f24   Hugh Dickins   [PATCH] mm: split...
889
  		if (subdir && page_private(subdir)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
890
891
892
893
894
  			ptr = shmem_swp_map(subdir);
  			size = limit - idx;
  			if (size > ENTRIES_PER_PAGE)
  				size = ENTRIES_PER_PAGE;
  			offset = shmem_find_swp(entry, ptr, ptr+size);
2e0e26c76   Hugh Dickins   tmpfs: open a win...
895
  			shmem_swp_unmap(ptr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
896
897
898
899
  			if (offset >= 0) {
  				shmem_dir_unmap(dir);
  				goto found;
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
900
901
902
903
904
905
906
907
908
  		}
  	}
  lost1:
  	shmem_dir_unmap(dir-1);
  lost2:
  	spin_unlock(&info->lock);
  	return 0;
  found:
  	idx += offset;
2e0e26c76   Hugh Dickins   tmpfs: open a win...
909
910
  	inode = igrab(&info->vfs_inode);
  	spin_unlock(&info->lock);
1b1b32f2c   Hugh Dickins   tmpfs: fix shmem_...
911
912
  	/*
  	 * Move _head_ to start search for next from here.
1f895f75d   Al Viro   switch shmem.c to...
913
  	 * But be careful: shmem_evict_inode checks list_empty without taking
1b1b32f2c   Hugh Dickins   tmpfs: fix shmem_...
914
915
916
917
918
919
  	 * mutex, and there's an instant in list_move_tail when info->swaplist
  	 * would appear empty, if it were the only one on shmem_swaplist.  We
  	 * could avoid doing it if inode NULL; or use this minor optimization.
  	 */
  	if (shmem_swaplist.next != &info->swaplist)
  		list_move_tail(&shmem_swaplist, &info->swaplist);
2e0e26c76   Hugh Dickins   tmpfs: open a win...
920
921
922
923
924
  	mutex_unlock(&shmem_swaplist_mutex);
  
  	error = 1;
  	if (!inode)
  		goto out;
d13d14430   KAMEZAWA Hiroyuki   memcg: handle swa...
925
  	/*
b5a84319a   KAMEZAWA Hiroyuki   memcg: fix shmem'...
926
927
928
  	 * Charge page using GFP_KERNEL while we can wait.
  	 * Charged back to the user(not to caller) when swap account is used.
  	 * add_to_page_cache() will be called with GFP_NOWAIT.
d13d14430   KAMEZAWA Hiroyuki   memcg: handle swa...
929
  	 */
82369553d   Hugh Dickins   memcgroup: fix ha...
930
  	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
b409f9fcf   Hugh Dickins   tmpfs: radix_tree...
931
932
  	if (error)
  		goto out;
82369553d   Hugh Dickins   memcgroup: fix ha...
933
  	error = radix_tree_preload(GFP_KERNEL);
69029cd55   KAMEZAWA Hiroyuki   memcg: remove ref...
934
935
936
937
  	if (error) {
  		mem_cgroup_uncharge_cache_page(page);
  		goto out;
  	}
b409f9fcf   Hugh Dickins   tmpfs: radix_tree...
938
  	error = 1;
2e0e26c76   Hugh Dickins   tmpfs: open a win...
939
940
941
  
  	/*
  	 * info->lock was dropped above, so look the slot up again and only
  	 * proceed if it still holds the entry we were asked about.
  	 */
  	spin_lock(&info->lock);
  	ptr = shmem_swp_entry(info, idx, NULL);
69029cd55   KAMEZAWA Hiroyuki   memcg: remove ref...
942
  	if (ptr && ptr->val == entry.val) {
e286781d5   Nick Piggin   mm: speculative p...
943
  		error = add_to_page_cache_locked(page, inode->i_mapping,
b409f9fcf   Hugh Dickins   tmpfs: radix_tree...
944
  						idx, GFP_NOWAIT);
69029cd55   KAMEZAWA Hiroyuki   memcg: remove ref...
945
946
947
  		/* does mem_cgroup_uncharge_cache_page on error */
  	} else	/* we must compensate for our precharge above */
  		mem_cgroup_uncharge_cache_page(page);
d9fe526a8   Hugh Dickins   tmpfs: allow file...
948
949
  	if (error == -EEXIST) {
  		struct page *filepage = find_get_page(inode->i_mapping, idx);
2e0e26c76   Hugh Dickins   tmpfs: open a win...
950
  		error = 1;
d9fe526a8   Hugh Dickins   tmpfs: allow file...
951
952
953
954
955
956
957
958
959
960
961
  		if (filepage) {
  			/*
  			 * There might be a more uptodate page coming down
  			 * from a stacked writepage: forget our swappage if so.
  			 */
  			if (PageUptodate(filepage))
  				error = 0;
  			page_cache_release(filepage);
  		}
  	}
  	if (!error) {
73b1262fa   Hugh Dickins   tmpfs: move swap ...
962
963
  		delete_from_swap_cache(page);
  		set_page_dirty(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
964
  		info->flags |= SHMEM_PAGEIN;
2e0e26c76   Hugh Dickins   tmpfs: open a win...
965
966
967
  		shmem_swp_set(info, ptr, 0);
  		swap_free(entry);
  		error = 1;	/* not an error, but entry was found */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
968
  	}
2e0e26c76   Hugh Dickins   tmpfs: open a win...
969
970
  	if (ptr)
  		shmem_swp_unmap(ptr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
971
  	spin_unlock(&info->lock);
b409f9fcf   Hugh Dickins   tmpfs: radix_tree...
972
  	radix_tree_preload_end();
2e0e26c76   Hugh Dickins   tmpfs: open a win...
973
974
975
976
977
  out:
  	unlock_page(page);
  	page_cache_release(page);
  	iput(inode);		/* allows for NULL */
  	return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
978
979
980
981
982
983
984
985
986
987
  }
  
  /*
   * shmem_unuse() searches every inode on shmem_swaplist for swap @entry,
   * i.e. for a shmem page that has been swapped out.
   *
   * Each inode is tried with shmem_unuse_inode() under shmem_swaplist_mutex.
   * As soon as one returns non-zero the search stops; that path has already
   * dropped the mutex and unlocked and released @page.  If no inode claims
   * the entry, the mutex is dropped here and @page is unlocked and released
   * here.
   *
   * Returns the negative error reported by shmem_unuse_inode(), otherwise 0.
   */
  int shmem_unuse(swp_entry_t entry, struct page *page)
  {
  	struct list_head *p, *next;
  	struct shmem_inode_info *info;
  	int found = 0;
cb5f7b9a4   Hugh Dickins   tmpfs: make shmem...
988
  	mutex_lock(&shmem_swaplist_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
989
990
  	list_for_each_safe(p, next, &shmem_swaplist) {
  		info = list_entry(p, struct shmem_inode_info, swaplist);
1b1b32f2c   Hugh Dickins   tmpfs: fix shmem_...
991
  		found = shmem_unuse_inode(info, entry, page);
cb5f7b9a4   Hugh Dickins   tmpfs: make shmem...
992
  		cond_resched();
2e0e26c76   Hugh Dickins   tmpfs: open a win...
993
994
  		if (found)
  			goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
995
  	}
cb5f7b9a4   Hugh Dickins   tmpfs: make shmem...
996
  	mutex_unlock(&shmem_swaplist_mutex);
aaa468653   Hugh Dickins   swap_info: note S...
997
998
999
1000
1001
1002
1003
1004
  	/*
  	 * Can some race bring us here?  We've been holding page lock,
  	 * so I think not; but would rather try again later than BUG()
  	 */
  	unlock_page(page);
  	page_cache_release(page);
  out:
  	return (found < 0) ? found : 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
  }
  
  /*
   * Move the page from the page cache to the swap cache.
   *
   * @page must be locked on entry (BUG_ON otherwise).  The page is only
   * redirtied, not swapped, when the inode is VM_LOCKED or no swap is
   * configured (total_swap_pages is zero).  A swap slot is requested only
   * when wbc->for_reclaim is set.
   *
   * On success the page is removed from the page cache, the swap entry is
   * recorded in the inode's swap vector, the inode is put on shmem_swaplist
   * if it was not already there, the page is handed to swap_writepage(),
   * and 0 is returned.
   *
   * Otherwise the page is redirtied.  For reclaim the function returns
   * AOP_WRITEPAGE_ACTIVATE with the page still locked; for other callers
   * the page is unlocked and 0 is returned.
   */
  static int shmem_writepage(struct page *page, struct writeback_control *wbc)
  {
  	struct shmem_inode_info *info;
  	swp_entry_t *entry, swap;
  	struct address_space *mapping;
  	unsigned long index;
  	struct inode *inode;
  
  	BUG_ON(!PageLocked(page));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1019
1020
1021
1022
1023
1024
  	mapping = page->mapping;
  	index = page->index;
  	inode = mapping->host;
  	info = SHMEM_I(inode);
  	if (info->flags & VM_LOCKED)
  		goto redirty;
d9fe526a8   Hugh Dickins   tmpfs: allow file...
1025
  	if (!total_swap_pages)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1026
  		goto redirty;
d9fe526a8   Hugh Dickins   tmpfs: allow file...
1027
1028
1029
1030
1031
  	/*
  	 * shmem_backing_dev_info's capabilities prevent regular writeback or
  	 * sync from ever calling shmem_writepage; but a stacking filesystem
  	 * may use the ->writepage of its underlying filesystem, in which case
  	 * tmpfs should write out to swap only in response to memory pressure,
5b0830cb9   Jens Axboe   writeback: get ri...
1032
1033
1034
  	 * and not for the writeback threads or sync.  However, in those cases,
  	 * we do still want to check if there's a redundant swappage to be
  	 * discarded.
d9fe526a8   Hugh Dickins   tmpfs: allow file...
1035
1036
1037
1038
1039
  	 */
  	if (wbc->for_reclaim)
  		swap = get_swap_page();
  	else
  		swap.val = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1040
  	spin_lock(&info->lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1041
1042
1043
1044
1045
  	if (index >= info->next_index) {
  		BUG_ON(!(info->flags & SHMEM_TRUNCATE));
  		goto unlock;
  	}
  	entry = shmem_swp_entry(info, index, NULL);
d9fe526a8   Hugh Dickins   tmpfs: allow file...
1046
1047
1048
1049
1050
1051
1052
1053
1054
  	if (entry->val) {
  		/*
  		 * The more uptodate page coming down from a stacked
  		 * writepage should replace our old swappage.
  		 */
  		free_swap_and_cache(*entry);
  		shmem_swp_set(info, entry, 0);
  	}
  	shmem_recalc_inode(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1055

d9fe526a8   Hugh Dickins   tmpfs: allow file...
1056
  	if (swap.val && add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
73b1262fa   Hugh Dickins   tmpfs: move swap ...
1057
  		remove_from_page_cache(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1058
1059
  		shmem_swp_set(info, entry, swap.val);
  		shmem_swp_unmap(entry);
1b1b32f2c   Hugh Dickins   tmpfs: fix shmem_...
1060
1061
1062
1063
  		/*
  		 * Take an inode reference only when the inode still has to be
  		 * added to shmem_swaplist; inode == NULL below means "already
  		 * listed, nothing to do".
  		 */
  		if (list_empty(&info->swaplist))
  			inode = igrab(inode);
  		else
  			inode = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1064
  		spin_unlock(&info->lock);
aaa468653   Hugh Dickins   swap_info: note S...
1065
  		swap_shmem_alloc(swap);
d9fe526a8   Hugh Dickins   tmpfs: allow file...
1066
  		BUG_ON(page_mapped(page));
73b1262fa   Hugh Dickins   tmpfs: move swap ...
1067
  		page_cache_release(page);	/* pagecache ref */
9fab5619b   Hugh Dickins   shmem: writepage ...
1068
  		swap_writepage(page, wbc);
1b1b32f2c   Hugh Dickins   tmpfs: fix shmem_...
1069
1070
1071
1072
1073
1074
1075
  		if (inode) {
  			mutex_lock(&shmem_swaplist_mutex);
  			/* move instead of add in case we're racing */
  			list_move_tail(&info->swaplist, &shmem_swaplist);
  			mutex_unlock(&shmem_swaplist_mutex);
  			iput(inode);
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1076
1077
1078
1079
1080
1081
  		return 0;
  	}
  
  	shmem_swp_unmap(entry);
  unlock:
  	spin_unlock(&info->lock);
2ca4532a4   Daisuke Nishimura   mm: add_to_swap_c...
1082
1083
1084
1085
  	/*
  	 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
  	 * clear SWAP_HAS_CACHE flag.
  	 */
cb4b86ba4   KAMEZAWA Hiroyuki   mm: add swap cach...
1086
  	swapcache_free(swap, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1087
1088
  redirty:
  	set_page_dirty(page);
d9fe526a8   Hugh Dickins   tmpfs: allow file...
1089
1090
1091
1092
  	if (wbc->for_reclaim)
  		return AOP_WRITEPAGE_ACTIVATE;	/* Return with page locked */
  	unlock_page(page);
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1093
1094
1095
  }
  
  #ifdef CONFIG_NUMA
680d794ba   akpm@linux-foundation.org   mount options: fi...
1096
  #ifdef CONFIG_TMPFS
71fe804b6   Lee Schermerhorn   mempolicy: use st...
1097
  /*
   * shmem_show_mpol - append ",mpol=<policy>" to @seq for memory policy @mpol.
   *
   * Prints nothing when @mpol is NULL or its mode is MPOL_DEFAULT.  The
   * policy text is produced by mpol_to_str() into a 64-byte stack buffer.
   */
  static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
680d794ba   akpm@linux-foundation.org   mount options: fi...
1098
  {
095f1fc4e   Lee Schermerhorn   mempolicy: rework...
1099
  	char buffer[64];
680d794ba   akpm@linux-foundation.org   mount options: fi...
1100

71fe804b6   Lee Schermerhorn   mempolicy: use st...
1101
  	if (!mpol || mpol->mode == MPOL_DEFAULT)
095f1fc4e   Lee Schermerhorn   mempolicy: rework...
1102
  		return;		/* show nothing */
680d794ba   akpm@linux-foundation.org   mount options: fi...
1103

71fe804b6   Lee Schermerhorn   mempolicy: use st...
1104
  	mpol_to_str(buffer, sizeof(buffer), mpol, 1);
095f1fc4e   Lee Schermerhorn   mempolicy: rework...
1105
1106
  
  	seq_printf(seq, ",mpol=%s", buffer);
680d794ba   akpm@linux-foundation.org   mount options: fi...
1107
  }
71fe804b6   Lee Schermerhorn   mempolicy: use st...
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
  
  /*
   * Return the mempolicy stored in @sbinfo after calling mpol_get() on it,
   * or NULL when @sbinfo has no policy set.  The policy pointer is read
   * and mpol_get() is called under sbinfo->stat_lock.
   */
  static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
  {
  	struct mempolicy *policy;

  	if (!sbinfo->mpol)
  		return NULL;

  	spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
  	policy = sbinfo->mpol;
  	mpol_get(policy);
  	spin_unlock(&sbinfo->stat_lock);

  	return policy;
  }
680d794ba   akpm@linux-foundation.org   mount options: fi...
1120
  #endif /* CONFIG_TMPFS */
02098feaa   Hugh Dickins   swapin needs gfp_...
1121
1122
  /*
   * shmem_swapin (CONFIG_NUMA) - read the page for swap @entry via
   * swapin_readahead(), using the shared policy looked up for index @idx.
   *
   * The policy found by mpol_shared_policy_lookup() is passed through
   * mpol_cond_copy() with the on-stack mpol as destination, and the result
   * is attached to a pseudo vma that exists only to carry that policy.
   * Returns whatever swapin_readahead() returns.
   */
  static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
  			struct shmem_inode_info *info, unsigned long idx)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1123
  {
52cd3b074   Lee Schermerhorn   mempolicy: rework...
1124
  	struct mempolicy mpol, *spol;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1125
  	struct vm_area_struct pvma;
c4cc6d07b   Hugh Dickins   swapin_readahead:...
1126
  	struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1127

52cd3b074   Lee Schermerhorn   mempolicy: rework...
1128
1129
  	spol = mpol_cond_copy(&mpol,
  				mpol_shared_policy_lookup(&info->policy, idx));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1130
  	/* Create a pseudo vma that just contains the policy */
c4cc6d07b   Hugh Dickins   swapin_readahead:...
1131
  	pvma.vm_start = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1132
  	pvma.vm_pgoff = idx;
c4cc6d07b   Hugh Dickins   swapin_readahead:...
1133
  	pvma.vm_ops = NULL;
52cd3b074   Lee Schermerhorn   mempolicy: rework...
1134
  	pvma.vm_policy = spol;
02098feaa   Hugh Dickins   swapin needs gfp_...
1135
  	page = swapin_readahead(entry, gfp, &pvma, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1136
1137
  	return page;
  }
02098feaa   Hugh Dickins   swapin needs gfp_...
1138
1139
  /*
   * shmem_alloc_page (CONFIG_NUMA) - allocate one page with @gfp via
   * alloc_page_vma(), using the shared policy looked up for index @idx.
   *
   * The pseudo vma is filled in only as far as needed to carry that policy.
   * Returns whatever alloc_page_vma() returns.
   */
  static struct page *shmem_alloc_page(gfp_t gfp,
  			struct shmem_inode_info *info, unsigned long idx)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1140
1141
  {
  	struct vm_area_struct pvma;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1142

c4cc6d07b   Hugh Dickins   swapin_readahead:...
1143
1144
  	/* Create a pseudo vma that just contains the policy */
  	pvma.vm_start = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1145
  	pvma.vm_pgoff = idx;
c4cc6d07b   Hugh Dickins   swapin_readahead:...
1146
1147
  	pvma.vm_ops = NULL;
  	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
52cd3b074   Lee Schermerhorn   mempolicy: rework...
1148
1149
1150
1151
1152
  
  	/*
  	 * alloc_page_vma() will drop the shared policy reference
  	 */
  	return alloc_page_vma(gfp, &pvma, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1153
  }
680d794ba   akpm@linux-foundation.org   mount options: fi...
1154
1155
  #else /* !CONFIG_NUMA */
  #ifdef CONFIG_TMPFS
71fe804b6   Lee Schermerhorn   mempolicy: use st...
1156
  /* !CONFIG_NUMA stub: does nothing, so no mpol= text is written to @seq. */
  static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p)
680d794ba   akpm@linux-foundation.org   mount options: fi...
1157
1158
1159
  {
  }
  #endif /* CONFIG_TMPFS */
02098feaa   Hugh Dickins   swapin needs gfp_...
1160
1161
  /*
   * !CONFIG_NUMA variant: read the page for swap @entry via
   * swapin_readahead() with no vma; @info and @idx are unused here.
   */
  static inline struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
  			struct shmem_inode_info *info, unsigned long idx)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1162
  {
02098feaa   Hugh Dickins   swapin needs gfp_...
1163
  	return swapin_readahead(entry, gfp, NULL, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1164
  }
02098feaa   Hugh Dickins   swapin needs gfp_...
1165
1166
  /*
   * !CONFIG_NUMA variant: plain alloc_page() with @gfp; @info and @idx are
   * unused here.
   */
  static inline struct page *shmem_alloc_page(gfp_t gfp,
  			struct shmem_inode_info *info, unsigned long idx)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1167
  {
e84e2e132   Hugh Dickins   tmpfs: restore mi...
1168
  	return alloc_page(gfp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1169
  }
680d794ba   akpm@linux-foundation.org   mount options: fi...
1170
  #endif /* CONFIG_NUMA */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1171

71fe804b6   Lee Schermerhorn   mempolicy: use st...
1172
1173
1174
1175
1176
1177
  #if !(defined(CONFIG_NUMA) && defined(CONFIG_TMPFS))
  /*
   * Stub used unless both CONFIG_NUMA and CONFIG_TMPFS are defined:
   * always returns NULL, without looking at @sbinfo.
   */
  static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
  {
  	struct mempolicy *none = NULL;

  	return none;
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
  /*
   * shmem_getpage - either get the page from swap or allocate a new one
   *
   * If we allocate a new one we do not mark it dirty. That's up to the
   * vm. If we swap it in we mark it dirty since we also free the swap
   * entry since a page cannot live in both the swap and page cache
   */
  static int shmem_getpage(struct inode *inode, unsigned long idx,
  			struct page **pagep, enum sgp_type sgp, int *type)
  {
  	struct address_space *mapping = inode->i_mapping;
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	struct shmem_sb_info *sbinfo;
  	struct page *filepage = *pagep;
  	struct page *swappage;
ff36b8016   Shaohua Li   shmem: reduce pag...
1193
  	struct page *prealloc_page = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1194
1195
  	swp_entry_t *entry;
  	swp_entry_t swap;
02098feaa   Hugh Dickins   swapin needs gfp_...
1196
  	gfp_t gfp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1197
1198
1199
1200
  	int error;
  
  	if (idx >= SHMEM_MAX_INDEX)
  		return -EFBIG;
54cb8821d   Nick Piggin   mm: merge populat...
1201
1202
  
  	if (type)
83c54070e   Nick Piggin   mm: fault feedbac...
1203
  		*type = 0;
54cb8821d   Nick Piggin   mm: merge populat...
1204

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1205
1206
1207
1208
  	/*
  	 * Normally, filepage is NULL on entry, and either found
  	 * uptodate immediately, or allocated and zeroed, or read
  	 * in under swappage, which is then assigned to filepage.
5402b976a   Hugh Dickins   shmem_file_write ...
1209
  	 * But shmem_readpage (required for splice) passes in a locked
ae9764164   Hugh Dickins   shmem: convert to...
1210
1211
  	 * filepage, which may be found not uptodate by other callers
  	 * too, and may need to be copied from the swappage read in.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1212
1213
1214
1215
1216
1217
  	 */
  repeat:
  	if (!filepage)
  		filepage = find_lock_page(mapping, idx);
  	if (filepage && PageUptodate(filepage))
  		goto done;
02098feaa   Hugh Dickins   swapin needs gfp_...
1218
  	gfp = mapping_gfp_mask(mapping);
b409f9fcf   Hugh Dickins   tmpfs: radix_tree...
1219
1220
1221
1222
1223
1224
1225
1226
1227
  	if (!filepage) {
  		/*
  		 * Try to preload while we can wait, to not make a habit of
  		 * draining atomic reserves; but don't latch on to this cpu.
  		 */
  		error = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
  		if (error)
  			goto failed;
  		radix_tree_preload_end();
ff36b8016   Shaohua Li   shmem: reduce pag...
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
  		if (sgp != SGP_READ && !prealloc_page) {
  			/* We don't care if this fails */
  			prealloc_page = shmem_alloc_page(gfp, info, idx);
  			if (prealloc_page) {
  				if (mem_cgroup_cache_charge(prealloc_page,
  						current->mm, GFP_KERNEL)) {
  					page_cache_release(prealloc_page);
  					prealloc_page = NULL;
  				}
  			}
  		}
b409f9fcf   Hugh Dickins   tmpfs: radix_tree...
1239
  	}
ff36b8016   Shaohua Li   shmem: reduce pag...
1240
  	error = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
  
  	spin_lock(&info->lock);
  	shmem_recalc_inode(inode);
  	entry = shmem_swp_alloc(info, idx, sgp);
  	if (IS_ERR(entry)) {
  		spin_unlock(&info->lock);
  		error = PTR_ERR(entry);
  		goto failed;
  	}
  	swap = *entry;
  
  	if (swap.val) {
  		/* Look it up and read it in.. */
  		swappage = lookup_swap_cache(swap);
  		if (!swappage) {
  			shmem_swp_unmap(entry);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1257
  			/* here we actually do the io */
83c54070e   Nick Piggin   mm: fault feedbac...
1258
  			if (type && !(*type & VM_FAULT_MAJOR)) {
f8891e5e1   Christoph Lameter   [PATCH] Light wei...
1259
  				__count_vm_event(PGMAJFAULT);
83c54070e   Nick Piggin   mm: fault feedbac...
1260
  				*type |= VM_FAULT_MAJOR;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1261
  			}
f8891e5e1   Christoph Lameter   [PATCH] Light wei...
1262
  			spin_unlock(&info->lock);
02098feaa   Hugh Dickins   swapin needs gfp_...
1263
  			swappage = shmem_swapin(swap, gfp, info, idx);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
  			if (!swappage) {
  				spin_lock(&info->lock);
  				entry = shmem_swp_alloc(info, idx, sgp);
  				if (IS_ERR(entry))
  					error = PTR_ERR(entry);
  				else {
  					if (entry->val == swap.val)
  						error = -ENOMEM;
  					shmem_swp_unmap(entry);
  				}
  				spin_unlock(&info->lock);
  				if (error)
  					goto failed;
  				goto repeat;
  			}
  			wait_on_page_locked(swappage);
  			page_cache_release(swappage);
  			goto repeat;
  		}
  
  		/* We have to do this with page locked to prevent races */
529ae9aaa   Nick Piggin   mm: rename page t...
1285
  		if (!trylock_page(swappage)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
  			shmem_swp_unmap(entry);
  			spin_unlock(&info->lock);
  			wait_on_page_locked(swappage);
  			page_cache_release(swappage);
  			goto repeat;
  		}
  		if (PageWriteback(swappage)) {
  			shmem_swp_unmap(entry);
  			spin_unlock(&info->lock);
  			wait_on_page_writeback(swappage);
  			unlock_page(swappage);
  			page_cache_release(swappage);
  			goto repeat;
  		}
  		if (!PageUptodate(swappage)) {
  			shmem_swp_unmap(entry);
  			spin_unlock(&info->lock);
  			unlock_page(swappage);
  			page_cache_release(swappage);
  			error = -EIO;
  			goto failed;
  		}
  
  		if (filepage) {
  			shmem_swp_set(info, entry, 0);
  			shmem_swp_unmap(entry);
  			delete_from_swap_cache(swappage);
  			spin_unlock(&info->lock);
  			copy_highpage(filepage, swappage);
  			unlock_page(swappage);
  			page_cache_release(swappage);
  			flush_dcache_page(filepage);
  			SetPageUptodate(filepage);
  			set_page_dirty(filepage);
  			swap_free(swap);
e286781d5   Nick Piggin   mm: speculative p...
1321
1322
  		} else if (!(error = add_to_page_cache_locked(swappage, mapping,
  					idx, GFP_NOWAIT))) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1323
1324
1325
  			info->flags |= SHMEM_PAGEIN;
  			shmem_swp_set(info, entry, 0);
  			shmem_swp_unmap(entry);
73b1262fa   Hugh Dickins   tmpfs: move swap ...
1326
  			delete_from_swap_cache(swappage);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1327
1328
  			spin_unlock(&info->lock);
  			filepage = swappage;
73b1262fa   Hugh Dickins   tmpfs: move swap ...
1329
  			set_page_dirty(filepage);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1330
1331
1332
1333
  			swap_free(swap);
  		} else {
  			shmem_swp_unmap(entry);
  			spin_unlock(&info->lock);
82369553d   Hugh Dickins   memcgroup: fix ha...
1334
  			if (error == -ENOMEM) {
ae3abae64   Daisuke Nishimura   memcg: fix mem_cg...
1335
1336
1337
1338
1339
1340
  				/*
  				 * reclaim from proper memory cgroup and
  				 * call memcg's OOM if needed.
  				 */
  				error = mem_cgroup_shmem_charge_fallback(
  								swappage,
b5a84319a   KAMEZAWA Hiroyuki   memcg: fix shmem'...
1341
  								current->mm,
c9b0ed514   KAMEZAWA Hiroyuki   memcg: helper fun...
1342
  								gfp);
b5a84319a   KAMEZAWA Hiroyuki   memcg: fix shmem'...
1343
1344
1345
  				if (error) {
  					unlock_page(swappage);
  					page_cache_release(swappage);
82369553d   Hugh Dickins   memcgroup: fix ha...
1346
  					goto failed;
b5a84319a   KAMEZAWA Hiroyuki   memcg: fix shmem'...
1347
  				}
82369553d   Hugh Dickins   memcgroup: fix ha...
1348
  			}
b5a84319a   KAMEZAWA Hiroyuki   memcg: fix shmem'...
1349
1350
  			unlock_page(swappage);
  			page_cache_release(swappage);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1351
1352
1353
1354
1355
1356
  			goto repeat;
  		}
  	} else if (sgp == SGP_READ && !filepage) {
  		shmem_swp_unmap(entry);
  		filepage = find_get_page(mapping, idx);
  		if (filepage &&
529ae9aaa   Nick Piggin   mm: rename page t...
1357
  		    (!PageUptodate(filepage) || !trylock_page(filepage))) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
  			spin_unlock(&info->lock);
  			wait_on_page_locked(filepage);
  			page_cache_release(filepage);
  			filepage = NULL;
  			goto repeat;
  		}
  		spin_unlock(&info->lock);
  	} else {
  		shmem_swp_unmap(entry);
  		sbinfo = SHMEM_SB(inode->i_sb);
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
1368
  		if (sbinfo->max_blocks) {
7e496299d   Tim Chen   tmpfs: make tmpfs...
1369
  			if ((percpu_counter_compare(&sbinfo->used_blocks, sbinfo->max_blocks) > 0) ||
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1370
  			    shmem_acct_block(info->flags)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1371
1372
1373
1374
  				spin_unlock(&info->lock);
  				error = -ENOSPC;
  				goto failed;
  			}
7e496299d   Tim Chen   tmpfs: make tmpfs...
1375
1376
  			percpu_counter_inc(&sbinfo->used_blocks);
  			spin_lock(&inode->i_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1377
  			inode->i_blocks += BLOCKS_PER_PAGE;
7e496299d   Tim Chen   tmpfs: make tmpfs...
1378
  			spin_unlock(&inode->i_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1379
1380
1381
1382
1383
1384
1385
  		} else if (shmem_acct_block(info->flags)) {
  			spin_unlock(&info->lock);
  			error = -ENOSPC;
  			goto failed;
  		}
  
  		if (!filepage) {
69029cd55   KAMEZAWA Hiroyuki   memcg: remove ref...
1386
  			int ret;
ff36b8016   Shaohua Li   shmem: reduce pag...
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
  			if (!prealloc_page) {
  				spin_unlock(&info->lock);
  				filepage = shmem_alloc_page(gfp, info, idx);
  				if (!filepage) {
  					shmem_unacct_blocks(info->flags, 1);
  					shmem_free_blocks(inode, 1);
  					error = -ENOMEM;
  					goto failed;
  				}
  				SetPageSwapBacked(filepage);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1397

ff36b8016   Shaohua Li   shmem: reduce pag...
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
  				/*
  				 * Precharge page while we can wait, compensate
  				 * after
  				 */
  				error = mem_cgroup_cache_charge(filepage,
  					current->mm, GFP_KERNEL);
  				if (error) {
  					page_cache_release(filepage);
  					shmem_unacct_blocks(info->flags, 1);
  					shmem_free_blocks(inode, 1);
  					filepage = NULL;
  					goto failed;
  				}
  
  				spin_lock(&info->lock);
  			} else {
  				filepage = prealloc_page;
  				prealloc_page = NULL;
  				SetPageSwapBacked(filepage);
82369553d   Hugh Dickins   memcgroup: fix ha...
1417
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1418
1419
1420
1421
1422
1423
1424
  			entry = shmem_swp_alloc(info, idx, sgp);
  			if (IS_ERR(entry))
  				error = PTR_ERR(entry);
  			else {
  				swap = *entry;
  				shmem_swp_unmap(entry);
  			}
69029cd55   KAMEZAWA Hiroyuki   memcg: remove ref...
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
  			ret = error || swap.val;
  			if (ret)
  				mem_cgroup_uncharge_cache_page(filepage);
  			else
  				ret = add_to_page_cache_lru(filepage, mapping,
  						idx, GFP_NOWAIT);
  			/*
  			 * At add_to_page_cache_lru() failure, uncharge will
  			 * be done automatically.
  			 */
  			if (ret) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
  				spin_unlock(&info->lock);
  				page_cache_release(filepage);
  				shmem_unacct_blocks(info->flags, 1);
  				shmem_free_blocks(inode, 1);
  				filepage = NULL;
  				if (error)
  					goto failed;
  				goto repeat;
  			}
  			info->flags |= SHMEM_PAGEIN;
  		}
  
  		info->alloced++;
  		spin_unlock(&info->lock);
e84e2e132   Hugh Dickins   tmpfs: restore mi...
1450
  		clear_highpage(filepage);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1451
1452
  		flush_dcache_page(filepage);
  		SetPageUptodate(filepage);
a0ee5ec52   Hugh Dickins   tmpfs: allocate o...
1453
1454
  		if (sgp == SGP_DIRTY)
  			set_page_dirty(filepage);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1455
1456
  	}
  done:
d3602444e   Hugh Dickins   shmem_getpage ret...
1457
  	*pagep = filepage;
ff36b8016   Shaohua Li   shmem: reduce pag...
1458
1459
  	error = 0;
  	goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1460
1461
1462
1463
1464
1465
  
  failed:
  	if (*pagep != filepage) {
  		unlock_page(filepage);
  		page_cache_release(filepage);
  	}
ff36b8016   Shaohua Li   shmem: reduce pag...
1466
1467
1468
1469
1470
  out:
  	if (prealloc_page) {
  		mem_cgroup_uncharge_cache_page(prealloc_page);
  		page_cache_release(prealloc_page);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1471
1472
  	return error;
  }
d0217ac04   Nick Piggin   mm: fault feedbac...
1473
  static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1474
  {
d3ac7f892   Josef "Jeff" Sipek   [PATCH] mm: chang...
1475
  	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1476
  	int error;
d0217ac04   Nick Piggin   mm: fault feedbac...
1477
  	int ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1478

d0217ac04   Nick Piggin   mm: fault feedbac...
1479
1480
  	if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
  		return VM_FAULT_SIGBUS;
d00806b18   Nick Piggin   mm: fix fault vs ...
1481

27d54b398   Hugh Dickins   shmem: SGP_QUICK ...
1482
  	error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
d0217ac04   Nick Piggin   mm: fault feedbac...
1483
1484
  	if (error)
  		return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1485

83c54070e   Nick Piggin   mm: fault feedbac...
1486
  	return ret | VM_FAULT_LOCKED;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1487
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1488
  #ifdef CONFIG_NUMA
d8dc74f21   Adrian Bunk   mm/shmem.c: make ...
1489
  static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1490
  {
d3ac7f892   Josef "Jeff" Sipek   [PATCH] mm: chang...
1491
  	struct inode *i = vma->vm_file->f_path.dentry->d_inode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1492
1493
  	return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new);
  }
d8dc74f21   Adrian Bunk   mm/shmem.c: make ...
1494
1495
  static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
  					  unsigned long addr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1496
  {
d3ac7f892   Josef "Jeff" Sipek   [PATCH] mm: chang...
1497
  	struct inode *i = vma->vm_file->f_path.dentry->d_inode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1498
1499
1500
1501
1502
1503
1504
1505
1506
  	unsigned long idx;
  
  	idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
  	return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx);
  }
  #endif
  
  int shmem_lock(struct file *file, int lock, struct user_struct *user)
  {
d3ac7f892   Josef "Jeff" Sipek   [PATCH] mm: chang...
1507
  	struct inode *inode = file->f_path.dentry->d_inode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1508
1509
1510
1511
1512
1513
1514
1515
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	int retval = -ENOMEM;
  
  	spin_lock(&info->lock);
  	if (lock && !(info->flags & VM_LOCKED)) {
  		if (!user_shm_lock(inode->i_size, user))
  			goto out_nomem;
  		info->flags |= VM_LOCKED;
89e004ea5   Lee Schermerhorn   SHM_LOCKED pages ...
1516
  		mapping_set_unevictable(file->f_mapping);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1517
1518
1519
1520
  	}
  	if (!lock && (info->flags & VM_LOCKED) && user) {
  		user_shm_unlock(inode->i_size, user);
  		info->flags &= ~VM_LOCKED;
89e004ea5   Lee Schermerhorn   SHM_LOCKED pages ...
1521
1522
  		mapping_clear_unevictable(file->f_mapping);
  		scan_mapping_unevictable_pages(file->f_mapping);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1523
1524
  	}
  	retval = 0;
89e004ea5   Lee Schermerhorn   SHM_LOCKED pages ...
1525

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1526
1527
1528
1529
  out_nomem:
  	spin_unlock(&info->lock);
  	return retval;
  }
9b83a6a85   Adrian Bunk   [PATCH] mm/{,tiny...
1530
  static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1531
1532
1533
  {
  	file_accessed(file);
  	vma->vm_ops = &shmem_vm_ops;
d0217ac04   Nick Piggin   mm: fault feedbac...
1534
  	vma->vm_flags |= VM_CAN_NONLINEAR;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1535
1536
  	return 0;
  }
454abafe9   Dmitry Monakhov   ramfs: replace in...
1537
1538
  /*
   * Allocate and initialise a new shmem inode on @sb.
   *
   * An inode is first reserved with shmem_reserve_inode(); if that fails, or
   * if new_inode() fails (in which case the reservation is given back with
   * shmem_free_inode()), NULL is returned.  Otherwise ownership comes from
   * inode_init_owner(@dir, @mode), only the VM_NORESERVE bit of @flags is
   * kept in info->flags, and the operations are chosen from the file type
   * encoded in @mode.  @dev is used only for special inodes.
   */
  static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
  				     int mode, dev_t dev, unsigned long flags)
  {
  	struct shmem_inode_info *info;
  	struct inode *inode;

  	if (shmem_reserve_inode(sb))
  		return NULL;

  	inode = new_inode(sb);
  	if (!inode) {
  		/* Hand the reservation back. */
  		shmem_free_inode(sb);
  		return NULL;
  	}

  	inode_init_owner(inode, dir, mode);
  	inode->i_blocks = 0;
  	inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
  	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
  	inode->i_generation = get_seconds();

  	info = SHMEM_I(inode);
  	/* Zero the bytes lying between info and the inode pointer. */
  	memset(info, 0, (char *)inode - (char *)info);
  	spin_lock_init(&info->lock);
  	info->flags = flags & VM_NORESERVE;
  	INIT_LIST_HEAD(&info->swaplist);
  	cache_no_acl(inode);

  	switch (mode & S_IFMT) {
  	case S_IFREG:
  		inode->i_mapping->a_ops = &shmem_aops;
  		inode->i_op = &shmem_inode_operations;
  		inode->i_fop = &shmem_file_operations;
  		mpol_shared_policy_init(&info->policy,
  					 shmem_get_sbmpol(SHMEM_SB(sb)));
  		break;
  	case S_IFDIR:
  		inc_nlink(inode);
  		/* Some things misbehave if size == 0 on a directory */
  		inode->i_size = 2 * BOGO_DIRENT_SIZE;
  		inode->i_op = &shmem_dir_inode_operations;
  		inode->i_fop = &simple_dir_operations;
  		break;
  	case S_IFLNK:
  		/*
  		 * Must not load anything in the rbtree,
  		 * mpol_free_shared_policy will not be called.
  		 */
  		mpol_shared_policy_init(&info->policy, NULL);
  		break;
  	default:
  		inode->i_op = &shmem_special_inode_operations;
  		init_special_inode(inode, mode, dev);
  		break;
  	}

  	return inode;
  }
  
  #ifdef CONFIG_TMPFS
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
1593
1594
  static const struct inode_operations shmem_symlink_inode_operations;
  static const struct inode_operations shmem_symlink_inline_operations;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1595
1596
  
  /*
800d15a53   Nick Piggin   implement simple ...
1597
   * Normally tmpfs avoids the use of shmem_readpage and shmem_write_begin;
ae9764164   Hugh Dickins   shmem: convert to...
1598
1599
   * but providing them allows a tmpfs file to be used for splice, sendfile, and
   * below the loop driver, in the generic fashion that many filesystems support.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1600
   */
ae9764164   Hugh Dickins   shmem: convert to...
1601
1602
1603
1604
1605
1606
1607
  static int shmem_readpage(struct file *file, struct page *page)
  {
  	struct inode *inode = page->mapping->host;
  	int error = shmem_getpage(inode, page->index, &page, SGP_CACHE, NULL);
  	unlock_page(page);
  	return error;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1608
  static int
800d15a53   Nick Piggin   implement simple ...
1609
1610
1611
  shmem_write_begin(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned flags,
  			struct page **pagep, void **fsdata)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1612
  {
800d15a53   Nick Piggin   implement simple ...
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
  	struct inode *inode = mapping->host;
  	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
  	*pagep = NULL;
  	return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
  }
  
  /*
   * ->write_end: grow i_size if the copied data extends past it, then
   * dirty, unlock and release @page.  Returns @copied.
   */
  static int
  shmem_write_end(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned copied,
  			struct page *page, void *fsdata)
  {
  	struct inode *inode = mapping->host;
  	loff_t write_end = pos + copied;

  	if (write_end > inode->i_size)
  		i_size_write(inode, write_end);

  	set_page_dirty(page);
  	unlock_page(page);
  	page_cache_release(page);

  	return copied;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1632
1633
  /*
   * Read from a shmem file starting at *@ppos, one page at a time, handing
   * each chunk to @actor until the descriptor is satisfied, the actor
   * consumes less than offered, end of file is reached, or shmem_getpage()
   * fails.  Failures are left in desc->error, except -EINVAL which is
   * cleared to 0.  *@ppos is advanced past everything the actor consumed.
   */
  static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
  {
  	struct inode *inode = filp->f_path.dentry->d_inode;
  	struct address_space *mapping = inode->i_mapping;
  	unsigned long index = *ppos >> PAGE_CACHE_SHIFT;
  	unsigned long offset = *ppos & ~PAGE_CACHE_MASK;
  	enum sgp_type sgp;

  	/*
  	 * Might this read be for a stacking filesystem?  Then when reading
  	 * holes of a sparse file, we actually need to allocate those pages,
  	 * and even mark them dirty, so it cannot exceed the max_blocks limit.
  	 */
  	sgp = segment_eq(get_fs(), KERNEL_DS) ? SGP_DIRTY : SGP_READ;

  	for (;;) {
  		struct page *page = NULL;
  		unsigned long last_index, chunk, consumed;
  		loff_t size = i_size_read(inode);

  		/* Stop once the position is at or beyond end of file. */
  		last_index = size >> PAGE_CACHE_SHIFT;
  		if (index > last_index)
  			break;
  		if (index == last_index) {
  			chunk = size & ~PAGE_CACHE_MASK;
  			if (chunk <= offset)
  				break;
  		}

  		desc->error = shmem_getpage(inode, index, &page, sgp, NULL);
  		if (desc->error) {
  			if (desc->error == -EINVAL)
  				desc->error = 0;
  			break;
  		}
  		if (page)
  			unlock_page(page);

  		/*
  		 * We must evaluate after, since reads (unlike writes)
  		 * are called without i_mutex protection against truncate
  		 */
  		size = i_size_read(inode);
  		last_index = size >> PAGE_CACHE_SHIFT;
  		chunk = PAGE_CACHE_SIZE;
  		if (index == last_index) {
  			chunk = size & ~PAGE_CACHE_MASK;
  			if (chunk <= offset) {
  				if (page)
  					page_cache_release(page);
  				break;
  			}
  		}
  		chunk -= offset;

  		if (!page) {
  			/* No page was returned: feed the actor the zero page. */
  			page = ZERO_PAGE(0);
  			page_cache_get(page);
  		} else {
  			/*
  			 * If users can be writing to this page using arbitrary
  			 * virtual addresses, take care about potential aliasing
  			 * before reading the page on the kernel side.
  			 */
  			if (mapping_writably_mapped(mapping))
  				flush_dcache_page(page);
  			/*
  			 * Mark the page accessed if we read the beginning.
  			 */
  			if (!offset)
  				mark_page_accessed(page);
  		}

  		/*
  		 * Ok, we have the page, and it's up-to-date, so
  		 * now we can copy it to user space...
  		 *
  		 * The actor routine returns how many bytes were actually used..
  		 * NOTE! This may not be the same as how much of a user buffer
  		 * we filled up (we may be padding etc), so we can only update
  		 * "pos" here (the actor routine has to update the user buffer
  		 * pointers and the remaining count).
  		 */
  		consumed = actor(desc, page, offset, chunk);
  		offset += consumed;
  		index += offset >> PAGE_CACHE_SHIFT;
  		offset &= ~PAGE_CACHE_MASK;

  		page_cache_release(page);
  		if (consumed != chunk || !desc->count)
  			break;

  		cond_resched();
  	}

  	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
  	file_accessed(filp);
  }
bcd78e496   Hugh Dickins   tmpfs: support aio
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
  /*
   * aio read entry point: validate the iovec with generic_segment_checks(),
   * then run do_shmem_file_read() over each non-empty segment in turn.
   *
   * Returns the total number of bytes written to the segments; an error
   * from a segment is returned only when nothing was written at all.
   * Processing stops at the first error or at the first segment that was
   * not filled completely.
   */
  static ssize_t shmem_file_aio_read(struct kiocb *iocb,
  		const struct iovec *iov, unsigned long nr_segs, loff_t pos)
  {
  	struct file *filp = iocb->ki_filp;
  	loff_t *ppos = &iocb->ki_pos;
  	unsigned long seg;
  	size_t count;
  	ssize_t total;

  	total = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
  	if (total)
  		return total;

  	for (seg = 0; seg < nr_segs; seg++) {
  		read_descriptor_t desc;

  		desc.written = 0;
  		desc.arg.buf = iov[seg].iov_base;
  		desc.count = iov[seg].iov_len;
  		if (desc.count == 0)
  			continue;
  		desc.error = 0;

  		do_shmem_file_read(filp, ppos, &desc, file_read_actor);
  		total += desc.written;

  		if (desc.error) {
  			/* Report the error only if no data was transferred. */
  			if (!total)
  				total = desc.error;
  			break;
  		}
  		/* Short read on this segment: do not touch the next one. */
  		if (desc.count > 0)
  			break;
  	}

  	return total;
  }
726c33422   David Howells   [PATCH] VFS: Perm...
1765
  static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1766
  {
726c33422   David Howells   [PATCH] VFS: Perm...
1767
  	struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1768
1769
1770
1771
  
  	buf->f_type = TMPFS_MAGIC;
  	buf->f_bsize = PAGE_CACHE_SIZE;
  	buf->f_namelen = NAME_MAX;
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
1772
  	if (sbinfo->max_blocks) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1773
  		buf->f_blocks = sbinfo->max_blocks;
7e496299d   Tim Chen   tmpfs: make tmpfs...
1774
1775
  		buf->f_bavail = buf->f_bfree =
  				sbinfo->max_blocks - percpu_counter_sum(&sbinfo->used_blocks);
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
1776
1777
  	}
  	if (sbinfo->max_inodes) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1778
1779
  		buf->f_files = sbinfo->max_inodes;
  		buf->f_ffree = sbinfo->free_inodes;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
  	}
  	/* else leave those fields 0 like simple_statfs */
  	return 0;
  }
  
  /*
   * File creation. Allocate an inode, and we're done..
   */
  static int
  shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
  {
  	struct inode *inode;
  	int error;

  	/* shmem_get_inode() returning NULL is reported as -ENOSPC. */
  	inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
  	if (!inode)
  		return -ENOSPC;

  	/* -EOPNOTSUPP from the security hook is tolerated. */
  	error = security_inode_init_security(inode, dir, NULL, NULL, NULL);
  	if (error && error != -EOPNOTSUPP)
  		goto out_iput;

  #ifdef CONFIG_TMPFS_POSIX_ACL
  	error = generic_acl_init(inode, dir);
  	if (error)
  		goto out_iput;
  #endif

  	dir->i_size += BOGO_DIRENT_SIZE;
  	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
  	d_instantiate(dentry, inode);
  	dget(dentry); /* Extra count - pin the dentry in core */
  	return 0;

  out_iput:
  	iput(inode);
  	return error;
  }
  
  static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode)
  {
  	int error;
  
  	if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
  		return error;
d8c76e6f4   Dave Hansen   [PATCH] r/o bind ...
1826
  	inc_nlink(dir);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
  	return 0;
  }
  
  /*
   * Create a regular file: delegates to shmem_mknod() with S_IFREG OR-ed
   * into @mode and a device number of 0.  @nd is not used.
   */
  static int shmem_create(struct inode *dir, struct dentry *dentry, int mode,
  		struct nameidata *nd)
  {
  	return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
  }
  
  /*
   * Link a file..
   */
  static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
  {
  	struct inode *inode = old_dentry->d_inode;
5b04c6890   Pavel Emelyanov   shmem: factor out...
1842
  	int ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1843
1844
1845
1846
1847
1848
  
  	/*
  	 * No ordinary (disk based) filesystem counts links as inodes;
  	 * but each new link needs a new dentry, pinning lowmem, and
  	 * tmpfs dentries cannot be pruned until they are unlinked.
  	 */
5b04c6890   Pavel Emelyanov   shmem: factor out...
1849
1850
1851
  	ret = shmem_reserve_inode(inode->i_sb);
  	if (ret)
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1852
1853
1854
  
  	dir->i_size += BOGO_DIRENT_SIZE;
  	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
d8c76e6f4   Dave Hansen   [PATCH] r/o bind ...
1855
  	inc_nlink(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1856
1857
1858
  	atomic_inc(&inode->i_count);	/* New dentry reference */
  	dget(dentry);		/* Extra pinning count for the created dentry */
  	d_instantiate(dentry, inode);
5b04c6890   Pavel Emelyanov   shmem: factor out...
1859
1860
  out:
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1861
1862
1863
1864
1865
  }
  
  static int shmem_unlink(struct inode *dir, struct dentry *dentry)
  {
  	struct inode *inode = dentry->d_inode;
5b04c6890   Pavel Emelyanov   shmem: factor out...
1866
1867
  	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
  		shmem_free_inode(inode->i_sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1868
1869
1870
  
  	dir->i_size -= BOGO_DIRENT_SIZE;
  	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
9a53c3a78   Dave Hansen   [PATCH] r/o bind ...
1871
  	drop_nlink(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1872
1873
1874
1875
1876
1877
1878
1879
  	dput(dentry);	/* Undo the count from "create" - this does all the work */
  	return 0;
  }
  
  static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
  {
  	if (!simple_empty(dentry))
  		return -ENOTEMPTY;
9a53c3a78   Dave Hansen   [PATCH] r/o bind ...
1880
1881
  	drop_nlink(dentry->d_inode);
  	drop_nlink(dir);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
  	return shmem_unlink(dir, dentry);
  }
  
  /*
   * The VFS layer already does all the dentry stuff for rename,
   * we just have to decrement the usage count for the target if
   * it exists so that the VFS layer correctly frees it when it
   * gets overwritten.
   */
  static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
  {
  	struct inode *inode = old_dentry->d_inode;
  	int moving_dir = S_ISDIR(inode->i_mode);

  	if (!simple_empty(new_dentry))
  		return -ENOTEMPTY;

  	if (new_dentry->d_inode) {
  		/* Target exists: unlink it before it is replaced. */
  		(void) shmem_unlink(new_dir, new_dentry);
  		if (moving_dir)
  			drop_nlink(old_dir);
  	} else if (moving_dir) {
  		/* A directory moves: shift one link from old to new parent. */
  		drop_nlink(old_dir);
  		inc_nlink(new_dir);
  	}

  	old_dir->i_size -= BOGO_DIRENT_SIZE;
  	new_dir->i_size += BOGO_DIRENT_SIZE;
  	old_dir->i_ctime = old_dir->i_mtime =
  	new_dir->i_ctime = new_dir->i_mtime =
  	inode->i_ctime = CURRENT_TIME;

  	return 0;
  }
  
  static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
  {
  	int error;
  	int len;
  	struct inode *inode;
  	struct page *page = NULL;
  	char *kaddr;
  	struct shmem_inode_info *info;
  
  	len = strlen(symname) + 1;
  	if (len > PAGE_CACHE_SIZE)
  		return -ENAMETOOLONG;
454abafe9   Dmitry Monakhov   ramfs: replace in...
1928
  	inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1929
1930
  	if (!inode)
  		return -ENOSPC;
570bc1c2e   Stephen Smalley   [PATCH] tmpfs: En...
1931
1932
1933
1934
1935
1936
1937
1938
1939
  	error = security_inode_init_security(inode, dir, NULL, NULL,
  					     NULL);
  	if (error) {
  		if (error != -EOPNOTSUPP) {
  			iput(inode);
  			return error;
  		}
  		error = 0;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
  	info = SHMEM_I(inode);
  	inode->i_size = len-1;
  	if (len <= (char *)inode - (char *)info) {
  		/* do it inline */
  		memcpy(info, symname, len);
  		inode->i_op = &shmem_symlink_inline_operations;
  	} else {
  		error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
  		if (error) {
  			iput(inode);
  			return error;
  		}
14fcc23fd   Hugh Dickins   tmpfs: fix kernel...
1952
  		inode->i_mapping->a_ops = &shmem_aops;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1953
1954
1955
1956
1957
  		inode->i_op = &shmem_symlink_inode_operations;
  		kaddr = kmap_atomic(page, KM_USER0);
  		memcpy(kaddr, symname, len);
  		kunmap_atomic(kaddr, KM_USER0);
  		set_page_dirty(page);
6746aff74   Wu Fengguang   HWPOISON: shmem: ...
1958
  		unlock_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1959
1960
  		page_cache_release(page);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1961
1962
1963
1964
1965
1966
  	dir->i_size += BOGO_DIRENT_SIZE;
  	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
  	d_instantiate(dentry, inode);
  	dget(dentry);
  	return 0;
  }
cc314eef0   Linus Torvalds   Fix nasty ncpfs s...
1967
  static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1968
1969
  {
  	nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode));
cc314eef0   Linus Torvalds   Fix nasty ncpfs s...
1970
  	return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1971
  }
cc314eef0   Linus Torvalds   Fix nasty ncpfs s...
1972
  static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1973
1974
1975
1976
  {
  	struct page *page = NULL;
  	int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
  	nd_set_link(nd, res ? ERR_PTR(res) : kmap(page));
d3602444e   Hugh Dickins   shmem_getpage ret...
1977
1978
  	if (page)
  		unlock_page(page);
cc314eef0   Linus Torvalds   Fix nasty ncpfs s...
1979
  	return page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1980
  }
cc314eef0   Linus Torvalds   Fix nasty ncpfs s...
1981
  /*
   * ->put_link: nothing to do if the stored link is an error pointer;
   * otherwise @cookie is the page, which is unmapped, marked accessed and
   * released.
   */
  static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
  {
  	struct page *page;

  	if (IS_ERR(nd_get_link(nd)))
  		return;

  	page = cookie;
  	kunmap(page);
  	mark_page_accessed(page);
  	page_cache_release(page);
  }
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
1990
  static const struct inode_operations shmem_symlink_inline_operations = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1991
1992
  	.readlink	= generic_readlink,
  	.follow_link	= shmem_follow_link_inline,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1993
  };
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
1994
  static const struct inode_operations shmem_symlink_inode_operations = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1995
1996
1997
  	.readlink	= generic_readlink,
  	.follow_link	= shmem_follow_link,
  	.put_link	= shmem_put_link,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1998
  };
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
1999
  #ifdef CONFIG_TMPFS_POSIX_ACL
467118102   Randy Dunlap   mm/shmem and tiny...
2000
  /*
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
2001
2002
2003
2004
2005
   * Superblocks without xattr inode operations will get security.* xattr
   * support from the VFS "for free". As soon as we have any other xattrs
   * like ACLs, we also need to implement the security.* handlers at
   * filesystem level, though.
   */
431547b3c   Christoph Hellwig   sanitize xattr ha...
2006
  /*
   * xattr handler ->list hook for the security.* namespace: forwards to
   * security_inode_listsecurity() for the dentry's inode.  @name, @name_len
   * and @handler_flags are not used.
   */
  static size_t shmem_xattr_security_list(struct dentry *dentry, char *list,
  					size_t list_len, const char *name,
  					size_t name_len, int handler_flags)
  {
  	struct inode *inode = dentry->d_inode;

  	return security_inode_listsecurity(inode, list, list_len);
  }
431547b3c   Christoph Hellwig   sanitize xattr ha...
2012
2013
  /*
   * xattr handler ->get hook for the security.* namespace.
   *
   * Returns -EINVAL for an empty attribute name, otherwise whatever
   * xattr_getsecurity() returns for the dentry's inode.
   */
  static int shmem_xattr_security_get(struct dentry *dentry, const char *name,
  		void *buffer, size_t size, int handler_flags)
  {
  	/* an empty suffix (bare "security.") is not a valid attribute */
  	if (name[0] == '\0')
  		return -EINVAL;

  	return xattr_getsecurity(dentry->d_inode, name, buffer, size);
  }
431547b3c   Christoph Hellwig   sanitize xattr ha...
2019
2020
  /*
   * xattr handler ->set hook for the security.* namespace.
   *
   * Returns -EINVAL for an empty attribute name, otherwise the result of
   * security_inode_setsecurity() on the dentry's inode.
   */
  static int shmem_xattr_security_set(struct dentry *dentry, const char *name,
  		const void *value, size_t size, int flags, int handler_flags)
  {
  	struct inode *inode = dentry->d_inode;

  	/* an empty suffix (bare "security.") is not a valid attribute */
  	if (name[0] == '\0')
  		return -EINVAL;

  	return security_inode_setsecurity(inode, name, value, size, flags);
  }
bb4354538   Stephen Hemminger   fs: xattr_handler...
2027
  static const struct xattr_handler shmem_xattr_security_handler = {
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
2028
2029
2030
2031
2032
  	.prefix = XATTR_SECURITY_PREFIX,
  	.list   = shmem_xattr_security_list,
  	.get    = shmem_xattr_security_get,
  	.set    = shmem_xattr_security_set,
  };
bb4354538   Stephen Hemminger   fs: xattr_handler...
2033
  static const struct xattr_handler *shmem_xattr_handlers[] = {
1c7c474c3   Christoph Hellwig   make generic_acl ...
2034
2035
  	&generic_acl_access_handler,
  	&generic_acl_default_handler,
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
2036
2037
2038
2039
  	&shmem_xattr_security_handler,
  	NULL
  };
  #endif
91828a405   David M. Grimes   [PATCH] knfsd: ad...
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
  /*
   * export_operations ->get_parent hook: parent lookup is not supported,
   * so every request fails with -ESTALE.
   */
  static struct dentry *shmem_get_parent(struct dentry *child)
  {
  	return ERR_PTR(-ESTALE);
  }
  
  /*
   * ilookup5() test callback.  @vfh points at three 32-bit file-handle
   * words: [0] generation, [1] low half of the inode number, [2] high half.
   * Returns non-zero only when both the inode number and the generation
   * match @ino.
   */
  static int shmem_match(struct inode *ino, void *vfh)
  {
  	__u32 *words = vfh;
  	__u64 wanted_ino = words[2];

  	wanted_ino = (wanted_ino << 32) | words[1];
  	if (ino->i_ino != wanted_ino)
  		return 0;
  	return words[0] == ino->i_generation;
  }
480b116c9   Christoph Hellwig   shmem: new export...
2052
2053
  static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
  		struct fid *fid, int fh_len, int fh_type)
91828a405   David M. Grimes   [PATCH] knfsd: ad...
2054
  {
91828a405   David M. Grimes   [PATCH] knfsd: ad...
2055
  	struct inode *inode;
480b116c9   Christoph Hellwig   shmem: new export...
2056
2057
2058
2059
2060
2061
  	struct dentry *dentry = NULL;
  	u64 inum = fid->raw[2];
  	inum = (inum << 32) | fid->raw[1];
  
  	if (fh_len < 3)
  		return NULL;
91828a405   David M. Grimes   [PATCH] knfsd: ad...
2062

480b116c9   Christoph Hellwig   shmem: new export...
2063
2064
  	inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
  			shmem_match, fid->raw);
91828a405   David M. Grimes   [PATCH] knfsd: ad...
2065
  	if (inode) {
480b116c9   Christoph Hellwig   shmem: new export...
2066
  		dentry = d_find_alias(inode);
91828a405   David M. Grimes   [PATCH] knfsd: ad...
2067
2068
  		iput(inode);
  	}
480b116c9   Christoph Hellwig   shmem: new export...
2069
  	return dentry;
91828a405   David M. Grimes   [PATCH] knfsd: ad...
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
  }
  
  /*
   * export_operations ->encode_fh hook: build a three-word file handle.
   *
   * @dentry:      object to encode (its d_inode is used)
   * @fh:          output buffer of 32-bit words
   * @len:         in: words available in @fh; out: set to 3 on success
   * @connectable: not used
   *
   * Returns 255 when fewer than three words are available, otherwise 1
   * after writing generation, low inode bits and high inode bits.
   */
  static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
  				int connectable)
  {
  	struct inode *inode = dentry->d_inode;
  
  	if (*len < 3)
  		return 255;
  
  	if (hlist_unhashed(&inode->i_hash)) {
  		/* Unfortunately insert_inode_hash is not idempotent,
  		 * so as we hash inodes here rather than at creation
  		 * time, we need a lock to ensure we only try
  		 * to do it once
  		 */
  		static DEFINE_SPINLOCK(lock);
  		spin_lock(&lock);
  		/* re-check under the lock: another caller may have hashed it */
  		if (hlist_unhashed(&inode->i_hash))
  			__insert_inode_hash(inode,
  					    inode->i_ino + inode->i_generation);
  		spin_unlock(&lock);
  	}
  
  	fh[0] = inode->i_generation;
  	fh[1] = inode->i_ino;
  	fh[2] = ((__u64)inode->i_ino) >> 32;
  
  	*len = 3;
  	return 1;
  }
396551644   Christoph Hellwig   exportfs: make st...
2101
  static const struct export_operations shmem_export_ops = {
91828a405   David M. Grimes   [PATCH] knfsd: ad...
2102
  	.get_parent     = shmem_get_parent,
91828a405   David M. Grimes   [PATCH] knfsd: ad...
2103
  	.encode_fh      = shmem_encode_fh,
480b116c9   Christoph Hellwig   shmem: new export...
2104
  	.fh_to_dentry	= shmem_fh_to_dentry,
91828a405   David M. Grimes   [PATCH] knfsd: ad...
2105
  };
680d794ba   akpm@linux-foundation.org   mount options: fi...
2106
2107
  /*
   * Parse a comma-separated tmpfs mount option string into @sbinfo.
   *
   * @options: option string; modified in place (separators are overwritten
   *           with NUL).  NULL means "no options".
   * @sbinfo:  destination for the parsed size/nr_blocks/nr_inodes/mode/uid/
   *           gid/mpol settings
   * @remount: when true, mode=, uid= and gid= are accepted but ignored
   *
   * Returns 0 on success, 1 on any unknown option, missing value or
   * malformed value (an error is logged with printk).
   */
  static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
  			       bool remount)
  {
  	char *this_char, *value, *rest;

  	while (options != NULL) {
  		this_char = options;
  		for (;;) {
  			/*
  			 * NUL-terminate this option: unfortunately,
  			 * mount options form a comma-separated list,
  			 * but mpol's nodelist may also contain commas.
  			 */
  			options = strchr(options, ',');
  			if (options == NULL)
  				break;
  			options++;
  			/* a comma followed by a digit stays part of the value */
  			if (!isdigit(*options)) {
  				options[-1] = '\0';
  				break;
  			}
  		}
  		if (!*this_char)
  			continue;
  		if ((value = strchr(this_char,'=')) != NULL) {
  			*value++ = 0;
  		} else {
  			printk(KERN_ERR
  			    "tmpfs: No value for mount option '%s'\n",
  			    this_char);
  			return 1;
  		}

  		if (!strcmp(this_char,"size")) {
  			unsigned long long size;
  			size = memparse(value,&rest);
  			if (*rest == '%') {
  				/* percentage of total RAM, expressed in bytes */
  				size <<= PAGE_SHIFT;
  				size *= totalram_pages;
  				do_div(size, 100);
  				rest++;
  			}
  			if (*rest)
  				goto bad_val;
  			sbinfo->max_blocks =
  				DIV_ROUND_UP(size, PAGE_CACHE_SIZE);
  		} else if (!strcmp(this_char,"nr_blocks")) {
  			sbinfo->max_blocks = memparse(value, &rest);
  			if (*rest)
  				goto bad_val;
  		} else if (!strcmp(this_char,"nr_inodes")) {
  			sbinfo->max_inodes = memparse(value, &rest);
  			if (*rest)
  				goto bad_val;
  		} else if (!strcmp(this_char,"mode")) {
  			if (remount)
  				continue;
  			sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777;
  			if (*rest)
  				goto bad_val;
  		} else if (!strcmp(this_char,"uid")) {
  			if (remount)
  				continue;
  			sbinfo->uid = simple_strtoul(value, &rest, 0);
  			if (*rest)
  				goto bad_val;
  		} else if (!strcmp(this_char,"gid")) {
  			if (remount)
  				continue;
  			sbinfo->gid = simple_strtoul(value, &rest, 0);
  			if (*rest)
  				goto bad_val;
  		} else if (!strcmp(this_char,"mpol")) {
  			if (mpol_parse_str(value, &sbinfo->mpol, 1))
  				goto bad_val;
  		} else {
  			printk(KERN_ERR "tmpfs: Bad mount option %s\n",
  			       this_char);
  			return 1;
  		}
  	}
  	return 0;

  bad_val:
  	printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
  	       value, this_char);
  	return 1;

  }
  
  static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
  {
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
680d794ba   akpm@linux-foundation.org   mount options: fi...
2201
  	struct shmem_sb_info config = *sbinfo;
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2202
2203
  	unsigned long inodes;
  	int error = -EINVAL;
680d794ba   akpm@linux-foundation.org   mount options: fi...
2204
  	if (shmem_parse_options(data, &config, true))
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2205
  		return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2206

0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2207
  	spin_lock(&sbinfo->stat_lock);
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2208
  	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
7e496299d   Tim Chen   tmpfs: make tmpfs...
2209
  	if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0)
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2210
  		goto out;
680d794ba   akpm@linux-foundation.org   mount options: fi...
2211
  	if (config.max_inodes < inodes)
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2212
2213
2214
2215
2216
2217
2218
  		goto out;
  	/*
  	 * Those tests also disallow limited->unlimited while any are in
  	 * use, so i_blocks will always be zero when max_blocks is zero;
  	 * but we must separately disallow unlimited->limited, because
  	 * in that case we have no record of how much is already in use.
  	 */
680d794ba   akpm@linux-foundation.org   mount options: fi...
2219
  	if (config.max_blocks && !sbinfo->max_blocks)
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2220
  		goto out;
680d794ba   akpm@linux-foundation.org   mount options: fi...
2221
  	if (config.max_inodes && !sbinfo->max_inodes)
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2222
2223
2224
  		goto out;
  
  	error = 0;
680d794ba   akpm@linux-foundation.org   mount options: fi...
2225
  	sbinfo->max_blocks  = config.max_blocks;
680d794ba   akpm@linux-foundation.org   mount options: fi...
2226
2227
  	sbinfo->max_inodes  = config.max_inodes;
  	sbinfo->free_inodes = config.max_inodes - inodes;
71fe804b6   Lee Schermerhorn   mempolicy: use st...
2228
2229
2230
  
  	mpol_put(sbinfo->mpol);
  	sbinfo->mpol        = config.mpol;	/* transfers initial ref */
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2231
2232
2233
  out:
  	spin_unlock(&sbinfo->stat_lock);
  	return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2234
  }
680d794ba   akpm@linux-foundation.org   mount options: fi...
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
  
  /*
   * super_operations ->show_options hook: emit every setting that differs
   * from its default (size, nr_inodes, mode, uid, gid), then the memory
   * policy via shmem_show_mpol().  Always returns 0.
   */
  static int shmem_show_options(struct seq_file *seq, struct vfsmount *vfs)
  {
  	struct shmem_sb_info *sbi = SHMEM_SB(vfs->mnt_sb);

  	if (sbi->max_blocks != shmem_default_max_blocks()) {
  		/* blocks are reported in kilobytes */
  		seq_printf(seq, ",size=%luk",
  			sbi->max_blocks << (PAGE_CACHE_SHIFT - 10));
  	}
  	if (sbi->max_inodes != shmem_default_max_inodes()) {
  		seq_printf(seq, ",nr_inodes=%lu", sbi->max_inodes);
  	}
  	if (sbi->mode != (S_IRWXUGO | S_ISVTX)) {
  		seq_printf(seq, ",mode=%03o", sbi->mode);
  	}
  	if (sbi->uid != 0) {
  		seq_printf(seq, ",uid=%u", sbi->uid);
  	}
  	if (sbi->gid != 0) {
  		seq_printf(seq, ",gid=%u", sbi->gid);
  	}
  	shmem_show_mpol(seq, sbi->mpol);
  	return 0;
  }
  #endif /* CONFIG_TMPFS */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2255
2256
2257
  
  static void shmem_put_super(struct super_block *sb)
  {
602586a83   Hugh Dickins   shmem: put_super ...
2258
2259
2260
2261
  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
  
  	percpu_counter_destroy(&sbinfo->used_blocks);
  	kfree(sbinfo);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2262
2263
  	sb->s_fs_info = NULL;
  }
2b2af54a5   Kay Sievers   Driver Core: devt...
2264
  int shmem_fill_super(struct super_block *sb, void *data, int silent)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2265
2266
2267
  {
  	struct inode *inode;
  	struct dentry *root;
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2268
  	struct shmem_sb_info *sbinfo;
680d794ba   akpm@linux-foundation.org   mount options: fi...
2269
2270
2271
  	int err = -ENOMEM;
  
  	/* Round up to L1_CACHE_BYTES to resist false sharing */
425fbf047   Pekka Enberg   shmem: initialize...
2272
  	sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
680d794ba   akpm@linux-foundation.org   mount options: fi...
2273
2274
2275
  				L1_CACHE_BYTES), GFP_KERNEL);
  	if (!sbinfo)
  		return -ENOMEM;
680d794ba   akpm@linux-foundation.org   mount options: fi...
2276
  	sbinfo->mode = S_IRWXUGO | S_ISVTX;
76aac0e9a   David Howells   CRED: Wrap task c...
2277
2278
  	sbinfo->uid = current_fsuid();
  	sbinfo->gid = current_fsgid();
680d794ba   akpm@linux-foundation.org   mount options: fi...
2279
  	sb->s_fs_info = sbinfo;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2280

0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2281
  #ifdef CONFIG_TMPFS
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2282
2283
2284
2285
2286
2287
  	/*
  	 * Per default we only allow half of the physical ram per
  	 * tmpfs instance, limiting inodes to one per page of lowmem;
  	 * but the internal instance is left unlimited.
  	 */
  	if (!(sb->s_flags & MS_NOUSER)) {
680d794ba   akpm@linux-foundation.org   mount options: fi...
2288
2289
2290
2291
2292
2293
  		sbinfo->max_blocks = shmem_default_max_blocks();
  		sbinfo->max_inodes = shmem_default_max_inodes();
  		if (shmem_parse_options(data, sbinfo, false)) {
  			err = -EINVAL;
  			goto failed;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2294
  	}
91828a405   David M. Grimes   [PATCH] knfsd: ad...
2295
  	sb->s_export_op = &shmem_export_ops;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2296
2297
2298
  #else
  	sb->s_flags |= MS_NOUSER;
  #endif
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2299
  	spin_lock_init(&sbinfo->stat_lock);
602586a83   Hugh Dickins   shmem: put_super ...
2300
2301
  	if (percpu_counter_init(&sbinfo->used_blocks, 0))
  		goto failed;
680d794ba   akpm@linux-foundation.org   mount options: fi...
2302
  	sbinfo->free_inodes = sbinfo->max_inodes;
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2303

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2304
2305
2306
2307
2308
  	sb->s_maxbytes = SHMEM_MAX_BYTES;
  	sb->s_blocksize = PAGE_CACHE_SIZE;
  	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
  	sb->s_magic = TMPFS_MAGIC;
  	sb->s_op = &shmem_ops;
cfd95a9cf   Robin H. Johnson   [PATCH] tmpfs: ti...
2309
  	sb->s_time_gran = 1;
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
2310
2311
2312
2313
  #ifdef CONFIG_TMPFS_POSIX_ACL
  	sb->s_xattr = shmem_xattr_handlers;
  	sb->s_flags |= MS_POSIXACL;
  #endif
0edd73b33   Hugh Dickins   [PATCH] shmem: re...
2314

454abafe9   Dmitry Monakhov   ramfs: replace in...
2315
  	inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2316
2317
  	if (!inode)
  		goto failed;
680d794ba   akpm@linux-foundation.org   mount options: fi...
2318
2319
  	inode->i_uid = sbinfo->uid;
  	inode->i_gid = sbinfo->gid;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
  	root = d_alloc_root(inode);
  	if (!root)
  		goto failed_iput;
  	sb->s_root = root;
  	return 0;
  
  failed_iput:
  	iput(inode);
  failed:
  	shmem_put_super(sb);
  	return err;
  }
fcc234f88   Pekka Enberg   [PATCH] mm: kill ...
2332
  static struct kmem_cache *shmem_inode_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2333
2334
2335
2336
  
  static struct inode *shmem_alloc_inode(struct super_block *sb)
  {
  	struct shmem_inode_info *p;
e94b17660   Christoph Lameter   [PATCH] slab: rem...
2337
  	p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
  	if (!p)
  		return NULL;
  	return &p->vfs_inode;
  }
  
  static void shmem_destroy_inode(struct inode *inode)
  {
  	if ((inode->i_mode & S_IFMT) == S_IFREG) {
  		/* only struct inode is valid if it's an inline symlink */
  		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
  	}
  	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
  }
51cc50685   Alexey Dobriyan   SL*B: drop kmem c...
2351
  static void init_once(void *foo)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2352
2353
  {
  	struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
a35afb830   Christoph Lameter   Remove SLAB_CTOR_...
2354
  	inode_init_once(&p->vfs_inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2355
2356
2357
2358
2359
2360
  }
  
  static int init_inodecache(void)
  {
  	shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
  				sizeof(struct shmem_inode_info),
040b5c6f9   Alexey Dobriyan   SLAB_PANIC more (...
2361
  				0, SLAB_PANIC, init_once);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2362
2363
2364
2365
2366
  	return 0;
  }
  
  static void destroy_inodecache(void)
  {
1a1d92c10   Alexey Dobriyan   [PATCH] Really ig...
2367
  	kmem_cache_destroy(shmem_inode_cachep);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2368
  }
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
2369
  static const struct address_space_operations shmem_aops = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2370
  	.writepage	= shmem_writepage,
767193253   Ken Chen   [PATCH] simplify ...
2371
  	.set_page_dirty	= __set_page_dirty_no_writeback,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2372
  #ifdef CONFIG_TMPFS
ae9764164   Hugh Dickins   shmem: convert to...
2373
  	.readpage	= shmem_readpage,
800d15a53   Nick Piggin   implement simple ...
2374
2375
  	.write_begin	= shmem_write_begin,
  	.write_end	= shmem_write_end,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2376
  #endif
304dbdb7a   Lee Schermerhorn   [PATCH] add migra...
2377
  	.migratepage	= migrate_page,
aa261f549   Andi Kleen   HWPOISON: Enable ...
2378
  	.error_remove_page = generic_error_remove_page,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2379
  };
15ad7cdcf   Helge Deller   [PATCH] struct se...
2380
  static const struct file_operations shmem_file_operations = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2381
2382
2383
  	.mmap		= shmem_mmap,
  #ifdef CONFIG_TMPFS
  	.llseek		= generic_file_llseek,
bcd78e496   Hugh Dickins   tmpfs: support aio
2384
  	.read		= do_sync_read,
5402b976a   Hugh Dickins   shmem_file_write ...
2385
  	.write		= do_sync_write,
bcd78e496   Hugh Dickins   tmpfs: support aio
2386
  	.aio_read	= shmem_file_aio_read,
5402b976a   Hugh Dickins   shmem_file_write ...
2387
  	.aio_write	= generic_file_aio_write,
1b061d924   Christoph Hellwig   rename the generi...
2388
  	.fsync		= noop_fsync,
ae9764164   Hugh Dickins   shmem: convert to...
2389
2390
  	.splice_read	= generic_file_splice_read,
  	.splice_write	= generic_file_splice_write,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2391
2392
  #endif
  };
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
2393
  static const struct inode_operations shmem_inode_operations = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2394
  	.setattr	= shmem_notify_change,
f6b3ec238   Badari Pulavarty   [PATCH] madvise(M...
2395
  	.truncate_range	= shmem_truncate_range,
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
2396
2397
2398
2399
2400
  #ifdef CONFIG_TMPFS_POSIX_ACL
  	.setxattr	= generic_setxattr,
  	.getxattr	= generic_getxattr,
  	.listxattr	= generic_listxattr,
  	.removexattr	= generic_removexattr,
1c7c474c3   Christoph Hellwig   make generic_acl ...
2401
  	.check_acl	= generic_check_acl,
39f0247d3   Andreas Gruenbacher   [PATCH] Access Co...
2402
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2403
  };
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
2404
  /*
   * Inode operations for shmem directories.  Namespace operations require
   * CONFIG_TMPFS; setattr and the xattr/ACL hooks require
   * CONFIG_TMPFS_POSIX_ACL.
   */
  static const struct inode_operations shmem_dir_inode_operations = {
  #ifdef CONFIG_TMPFS
  	.lookup		= simple_lookup,
  	.create		= shmem_create,
  	.mknod		= shmem_mknod,
  	.mkdir		= shmem_mkdir,
  	.rmdir		= shmem_rmdir,
  	.link		= shmem_link,
  	.unlink		= shmem_unlink,
  	.symlink	= shmem_symlink,
  	.rename		= shmem_rename,
  #endif
  #ifdef CONFIG_TMPFS_POSIX_ACL
  	.setattr	= shmem_notify_change,
  	.getxattr	= generic_getxattr,
  	.setxattr	= generic_setxattr,
  	.listxattr	= generic_listxattr,
  	.removexattr	= generic_removexattr,
  	.check_acl	= generic_check_acl,
  #endif
  };
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
2425
  /*
   * Inode operations table named for special inodes; it is empty unless
   * CONFIG_TMPFS_POSIX_ACL is enabled, in which case it provides setattr
   * and the xattr/ACL hooks.
   */
  static const struct inode_operations shmem_special_inode_operations = {
  #ifdef CONFIG_TMPFS_POSIX_ACL
  	.setattr	= shmem_notify_change,
  	.getxattr	= generic_getxattr,
  	.setxattr	= generic_setxattr,
  	.listxattr	= generic_listxattr,
  	.removexattr	= generic_removexattr,
  	.check_acl	= generic_check_acl,
  #endif
  };
759b9775c   Hugh Dickins   [PATCH] shmem and...
2435
  static const struct super_operations shmem_ops = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2436
2437
2438
2439
2440
  	.alloc_inode	= shmem_alloc_inode,
  	.destroy_inode	= shmem_destroy_inode,
  #ifdef CONFIG_TMPFS
  	.statfs		= shmem_statfs,
  	.remount_fs	= shmem_remount_fs,
680d794ba   akpm@linux-foundation.org   mount options: fi...
2441
  	.show_options	= shmem_show_options,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2442
  #endif
1f895f75d   Al Viro   switch shmem.c to...
2443
  	.evict_inode	= shmem_evict_inode,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2444
2445
2446
  	.drop_inode	= generic_delete_inode,
  	.put_super	= shmem_put_super,
  };
f0f37e2f7   Alexey Dobriyan   const: mark struc...
2447
  static const struct vm_operations_struct shmem_vm_ops = {
54cb8821d   Nick Piggin   mm: merge populat...
2448
  	.fault		= shmem_fault,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2449
2450
2451
2452
2453
  #ifdef CONFIG_NUMA
  	.set_policy     = shmem_set_policy,
  	.get_policy     = shmem_get_policy,
  #endif
  };
454e2398b   David Howells   [PATCH] VFS: Perm...
2454
2455
  static int shmem_get_sb(struct file_system_type *fs_type,
  	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2456
  {
454e2398b   David Howells   [PATCH] VFS: Perm...
2457
  	return get_sb_nodev(fs_type, flags, data, shmem_fill_super, mnt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2458
2459
2460
2461
2462
2463
2464
2465
  }
  
  /*
   * Filesystem type registered under the name "tmpfs"; superblocks are
   * created through shmem_get_sb() and torn down with kill_litter_super().
   */
  static struct file_system_type tmpfs_fs_type = {
  	.owner		= THIS_MODULE,
  	.name		= "tmpfs",
  	.get_sb		= shmem_get_sb,
  	.kill_sb	= kill_litter_super,
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2466

2b2af54a5   Kay Sievers   Driver Core: devt...
2467
  /*
   * Boot-time initialisation of tmpfs/shmem.
   *
   * Sets up the backing_dev_info, the inode slab cache, registers the
   * "tmpfs" filesystem type and creates the internal kernel mount stored
   * in shm_mnt.  Each failure unwinds the steps completed so far.
   *
   * Returns 0 on success or a negative errno; on failure shm_mnt holds the
   * matching ERR_PTR() so later users can detect the missing mount.
   */
  int __init init_tmpfs(void)
  {
  	int error;

  	error = bdi_init(&shmem_backing_dev_info);
  	if (error)
  		goto out4;

  	error = init_inodecache();
  	if (error)
  		goto out3;

  	error = register_filesystem(&tmpfs_fs_type);
  	if (error) {
  		printk(KERN_ERR "Could not register tmpfs\n");
  		goto out2;
  	}

  	shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER,
  				tmpfs_fs_type.name, NULL);
  	if (IS_ERR(shm_mnt)) {
  		error = PTR_ERR(shm_mnt);
  		printk(KERN_ERR "Could not kern_mount tmpfs\n");
  		goto out1;
  	}
  	return 0;

  out1:
  	unregister_filesystem(&tmpfs_fs_type);
  out2:
  	destroy_inodecache();
  out3:
  	bdi_destroy(&shmem_backing_dev_info);
  out4:
  	shm_mnt = ERR_PTR(error);
  	return error;
  }
853ac43ab   Matt Mackall   shmem: unify regu...
2504

87946a722   Daisuke Nishimura   memcg: move charg...
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR
  /**
   * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
   * @inode: the inode to be searched
   * @pgoff: the offset to be searched
   * @pagep: the pointer for the found page to be stored
   * @ent: the pointer for the found swap entry to be stored
   *
   * If a page is found, its refcount is incremented; the caller is
   * responsible for dropping that reference.  When nothing is found, or
   * @pgoff lies at or beyond the end of the file, *@pagep is NULL and
   * *@ent has a zero value.
   */
  void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
  					struct page **pagep, swp_entry_t *ent)
  {
  	swp_entry_t entry = { .val = 0 }, *ptr;
  	struct page *page = NULL;
  	struct shmem_inode_info *info = SHMEM_I(inode);

  	/*
  	 * Compare in page units: shifting pgoff left would be done in
  	 * unsigned long and can wrap where that type is 32 bits wide,
  	 * letting an out-of-range offset slip past the check.
  	 */
  	if (pgoff >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
  			PAGE_CACHE_SHIFT))
  		goto out;

  	spin_lock(&info->lock);
  	ptr = shmem_swp_entry(info, pgoff, NULL);
  #ifdef CONFIG_SWAP
  	if (ptr && ptr->val) {
  		/* a swap entry is recorded: look the page up in swap cache */
  		entry.val = ptr->val;
  		page = find_get_page(&swapper_space, entry.val);
  	} else
  #endif
  		page = find_get_page(inode->i_mapping, pgoff);
  	if (ptr)
  		shmem_swp_unmap(ptr);
  	spin_unlock(&info->lock);
  out:
  	*pagep = page;
  	*ent = entry;
  }
  #endif
853ac43ab   Matt Mackall   shmem: unify regu...
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
  #else /* !CONFIG_SHMEM */
  
  /*
   * tiny-shmem: simple shmemfs and tmpfs using ramfs code
   *
   * This is intended for small system where the benefits of the full
   * shmem code (swap-backed and resource-limited) are outweighed by
   * their complexity. On systems without swap this code should be
   * effectively equivalent, but much lighter weight.
   */
  
  #include <linux/ramfs.h>
  
  /*
   * tiny-shmem variant of the "tmpfs" filesystem type: superblocks come
   * from ramfs_get_sb() and are torn down with kill_litter_super().
   */
  static struct file_system_type tmpfs_fs_type = {
  	.name		= "tmpfs",
  	.get_sb		= ramfs_get_sb,
  	.kill_sb	= kill_litter_super,
  };
2b2af54a5   Kay Sievers   Driver Core: devt...
2561
  /*
   * tiny-shmem initialisation: register the "tmpfs" type and create the
   * internal kernel mount.  Either step failing is treated as fatal
   * (BUG_ON); otherwise returns 0.
   */
  int __init init_tmpfs(void)
  {
  	int err = register_filesystem(&tmpfs_fs_type);

  	BUG_ON(err != 0);

  	shm_mnt = kern_mount(&tmpfs_fs_type);
  	BUG_ON(IS_ERR(shm_mnt));

  	return 0;
  }
  
  /* tiny-shmem stub: does nothing and always returns 0. */
  int shmem_unuse(swp_entry_t entry, struct page *page)
  {
  	return 0;
  }
3f96b79ad   Hugh Dickins   tmpfs: depend on ...
2575
2576
2577
2578
  /* tiny-shmem stub: locking is a no-op here, so always returns 0. */
  int shmem_lock(struct file *file, int lock, struct user_struct *user)
  {
  	return 0;
  }
87946a722   Daisuke Nishimura   memcg: move charg...
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR
  /**
   * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
   * @inode: the inode to be searched
   * @pgoff: the offset to be searched
   * @pagep: the pointer for the found page to be stored
   * @ent: the pointer for the found swap entry to be stored
   *
   * If a page is found, its refcount is incremented; the caller is
   * responsible for dropping that reference.  This variant never reports a
   * swap entry: *@ent always has a zero value.
   */
  void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
  					struct page **pagep, swp_entry_t *ent)
  {
  	struct page *page = NULL;

  	/*
  	 * Compare in page units: shifting pgoff left would be done in
  	 * unsigned long and can wrap where that type is 32 bits wide,
  	 * letting an out-of-range offset slip past the check.
  	 */
  	if (pgoff >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
  			PAGE_CACHE_SHIFT))
  		goto out;
  	page = find_get_page(inode->i_mapping, pgoff);
  out:
  	*pagep = page;
  	*ent = (swp_entry_t){ .val = 0 };
  }
  #endif
0b0a0806b   Hugh Dickins   shmem: fix shared...
2603
2604
  /*
   * tiny-shmem: map the shmem names used by the common code below onto
   * their ramfs/generic equivalents.  Size accounting is a no-op here.
   */
  #define shmem_vm_ops				generic_file_vm_ops
  #define shmem_file_operations			ramfs_file_operations
  /* the flags argument is dropped: ramfs_get_inode() takes no flags */
  #define shmem_get_inode(sb, dir, mode, dev, flags)	ramfs_get_inode(sb, dir, mode, dev)
  #define shmem_acct_size(flags, size)		0
  #define shmem_unacct_size(flags, size)		do {} while (0)
  #define SHMEM_MAX_BYTES				MAX_LFS_FILESIZE
853ac43ab   Matt Mackall   shmem: unify regu...
2609
2610
2611
2612
  
  #endif /* CONFIG_SHMEM */
  
  /* common code */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2613

467118102   Randy Dunlap   mm/shmem and tiny...
2614
  /**
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2615
   * shmem_file_setup - get an unlinked file living in tmpfs
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2616
2617
   * @name: name for dentry (to be seen in /proc/<pid>/maps)
   * @size: size to be set for the file
0b0a0806b   Hugh Dickins   shmem: fix shared...
2618
   * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2619
   */
168f5ac66   Sergei Trofimovich   mm cleanup: shmem...
2620
  /*
   * Create an unlinked tmpfs file of @size bytes on the internal mount.
   *
   * Returns the new struct file, or an ERR_PTR():
   *   - the error stored in shm_mnt when the internal mount is missing,
   *   - -EINVAL for a negative or too large @size,
   *   - -ENOMEM when accounting or dentry allocation fails,
   *   - -ENOSPC when no inode can be obtained,
   *   - -ENFILE when no struct file can be allocated,
   *   - on !CONFIG_MMU, the error from ramfs_nommu_expand_for_mapping().
   */
  struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
  {
  	int error;
  	struct file *file;
  	struct inode *inode;
  	struct path path;
  	struct dentry *root;
  	struct qstr this;

  	if (IS_ERR(shm_mnt))
  		return (void *)shm_mnt;

  	if (size < 0 || size > SHMEM_MAX_BYTES)
  		return ERR_PTR(-EINVAL);

  	if (shmem_acct_size(flags, size))
  		return ERR_PTR(-ENOMEM);

  	error = -ENOMEM;
  	this.name = name;
  	this.len = strlen(name);
  	this.hash = 0; /* will go */
  	root = shm_mnt->mnt_root;
  	path.dentry = d_alloc(root, &this);
  	if (!path.dentry)
  		goto put_memory;
  	path.mnt = mntget(shm_mnt);

  	error = -ENOSPC;
  	inode = shmem_get_inode(root->d_sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
  	if (!inode)
  		goto put_dentry;

  	d_instantiate(path.dentry, inode);
  	inode->i_size = size;
  	inode->i_nlink = 0;	/* It is unlinked */
  #ifndef CONFIG_MMU
  	error = ramfs_nommu_expand_for_mapping(inode, size);
  	if (error)
  		goto put_path;
  #endif

  	error = -ENFILE;
  	file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
  		  &shmem_file_operations);
  	if (!file)
  		goto put_path;

  	return file;

  put_path:
  	/*
  	 * The inode is attached to the dentry and carries i_size, so
  	 * dropping the path tears the inode down; that teardown is expected
  	 * to release the pre-accounted size.  Unaccounting again here would
  	 * do it twice.  NOTE(review): confirm against shmem_evict_inode().
  	 */
  	path_put(&path);
  	return ERR_PTR(error);
  put_dentry:
  	/* no inode was attached: accounting must be undone by hand */
  	path_put(&path);
  put_memory:
  	shmem_unacct_size(flags, size);
  	return ERR_PTR(error);
  }
395e0ddc4   Keith Packard   Export shmem_file...
2674
  EXPORT_SYMBOL_GPL(shmem_file_setup);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2675

467118102   Randy Dunlap   mm/shmem and tiny...
2676
  /**
   * shmem_zero_setup - setup a shared anonymous mapping
   * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
   *
   * Obtains an unlinked file named "dev/zero" from shmem_file_setup(), sized
   * to the span of @vma and created with the vma's vm_flags, then installs it
   * as the vma's file (releasing any file the vma referenced before) and
   * points the vma at shmem_vm_ops.
   *
   * Returns 0 on success, or the negative errno reported by
   * shmem_file_setup(); on failure @vma is left untouched.
   */
  int shmem_zero_setup(struct vm_area_struct *vma)
  {
  	loff_t span = vma->vm_end - vma->vm_start;
  	struct file *zero_file;
  
  	zero_file = shmem_file_setup("dev/zero", span, vma->vm_flags);
  	if (IS_ERR(zero_file))
  		return PTR_ERR(zero_file);
  
  	/* Drop the reference to whatever file was attached before replacing it. */
  	if (vma->vm_file)
  		fput(vma->vm_file);
  
  	vma->vm_file = zero_file;
  	vma->vm_ops = &shmem_vm_ops;
  
  	return 0;
  }