Blame view

fs/hugetlbfs/inode.c 34.5 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
  /*
   * hugetlbpage-backed filesystem.  Based on ramfs.
   *
6d49e352a   Nadia Yvette Chambers   propagate name ch...
4
   * Nadia Yvette Chambers, 2002
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
5
6
   *
   * Copyright (C) 2002 Linus Torvalds.
3e89e1c5e   Paul Gortmaker   hugetlb: make mm ...
7
   * License: GPL
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
8
   */
9b857d26d   Andrew Morton   fs/hugetlbfs/inod...
9
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
10
11
12
  #include <linux/thread_info.h>
  #include <asm/current.h>
  #include <linux/sched.h>		/* remove ASAP */
70c3547e3   Mike Kravetz   hugetlbfs: add hu...
13
  #include <linux/falloc.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
14
15
16
  #include <linux/fs.h>
  #include <linux/mount.h>
  #include <linux/file.h>
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
17
  #include <linux/kernel.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
19
20
21
22
  #include <linux/writeback.h>
  #include <linux/pagemap.h>
  #include <linux/highmem.h>
  #include <linux/init.h>
  #include <linux/string.h>
16f7e0fe2   Randy Dunlap   [PATCH] capable/c...
23
  #include <linux/capability.h>
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
24
  #include <linux/ctype.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
26
27
  #include <linux/backing-dev.h>
  #include <linux/hugetlb.h>
  #include <linux/pagevec.h>
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
28
  #include <linux/parser.h>
036e08568   Benjamin Herrenschmidt   get_unmapped_area...
29
  #include <linux/mman.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
30
31
32
33
  #include <linux/slab.h>
  #include <linux/dnotify.h>
  #include <linux/statfs.h>
  #include <linux/security.h>
1fd7317d0   Nick Black   Move magic number...
34
  #include <linux/magic.h>
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
35
  #include <linux/migrate.h>
34d0640e2   Al Viro   switch hugetlbfs ...
36
  #include <linux/uio.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
37
38
  
  #include <asm/uaccess.h>
ee9b6d61a   Josef 'Jeff' Sipek   [PATCH] Mark stru...
39
  static const struct super_operations hugetlbfs_ops;
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
40
  static const struct address_space_operations hugetlbfs_aops;
4b6f5d20b   Arjan van de Ven   [PATCH] Make most...
41
  const struct file_operations hugetlbfs_file_operations;
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
42
43
  static const struct inode_operations hugetlbfs_dir_inode_operations;
  static const struct inode_operations hugetlbfs_inode_operations;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
44

a1d776ee3   David Gibson   hugetlb: cleanup ...
45
  struct hugetlbfs_config {
a0eb3a05a   Eric W. Biederman   userns: Convert h...
46
47
  	kuid_t   uid;
  	kgid_t   gid;
a1d776ee3   David Gibson   hugetlb: cleanup ...
48
  	umode_t mode;
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
49
  	long	max_hpages;
a1d776ee3   David Gibson   hugetlb: cleanup ...
50
51
  	long	nr_inodes;
  	struct hstate *hstate;
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
52
  	long    min_hpages;
a1d776ee3   David Gibson   hugetlb: cleanup ...
53
54
55
56
57
58
59
60
61
62
63
  };
  
  /*
   * Per-inode hugetlbfs state wrapped around the generic VFS inode.
   * HUGETLBFS_I() recovers this structure from a pointer to the embedded
   * vfs_inode member.
   */
  struct hugetlbfs_inode_info {
  	/* Shared memory policy; looked up per index by hugetlb_set_vma_policy() */
  	struct shared_policy policy;
  	/* Embedded VFS inode; anchor for container_of() in HUGETLBFS_I() */
  	struct inode vfs_inode;
  };
  
  /* Map a VFS inode back to the hugetlbfs_inode_info that embeds it. */
  static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
  {
  	struct hugetlbfs_inode_info *info;

  	info = container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
  	return info;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
64
  /*
   * NOTE(review): not referenced in the visible part of this file;
   * presumably a group id configured through sysctl - confirm at the users.
   */
  int sysctl_hugetlb_shm_group;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
65
66
67
  enum {
  	Opt_size, Opt_nr_inodes,
  	Opt_mode, Opt_uid, Opt_gid,
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
68
  	Opt_pagesize, Opt_min_size,
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
69
70
  	Opt_err,
  };
a447c0932   Steven Whitehouse   vfs: Use const fo...
71
  static const match_table_t tokens = {
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
72
73
74
75
76
  	{Opt_size,	"size=%s"},
  	{Opt_nr_inodes,	"nr_inodes=%s"},
  	{Opt_mode,	"mode=%o"},
  	{Opt_uid,	"uid=%u"},
  	{Opt_gid,	"gid=%u"},
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
77
  	{Opt_pagesize,	"pagesize=%s"},
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
78
  	{Opt_min_size,	"min_size=%s"},
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
79
80
  	{Opt_err,	NULL},
  };
70c3547e3   Mike Kravetz   hugetlbfs: add hu...
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
  #ifdef CONFIG_NUMA
  /*
   * Point vma->vm_policy at the policy found for @index in the inode's
   * shared policy.  Pair with hugetlb_drop_vma_policy().
   */
  static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma,
  					struct inode *inode, pgoff_t index)
  {
  	struct shared_policy *sp = &HUGETLBFS_I(inode)->policy;

  	vma->vm_policy = mpol_shared_policy_lookup(sp, index);
  }

  /* Hand vma->vm_policy to mpol_cond_put(). */
  static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma)
  {
  	mpol_cond_put(vma->vm_policy);
  }
  #else
  /* Without CONFIG_NUMA both helpers are empty. */
  static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma,
  					struct inode *inode, pgoff_t index)
  {
  }

  static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma)
  {
  }
  #endif
2e9b367c2   Adam Litke   [PATCH] hugetlb: ...
103
104
105
106
107
108
109
110
111
  /*
   * Drop one reference on every page held in @pvec, then reinitialise the
   * pagevec so it can be reused.
   */
  static void huge_pagevec_release(struct pagevec *pvec)
  {
  	int idx = 0;

  	while (idx < pagevec_count(pvec)) {
  		put_page(pvec->pages[idx]);
  		idx++;
  	}

  	pagevec_reinit(pvec);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
112
113
  static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
  {
496ad9aa8   Al Viro   new helper: file_...
114
  	struct inode *inode = file_inode(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
115
116
  	loff_t len, vma_len;
  	int ret;
a55164389   Andi Kleen   hugetlb: modular ...
117
  	struct hstate *h = hstate_file(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
118

68589bc35   Hugh Dickins   [PATCH] hugetlb: ...
119
  	/*
dec4ad86c   David Gibson   hugepage: fix bro...
120
121
122
123
124
125
  	 * vma address alignment (but not the pgoff alignment) has
  	 * already been checked by prepare_hugepage_range.  If you add
  	 * any error returns here, do so after setting VM_HUGETLB, so
  	 * is_vm_hugetlb_page tests below unmap_region go the right
  	 * way when do_mmap_pgoff unwinds (may be important on powerpc
  	 * and ia64).
68589bc35   Hugh Dickins   [PATCH] hugetlb: ...
126
  	 */
a2fce9143   Naoya Horiguchi   hugetlbfs: stop s...
127
  	vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
68589bc35   Hugh Dickins   [PATCH] hugetlb: ...
128
  	vma->vm_ops = &hugetlb_vm_ops;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
129

2b37c35e6   Becky Bruce   fs/hugetlbfs/inod...
130
  	if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
dec4ad86c   David Gibson   hugepage: fix bro...
131
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
132
  	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
5955102c9   Al Viro   wrappers for ->i_...
133
  	inode_lock(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
134
  	file_accessed(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
135
136
137
  
  	ret = -ENOMEM;
  	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
138

a1e78772d   Mel Gorman   hugetlb: reserve ...
139
  	if (hugetlb_reserve_pages(inode,
a55164389   Andi Kleen   hugetlb: modular ...
140
  				vma->vm_pgoff >> huge_page_order(h),
5a6fe1259   Mel Gorman   Do not account fo...
141
142
  				len >> huge_page_shift(h), vma,
  				vma->vm_flags))
a43a8c39b   Kenneth W Chen   [PATCH] tightenin...
143
  		goto out;
b45b5bd65   David Gibson   [PATCH] hugepage:...
144

4c8872659   Adam Litke   [PATCH] hugetlb: ...
145
  	ret = 0;
b6174df5e   Zhang, Yanmin   [PATCH] mmap zero...
146
  	if (vma->vm_flags & VM_WRITE && inode->i_size < len)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
147
148
  		inode->i_size = len;
  out:
5955102c9   Al Viro   wrappers for ->i_...
149
  	inode_unlock(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
150
151
152
153
154
  
  	return ret;
  }
  
  /*
508034a32   Hugh Dickins   [PATCH] mm: unmap...
155
   * Called under down_write(mmap_sem).
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
156
   */
d2ba27e80   Adrian Bunk   proper prototype ...
157
  #ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
158
159
160
161
162
163
  static unsigned long
  hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
  		unsigned long len, unsigned long pgoff, unsigned long flags)
  {
  	struct mm_struct *mm = current->mm;
  	struct vm_area_struct *vma;
a55164389   Andi Kleen   hugetlb: modular ...
164
  	struct hstate *h = hstate_file(file);
086593559   Michel Lespinasse   mm: use vm_unmapp...
165
  	struct vm_unmapped_area_info info;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
166

a55164389   Andi Kleen   hugetlb: modular ...
167
  	if (len & ~huge_page_mask(h))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
168
169
170
  		return -EINVAL;
  	if (len > TASK_SIZE)
  		return -ENOMEM;
036e08568   Benjamin Herrenschmidt   get_unmapped_area...
171
  	if (flags & MAP_FIXED) {
a55164389   Andi Kleen   hugetlb: modular ...
172
  		if (prepare_hugepage_range(file, addr, len))
036e08568   Benjamin Herrenschmidt   get_unmapped_area...
173
174
175
  			return -EINVAL;
  		return addr;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
176
  	if (addr) {
a55164389   Andi Kleen   hugetlb: modular ...
177
  		addr = ALIGN(addr, huge_page_size(h));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
178
179
180
181
182
  		vma = find_vma(mm, addr);
  		if (TASK_SIZE - len >= addr &&
  		    (!vma || addr + len <= vma->vm_start))
  			return addr;
  	}
086593559   Michel Lespinasse   mm: use vm_unmapp...
183
184
185
186
187
188
189
  	info.flags = 0;
  	info.length = len;
  	info.low_limit = TASK_UNMAPPED_BASE;
  	info.high_limit = TASK_SIZE;
  	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
  	info.align_offset = 0;
  	return vm_unmapped_area(&info);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
190
191
  }
  #endif
34d0640e2   Al Viro   switch hugetlbfs ...
192
  static size_t
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
193
  hugetlbfs_read_actor(struct page *page, unsigned long offset,
34d0640e2   Al Viro   switch hugetlbfs ...
194
  			struct iov_iter *to, unsigned long size)
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
195
  {
34d0640e2   Al Viro   switch hugetlbfs ...
196
  	size_t copied = 0;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
197
  	int i, chunksize;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
198
  	/* Find which 4k chunk and offset with in that chunk */
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
199
200
  	i = offset >> PAGE_SHIFT;
  	offset = offset & ~PAGE_MASK;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
201
202
  
  	while (size) {
34d0640e2   Al Viro   switch hugetlbfs ...
203
  		size_t n;
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
204
  		chunksize = PAGE_SIZE;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
205
206
207
208
  		if (offset)
  			chunksize -= offset;
  		if (chunksize > size)
  			chunksize = size;
34d0640e2   Al Viro   switch hugetlbfs ...
209
210
211
212
  		n = copy_page_to_iter(&page[i], offset, chunksize, to);
  		copied += n;
  		if (n != chunksize)
  			return copied;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
213
214
  		offset = 0;
  		size -= chunksize;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
215
216
  		i++;
  	}
34d0640e2   Al Viro   switch hugetlbfs ...
217
  	return copied;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
218
219
220
221
222
  }
  
  /*
   * Support for read() - Find the page attached to f_mapping and copy out the
   * data. Its *very* similar to do_generic_mapping_read(), we can't use that
ea1754a08   Kirill A. Shutemov   mm, fs: remove re...
223
   * since it has PAGE_SIZE assumptions.
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
224
   */
34d0640e2   Al Viro   switch hugetlbfs ...
225
  static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
226
  {
34d0640e2   Al Viro   switch hugetlbfs ...
227
228
229
  	struct file *file = iocb->ki_filp;
  	struct hstate *h = hstate_file(file);
  	struct address_space *mapping = file->f_mapping;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
230
  	struct inode *inode = mapping->host;
34d0640e2   Al Viro   switch hugetlbfs ...
231
232
  	unsigned long index = iocb->ki_pos >> huge_page_shift(h);
  	unsigned long offset = iocb->ki_pos & ~huge_page_mask(h);
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
233
234
235
  	unsigned long end_index;
  	loff_t isize;
  	ssize_t retval = 0;
34d0640e2   Al Viro   switch hugetlbfs ...
236
  	while (iov_iter_count(to)) {
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
237
  		struct page *page;
34d0640e2   Al Viro   switch hugetlbfs ...
238
  		size_t nr, copied;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
239
240
  
  		/* nr is the maximum number of bytes to copy from this page */
a55164389   Andi Kleen   hugetlb: modular ...
241
  		nr = huge_page_size(h);
a05b0855f   Aneesh Kumar K.V   hugetlbfs: avoid ...
242
243
  		isize = i_size_read(inode);
  		if (!isize)
34d0640e2   Al Viro   switch hugetlbfs ...
244
  			break;
a05b0855f   Aneesh Kumar K.V   hugetlbfs: avoid ...
245
  		end_index = (isize - 1) >> huge_page_shift(h);
34d0640e2   Al Viro   switch hugetlbfs ...
246
247
248
  		if (index > end_index)
  			break;
  		if (index == end_index) {
a55164389   Andi Kleen   hugetlb: modular ...
249
  			nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
a05b0855f   Aneesh Kumar K.V   hugetlbfs: avoid ...
250
  			if (nr <= offset)
34d0640e2   Al Viro   switch hugetlbfs ...
251
  				break;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
252
253
254
255
  		}
  		nr = nr - offset;
  
  		/* Find the page */
a05b0855f   Aneesh Kumar K.V   hugetlbfs: avoid ...
256
  		page = find_lock_page(mapping, index);
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
257
258
259
260
261
  		if (unlikely(page == NULL)) {
  			/*
  			 * We have a HOLE, zero out the user-buffer for the
  			 * length of the hole or request.
  			 */
34d0640e2   Al Viro   switch hugetlbfs ...
262
  			copied = iov_iter_zero(nr, to);
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
263
  		} else {
a05b0855f   Aneesh Kumar K.V   hugetlbfs: avoid ...
264
  			unlock_page(page);
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
265
266
267
  			/*
  			 * We have the page, copy it to user space buffer.
  			 */
34d0640e2   Al Viro   switch hugetlbfs ...
268
  			copied = hugetlbfs_read_actor(page, offset, to, nr);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
269
  			put_page(page);
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
270
  		}
34d0640e2   Al Viro   switch hugetlbfs ...
271
272
273
274
275
276
  		offset += copied;
  		retval += copied;
  		if (copied != nr && iov_iter_count(to)) {
  			if (!retval)
  				retval = -EFAULT;
  			break;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
277
  		}
a55164389   Andi Kleen   hugetlb: modular ...
278
279
  		index += offset >> huge_page_shift(h);
  		offset &= ~huge_page_mask(h);
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
280
  	}
34d0640e2   Al Viro   switch hugetlbfs ...
281
  	iocb->ki_pos = ((loff_t)index << huge_page_shift(h)) + offset;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
282
283
  	return retval;
  }
800d15a53   Nick Piggin   implement simple ...
284
285
286
287
  /* Always fails with -EINVAL; none of the arguments are examined. */
  static int hugetlbfs_write_begin(struct file *file,
  				 struct address_space *mapping,
  				 loff_t pos, unsigned len, unsigned flags,
  				 struct page **pagep, void **fsdata)
  {
  	return -EINVAL;
  }
800d15a53   Nick Piggin   implement simple ...
291
292
293
  /*
   * Hits BUG() as soon as it is called.  The -EINVAL return only comes
   * into play if BUG() returns.
   */
  static int hugetlbfs_write_end(struct file *file,
  			       struct address_space *mapping,
  			       loff_t pos, unsigned len, unsigned copied,
  			       struct page *page, void *fsdata)
  {
  	BUG();

  	return -EINVAL;
  }
b5cec28d3   Mike Kravetz   hugetlbfs: trunca...
298
  /*
   * Clear the dirty and uptodate flags of @page and take it out of the page
   * cache.
   */
  static void remove_huge_page(struct page *page)
  {
  	ClearPageDirty(page);
  	ClearPageUptodate(page);

  	delete_from_page_cache(page);
  }
4aae8d1c0   Mike Kravetz   mm/hugetlbfs: unm...
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
  /*
   * hugetlb_vmdelete_list - unmap a file range from every vma that maps it.
   * @root:  interval tree of vmas mapping the file (mapping->i_mmap)
   * @start: first page offset to unmap, in PAGE_SIZE units
   * @end:   page offset one past the last page to unmap (exclusive), or 0
   *         to unmap everything from @start onwards
   *
   * For each overlapping vma the range is clipped to the vma and handed to
   * unmap_hugepage_range().
   */
  static void
  hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end)
  {
  	struct vm_area_struct *vma;

  	/*
  	 * end == 0 indicates that the entire range after
  	 * start should be unmapped.
  	 *
  	 * The interval tree walk takes an inclusive last index while @end is
  	 * exclusive, so walk up to end - 1.  Walking up to @end would also
  	 * visit a vma that starts exactly at @end and unmap an empty range
  	 * from it.
  	 */
  	vma_interval_tree_foreach(vma, root, start,
  				  end ? end - 1 : ULONG_MAX) {
  		unsigned long v_offset;
  		unsigned long v_end;

  		/*
  		 * Can the expression below overflow on 32-bit arches?
  		 * No, because the interval tree returns us only those vmas
  		 * which overlap the truncated area starting at pgoff,
  		 * and no vma on a 32-bit arch can span beyond the 4GB.
  		 */
  		if (vma->vm_pgoff < start)
  			v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
  		else
  			v_offset = 0;

  		if (!end)
  			v_end = vma->vm_end;
  		else {
  			/* Clip the end of the range to the end of the vma. */
  			v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
  							+ vma->vm_start;
  			if (v_end > vma->vm_end)
  				v_end = vma->vm_end;
  		}

  		unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
  									NULL);
  	}
  }
b5cec28d3   Mike Kravetz   hugetlbfs: trunca...
341
342
343
344
  
  /*
   * remove_inode_hugepages handles two distinct cases: truncation and hole
   * punch.  There are subtle differences in operation for each case.
   *
   * Truncation is indicated by the end of range being LLONG_MAX.
   *	In this case, we first scan the range and release found pages.
   *	After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
   *	maps and global counts.  Page faults can not race with truncation
   *	in this routine.  hugetlb_no_page() prevents page faults in the
   *	truncated range.  It checks i_size before allocation, and again after
   *	with the page table lock for the page held.  The same lock must be
   *	acquired to unmap a page.
   *
   * Hole punch is indicated if the end is not LLONG_MAX.
   *	In the hole punch case we scan the range and release found pages.
   *	Only when releasing a page is the associated region/reserv map
   *	deleted.  The region/reserv map for ranges without associated
   *	pages are not modified.  Page faults can race with hole punch.
   *	This is indicated if we find a mapped page.
   *
   * Note: If the passed end of range value is beyond the end of file, but
   * not LLONG_MAX this routine still performs a hole punch operation.
   */
  static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
  				   loff_t lend)
  {
  	struct hstate *h = hstate_inode(inode);
  	struct address_space *mapping = &inode->i_data;
  	const pgoff_t start = lstart >> huge_page_shift(h);
  	const pgoff_t end = lend >> huge_page_shift(h);
  	const bool truncate_op = (lend == LLONG_MAX);
  	struct vm_area_struct pseudo_vma;
  	struct pagevec pvec;
  	pgoff_t next = start;
  	long lookup_nr = PAGEVEC_SIZE;
  	int idx;
  	int freed = 0;

  	/* Only used as an argument to hugetlb_fault_mutex_hash(). */
  	memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
  	pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);

  	pagevec_init(&pvec, 0);

  	while (next < end) {
  		/*
  		 * Don't grab more pages than the number left in the range.
  		 */
  		if (end - next < lookup_nr)
  			lookup_nr = end - next;

  		/*
  		 * When no more pages are found, we are done.
  		 */
  		if (!pagevec_lookup(&pvec, mapping, next, lookup_nr))
  			break;

  		for (idx = 0; idx < pagevec_count(&pvec); ++idx) {
  			struct page *page = pvec.pages[idx];
  			struct mutex *fault_mutex;
  			u32 hash;

  			/*
  			 * The page (index) could be beyond end.  This is
  			 * only possible in the punch hole case as end is
  			 * max page offset in the truncate case.
  			 */
  			next = page->index;
  			if (next >= end)
  				break;

  			hash = hugetlb_fault_mutex_hash(h, current->mm,
  							&pseudo_vma,
  							mapping, next, 0);
  			fault_mutex = &hugetlb_fault_mutex_table[hash];
  			mutex_lock(fault_mutex);

  			/*
  			 * If page is mapped, it was faulted in after being
  			 * unmapped in caller.  Unmap (again) now after taking
  			 * the fault mutex.  The mutex will prevent faults
  			 * until we finish removing the page.
  			 *
  			 * This race can only happen in the hole punch case.
  			 * Getting here in a truncate operation is a bug.
  			 */
  			if (unlikely(page_mapped(page))) {
  				BUG_ON(truncate_op);

  				i_mmap_lock_write(mapping);
  				hugetlb_vmdelete_list(&mapping->i_mmap,
  					next * pages_per_huge_page(h),
  					(next + 1) * pages_per_huge_page(h));
  				i_mmap_unlock_write(mapping);
  			}

  			lock_page(page);
  			/*
  			 * We must free the huge page and remove from page
  			 * cache (remove_huge_page) BEFORE removing the
  			 * region/reserve map (hugetlb_unreserve_pages).  In
  			 * rare out of memory conditions, removal of the
  			 * region/reserve map could fail. Correspondingly,
  			 * the subpool and global reserve usage count can need
  			 * to be adjusted.
  			 */
  			VM_BUG_ON(PagePrivate(page));
  			remove_huge_page(page);
  			freed++;

  			/* Hole punch drops the reserve entry page by page. */
  			if (!truncate_op &&
  			    unlikely(hugetlb_unreserve_pages(inode, next,
  							     next + 1, 1)))
  				hugetlb_fix_reserve_counts(inode);

  			unlock_page(page);
  			mutex_unlock(fault_mutex);
  		}

  		/* Resume the lookup just past the last index looked at. */
  		++next;
  		huge_pagevec_release(&pvec);
  		cond_resched();
  	}

  	/* Truncation drops the reserve entries for the whole tail at once. */
  	if (truncate_op)
  		(void)hugetlb_unreserve_pages(inode, start, LONG_MAX, freed);
  }
2bbbda308   Al Viro   switch hugetlbfs ...
458
  static void hugetlbfs_evict_inode(struct inode *inode)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
459
  {
9119a41e9   Joonsoo Kim   mm, hugetlb: unif...
460
  	struct resv_map *resv_map;
b5cec28d3   Mike Kravetz   hugetlbfs: trunca...
461
  	remove_inode_hugepages(inode, 0, LLONG_MAX);
9119a41e9   Joonsoo Kim   mm, hugetlb: unif...
462
463
464
465
  	resv_map = (struct resv_map *)inode->i_mapping->private_data;
  	/* root inode doesn't have the resv_map, so we should check it */
  	if (resv_map)
  		resv_map_release(&resv_map->refs);
dbd5768f8   Jan Kara   vfs: Rename end_w...
466
  	clear_inode(inode);
149f4211a   Christoph Hellwig   [PATCH] hugetlbfs...
467
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
468
469
  /*
   * Truncate @inode to @offset, which must be huge page aligned (BUG
   * otherwise).  The new size is published first, then every mapping of the
   * tail is unmapped, and finally the huge pages beyond @offset are removed.
   * Always returns 0.
   */
  static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
  {
  	struct hstate *h = hstate_inode(inode);
  	struct address_space *mapping = inode->i_mapping;
  	pgoff_t first_page;

  	BUG_ON(offset & ~huge_page_mask(h));
  	/* hugetlb_vmdelete_list() works in PAGE_SIZE units. */
  	first_page = offset >> PAGE_SHIFT;

  	i_size_write(inode, offset);

  	i_mmap_lock_write(mapping);
  	if (!RB_EMPTY_ROOT(&mapping->i_mmap))
  		hugetlb_vmdelete_list(&mapping->i_mmap, first_page, 0);
  	i_mmap_unlock_write(mapping);

  	remove_inode_hugepages(inode, offset, LLONG_MAX);

  	return 0;
  }
70c3547e3   Mike Kravetz   hugetlbfs: add hu...
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
  /*
   * Punch a hole covering the huge pages that lie entirely inside
   * [@offset, @offset + @len).  Nothing is done when the range contains no
   * complete huge page.  Always returns 0.
   */
  static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
  {
  	struct hstate *h = hstate_inode(inode);
  	struct address_space *mapping = inode->i_mapping;
  	loff_t hpage_size = huge_page_size(h);
  	loff_t hole_start, hole_end;

  	/*
  	 * For hole punch round up the beginning offset of the hole and
  	 * round down the end.
  	 */
  	hole_start = round_up(offset, hpage_size);
  	hole_end = round_down(offset + len, hpage_size);

  	if (hole_end <= hole_start)
  		return 0;

  	inode_lock(inode);

  	/* Unmap the range from every vma before removing the pages. */
  	i_mmap_lock_write(mapping);
  	if (!RB_EMPTY_ROOT(&mapping->i_mmap))
  		hugetlb_vmdelete_list(&mapping->i_mmap,
  					hole_start >> PAGE_SHIFT,
  					hole_end  >> PAGE_SHIFT);
  	i_mmap_unlock_write(mapping);

  	remove_inode_hugepages(inode, hole_start, hole_end);

  	inode_unlock(inode);

  	return 0;
  }
  
  /*
   * hugetlbfs_fallocate - fallocate() handler for hugetlbfs files.
   * @file:   file to operate on
   * @mode:   FALLOC_FL_* flags; only KEEP_SIZE and PUNCH_HOLE are accepted
   * @offset: start of the range in bytes
   * @len:    length of the range in bytes
   *
   * With FALLOC_FL_PUNCH_HOLE the work is delegated to
   * hugetlbfs_punch_hole().  Otherwise every huge page index touched by the
   * range that is not yet in the page cache gets a freshly allocated, zeroed
   * huge page.  Unless FALLOC_FL_KEEP_SIZE is set, i_size is grown to
   * @offset + @len.
   *
   * Returns 0 on success, -EOPNOTSUPP for unsupported mode bits, -EINTR when
   * a signal is pending, or the error from inode_newsize_ok(),
   * alloc_huge_page() or huge_add_to_page_cache().  Note that on -EINTR the
   * loop is left with "break", so the i_size and ctime updates still run.
   */
  static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
  				loff_t len)
  {
  	struct inode *inode = file_inode(file);
  	struct address_space *mapping = inode->i_mapping;
  	struct hstate *h = hstate_inode(inode);
  	struct vm_area_struct pseudo_vma;
  	struct mm_struct *mm = current->mm;
  	loff_t hpage_size = huge_page_size(h);
  	unsigned long hpage_shift = huge_page_shift(h);
  	pgoff_t start, index, end;
  	int error;
  	u32 hash;

  	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
  		return -EOPNOTSUPP;

  	if (mode & FALLOC_FL_PUNCH_HOLE)
  		return hugetlbfs_punch_hole(inode, offset, len);

  	/*
  	 * Default preallocate case.
  	 * For this range, start is rounded down and end is rounded up
  	 * as well as being converted to page offsets.
  	 */
  	start = offset >> hpage_shift;
  	end = (offset + len + hpage_size - 1) >> hpage_shift;

  	inode_lock(inode);

  	/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
  	error = inode_newsize_ok(inode, offset + len);
  	if (error)
  		goto out;

  	/*
  	 * Initialize a pseudo vma as this is required by the huge page
  	 * allocation routines.  If NUMA is configured, use page index
  	 * as input to create an allocation policy.
  	 */
  	memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
  	pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
  	pseudo_vma.vm_file = file;

  	for (index = start; index < end; index++) {
  		struct page *page;
  		/*
  		 * This is supposed to be the vaddr where the page is being
  		 * faulted in, but we have no vaddr here.
  		 */
  		unsigned long addr;
  		int avoid_reserve = 0;

  		cond_resched();

  		/*
  		 * fallocate(2) manpage permits EINTR; we may have been
  		 * interrupted because we are using up too much memory.
  		 */
  		if (signal_pending(current)) {
  			error = -EINTR;
  			break;
  		}

  		/* Set numa allocation policy based on index */
  		hugetlb_set_vma_policy(&pseudo_vma, inode, index);

  		/* addr is the offset within the file (zero based) */
  		addr = index * hpage_size;

  		/* mutex taken here, fault path and hole punch */
  		hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping,
  						index, addr);
  		mutex_lock(&hugetlb_fault_mutex_table[hash]);

  		/* See if already present in mapping to avoid alloc/free */
  		page = find_get_page(mapping, index);
  		if (page) {
  			put_page(page);
  			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
  			hugetlb_drop_vma_policy(&pseudo_vma);
  			continue;
  		}

  		/* Allocate page and add to page cache */
  		page = alloc_huge_page(&pseudo_vma, addr, avoid_reserve);
  		hugetlb_drop_vma_policy(&pseudo_vma);
  		if (IS_ERR(page)) {
  			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
  			error = PTR_ERR(page);
  			goto out;
  		}
  		clear_huge_page(page, addr, pages_per_huge_page(h));
  		__SetPageUptodate(page);
  		error = huge_add_to_page_cache(page, mapping, index);
  		if (unlikely(error)) {
  			put_page(page);
  			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
  			goto out;
  		}

  		mutex_unlock(&hugetlb_fault_mutex_table[hash]);

  		/*
  		 * unlock_page because locked by add_to_page_cache()
  		 * put_page due to reference from alloc_huge_page()
  		 *
  		 * Unlock while our own reference is still held; the page
  		 * must not be touched once that reference has been dropped.
  		 */
  		unlock_page(page);
  		put_page(page);
  	}

  	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
  		i_size_write(inode, offset + len);
  	inode->i_ctime = current_time(inode);
  out:
  	inode_unlock(inode);
  	return error;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
631
632
  static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
  {
2b0143b5c   David Howells   VFS: normal files...
633
  	struct inode *inode = d_inode(dentry);
a55164389   Andi Kleen   hugetlb: modular ...
634
  	struct hstate *h = hstate_inode(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
635
636
637
638
  	int error;
  	unsigned int ia_valid = attr->ia_valid;
  
  	BUG_ON(!inode);
31051c85b   Jan Kara   fs: Give dentry t...
639
  	error = setattr_prepare(dentry, attr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
640
  	if (error)
1025774ce   Christoph Hellwig   remove inode_setattr
641
  		return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
642
643
644
  
  	if (ia_valid & ATTR_SIZE) {
  		error = -EINVAL;
1025774ce   Christoph Hellwig   remove inode_setattr
645
646
647
  		if (attr->ia_size & ~huge_page_mask(h))
  			return -EINVAL;
  		error = hugetlb_vmtruncate(inode, attr->ia_size);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
648
  		if (error)
1025774ce   Christoph Hellwig   remove inode_setattr
649
  			return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
650
  	}
1025774ce   Christoph Hellwig   remove inode_setattr
651
652
653
654
  
  	setattr_copy(inode, attr);
  	mark_inode_dirty(inode);
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
655
  }
7d54fa647   Al Viro   hugetlbfs: switch...
656
657
  static struct inode *hugetlbfs_get_root(struct super_block *sb,
  					struct hugetlbfs_config *config)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
658
659
  {
  	struct inode *inode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
660
661
662
663
  
  	inode = new_inode(sb);
  	if (inode) {
  		struct hugetlbfs_inode_info *info;
85fe4025c   Christoph Hellwig   fs: do not assign...
664
  		inode->i_ino = get_next_ino();
7d54fa647   Al Viro   hugetlbfs: switch...
665
666
667
  		inode->i_mode = S_IFDIR | config->mode;
  		inode->i_uid = config->uid;
  		inode->i_gid = config->gid;
078cd8279   Deepa Dinamani   fs: Replace CURRE...
668
  		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
7d54fa647   Al Viro   hugetlbfs: switch...
669
670
671
672
673
674
  		info = HUGETLBFS_I(inode);
  		mpol_shared_policy_init(&info->policy, NULL);
  		inode->i_op = &hugetlbfs_dir_inode_operations;
  		inode->i_fop = &simple_dir_operations;
  		/* directory inodes start off with i_nlink == 2 (for "." entry) */
  		inc_nlink(inode);
65ed76010   Aneesh Kumar K.V   hugetlbfs: lockde...
675
  		lockdep_annotate_inode_mutex_key(inode);
7d54fa647   Al Viro   hugetlbfs: switch...
676
677
678
  	}
  	return inode;
  }
b610ded71   Michal Hocko   hugetlb: fix lock...
679
  /*
   * Hugetlbfs is not reclaimable; therefore its i_mmap_rwsem will never
   * be taken from reclaim -- unlike regular filesystems. This needs an
   * annotation because huge_pmd_share() does an allocation under hugetlb's
   * i_mmap_rwsem.
   */
c8c06efa8   Davidlohr Bueso   mm: convert i_mma...
685
  /* lockdep class assigned to i_mapping->i_mmap_rwsem in hugetlbfs_get_inode() */
  static struct lock_class_key hugetlbfs_i_mmap_rwsem_key;
b610ded71   Michal Hocko   hugetlb: fix lock...
686

7d54fa647   Al Viro   hugetlbfs: switch...
687
688
  static struct inode *hugetlbfs_get_inode(struct super_block *sb,
  					struct inode *dir,
18df22524   Al Viro   hugetlbfs: propag...
689
  					umode_t mode, dev_t dev)
7d54fa647   Al Viro   hugetlbfs: switch...
690
691
  {
  	struct inode *inode;
9119a41e9   Joonsoo Kim   mm, hugetlb: unif...
692
693
694
695
696
  	struct resv_map *resv_map;
  
  	resv_map = resv_map_alloc();
  	if (!resv_map)
  		return NULL;
7d54fa647   Al Viro   hugetlbfs: switch...
697
698
699
700
701
702
  
  	inode = new_inode(sb);
  	if (inode) {
  		struct hugetlbfs_inode_info *info;
  		inode->i_ino = get_next_ino();
  		inode_init_owner(inode, dir, mode);
c8c06efa8   Davidlohr Bueso   mm: convert i_mma...
703
704
  		lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
  				&hugetlbfs_i_mmap_rwsem_key);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
705
  		inode->i_mapping->a_ops = &hugetlbfs_aops;
078cd8279   Deepa Dinamani   fs: Replace CURRE...
706
  		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
9119a41e9   Joonsoo Kim   mm, hugetlb: unif...
707
  		inode->i_mapping->private_data = resv_map;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
708
  		info = HUGETLBFS_I(inode);
6bfde05bf   Eric B Munson   hugetlbfs: allow ...
709
710
711
  		/*
  		 * The policy is initialized here even if we are creating a
  		 * private inode because initialization simply creates an
4a8c7bb59   Nathan Zimmer   mm/mempolicy.c: c...
712
  		 * an empty rb tree and calls rwlock_init(), later when we
6bfde05bf   Eric B Munson   hugetlbfs: allow ...
713
714
715
  		 * call mpol_free_shared_policy() it will just return because
  		 * the rb tree will still be empty.
  		 */
71fe804b6   Lee Schermerhorn   mempolicy: use st...
716
  		mpol_shared_policy_init(&info->policy, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
717
718
719
720
721
722
723
724
725
726
727
728
729
  		switch (mode & S_IFMT) {
  		default:
  			init_special_inode(inode, mode, dev);
  			break;
  		case S_IFREG:
  			inode->i_op = &hugetlbfs_inode_operations;
  			inode->i_fop = &hugetlbfs_file_operations;
  			break;
  		case S_IFDIR:
  			inode->i_op = &hugetlbfs_dir_inode_operations;
  			inode->i_fop = &simple_dir_operations;
  
  			/* directory inodes start off with i_nlink == 2 (for "." entry) */
d8c76e6f4   Dave Hansen   [PATCH] r/o bind ...
730
  			inc_nlink(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
731
732
733
  			break;
  		case S_IFLNK:
  			inode->i_op = &page_symlink_inode_operations;
21fc61c73   Al Viro   don't put symlink...
734
  			inode_nohighmem(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
735
736
  			break;
  		}
e096d0c7e   Josh Boyer   lockdep: Add help...
737
  		lockdep_annotate_inode_mutex_key(inode);
9119a41e9   Joonsoo Kim   mm, hugetlb: unif...
738
739
  	} else
  		kref_put(&resv_map->refs, resv_map_release);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
740
741
742
743
744
745
746
  	return inode;
  }
  
  /*
   * File creation: allocate an inode of the requested type, attach it to
   * @dentry and refresh the parent directory's ctime/mtime.
   *
   * Returns 0 on success, or -ENOSPC when no inode could be obtained.
   */
  static int hugetlbfs_mknod(struct inode *dir,
  			struct dentry *dentry, umode_t mode, dev_t dev)
  {
  	struct inode *inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev);

  	if (!inode)
  		return -ENOSPC;

  	dir->i_ctime = dir->i_mtime = current_time(dir);
  	d_instantiate(dentry, inode);
  	dget(dentry);	/* Extra count - pin the dentry in core */

  	return 0;
  }
18bb1db3e   Al Viro   switch vfs_mkdir(...
761
  /*
   * Create a directory via hugetlbfs_mknod() and, on success, bump the
   * parent's link count.  Returns hugetlbfs_mknod()'s result.
   */
  static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
  {
  	int err = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0);

  	if (err)
  		return err;

  	inc_nlink(dir);
  	return 0;
  }
ebfc3b49a   Al Viro   don't pass nameid...
768
  /*
   * Create a regular file; a thin wrapper around hugetlbfs_mknod() with
   * S_IFREG forced on.  @excl is not consulted here.
   */
  static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
  {
  	const umode_t file_mode = mode | S_IFREG;

  	return hugetlbfs_mknod(dir, dentry, file_mode, 0);
  }
  
  static int hugetlbfs_symlink(struct inode *dir,
  			struct dentry *dentry, const char *symname)
  {
  	struct inode *inode;
  	int error = -ENOSPC;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
778

7d54fa647   Al Viro   hugetlbfs: switch...
779
  	inode = hugetlbfs_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
780
781
782
783
784
785
786
787
788
  	if (inode) {
  		int l = strlen(symname)+1;
  		error = page_symlink(inode, symname, l);
  		if (!error) {
  			d_instantiate(dentry, inode);
  			dget(dentry);
  		} else
  			iput(inode);
  	}
078cd8279   Deepa Dinamani   fs: Replace CURRE...
789
  	dir->i_ctime = dir->i_mtime = current_time(dir);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
790
791
792
793
794
  
  	return error;
  }
  
  /*
   * mark the head page dirty
   *
   * @page may be any page of the compound huge page; the dirty flag is
   * always set on the page returned by compound_head().  Always returns 0.
   */
  static int hugetlbfs_set_page_dirty(struct page *page)
  {
  	SetPageDirty(compound_head(page));

  	return 0;
  }
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
804
  static int hugetlbfs_migrate_page(struct address_space *mapping,
b969c4ab9   Mel Gorman   mm: compaction: d...
805
  				struct page *newpage, struct page *page,
a6bc32b89   Mel Gorman   mm: compaction: i...
806
  				enum migrate_mode mode)
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
807
808
809
810
  {
  	int rc;
  
  	rc = migrate_huge_page_move_mapping(mapping, newpage, page);
78bd52097   Rafael Aquini   mm: adjust addres...
811
  	if (rc != MIGRATEPAGE_SUCCESS)
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
812
813
  		return rc;
  	migrate_page_copy(newpage, page);
78bd52097   Rafael Aquini   mm: adjust addres...
814
  	return MIGRATEPAGE_SUCCESS;
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
815
  }
726c33422   David Howells   [PATCH] VFS: Perm...
816
  static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
817
  {
726c33422   David Howells   [PATCH] VFS: Perm...
818
  	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
2b0143b5c   David Howells   VFS: normal files...
819
  	struct hstate *h = hstate_inode(d_inode(dentry));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
820
821
  
  	buf->f_type = HUGETLBFS_MAGIC;
a55164389   Andi Kleen   hugetlb: modular ...
822
  	buf->f_bsize = huge_page_size(h);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
823
824
  	if (sbinfo) {
  		spin_lock(&sbinfo->stat_lock);
74a8a65c5   David Gibson   [PATCH] Fix huget...
825
826
  		/* If no limits set, just report 0 for max/free/used
  		 * blocks, like simple_statfs() */
90481622d   David Gibson   hugepages: fix us...
827
828
829
830
831
832
833
834
835
  		if (sbinfo->spool) {
  			long free_pages;
  
  			spin_lock(&sbinfo->spool->lock);
  			buf->f_blocks = sbinfo->spool->max_hpages;
  			free_pages = sbinfo->spool->max_hpages
  				- sbinfo->spool->used_hpages;
  			buf->f_bavail = buf->f_bfree = free_pages;
  			spin_unlock(&sbinfo->spool->lock);
74a8a65c5   David Gibson   [PATCH] Fix huget...
836
837
838
  			buf->f_files = sbinfo->max_inodes;
  			buf->f_ffree = sbinfo->free_inodes;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
839
840
841
842
843
844
845
846
847
848
849
850
  		spin_unlock(&sbinfo->stat_lock);
  	}
  	buf->f_namelen = NAME_MAX;
  	return 0;
  }
  
  static void hugetlbfs_put_super(struct super_block *sb)
  {
  	struct hugetlbfs_sb_info *sbi = HUGETLBFS_SB(sb);
  
  	if (sbi) {
  		sb->s_fs_info = NULL;
90481622d   David Gibson   hugepages: fix us...
851
852
853
  
  		if (sbi->spool)
  			hugepage_put_subpool(sbi->spool);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
854
855
856
  		kfree(sbi);
  	}
  }
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
  /*
   * Take one inode out of the mount's free-inode budget.
   *
   * A negative free_inodes count means "no limit" and nothing is accounted.
   * Returns 1 if an inode may be allocated, 0 if the budget is exhausted.
   */
  static inline int hugetlbfs_dec_free_inodes(struct hugetlbfs_sb_info *sbinfo)
  {
  	int granted = 1;

  	if (sbinfo->free_inodes < 0)
  		return 1;

  	spin_lock(&sbinfo->stat_lock);
  	if (likely(sbinfo->free_inodes))
  		sbinfo->free_inodes--;
  	else
  		granted = 0;
  	spin_unlock(&sbinfo->stat_lock);

  	return granted;
  }
  
  /*
   * Give one inode back to the mount's free-inode budget.  Nothing is done
   * when free_inodes is negative (no limit configured).
   */
  static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo)
  {
  	if (sbinfo->free_inodes < 0)
  		return;

  	spin_lock(&sbinfo->stat_lock);
  	sbinfo->free_inodes++;
  	spin_unlock(&sbinfo->stat_lock);
  }
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
880
  /* slab cache from which struct hugetlbfs_inode_info objects are allocated */
  static struct kmem_cache *hugetlbfs_inode_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
881
882
883
  
  static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
  {
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
884
  	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
885
  	struct hugetlbfs_inode_info *p;
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
886
887
  	if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo)))
  		return NULL;
e94b17660   Christoph Lameter   [PATCH] slab: rem...
888
  	p = kmem_cache_alloc(hugetlbfs_inode_cachep, GFP_KERNEL);
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
889
890
  	if (unlikely(!p)) {
  		hugetlbfs_inc_free_inodes(sbinfo);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
891
  		return NULL;
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
892
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
893
894
  	return &p->vfs_inode;
  }
fa0d7e3de   Nick Piggin   fs: icache RCU fr...
895
896
897
  static void hugetlbfs_i_callback(struct rcu_head *head)
  {
  	struct inode *inode = container_of(head, struct inode, i_rcu);
fa0d7e3de   Nick Piggin   fs: icache RCU fr...
898
899
  	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
900
901
  static void hugetlbfs_destroy_inode(struct inode *inode)
  {
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
902
  	hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
903
  	mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
fa0d7e3de   Nick Piggin   fs: icache RCU fr...
904
  	call_rcu(&inode->i_rcu, hugetlbfs_i_callback);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
905
  }
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
906
  static const struct address_space_operations hugetlbfs_aops = {
800d15a53   Nick Piggin   implement simple ...
907
908
  	.write_begin	= hugetlbfs_write_begin,
  	.write_end	= hugetlbfs_write_end,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
909
  	.set_page_dirty	= hugetlbfs_set_page_dirty,
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
910
  	.migratepage    = hugetlbfs_migrate_page,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
911
  };
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
912

51cc50685   Alexey Dobriyan   SL*B: drop kmem c...
913
  static void init_once(void *foo)
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
914
915
  {
  	struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
a35afb830   Christoph Lameter   Remove SLAB_CTOR_...
916
  	inode_init_once(&ei->vfs_inode);
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
917
  }
4b6f5d20b   Arjan van de Ven   [PATCH] Make most...
918
  const struct file_operations hugetlbfs_file_operations = {
34d0640e2   Al Viro   switch hugetlbfs ...
919
  	.read_iter		= hugetlbfs_read_iter,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
920
  	.mmap			= hugetlbfs_file_mmap,
1b061d924   Christoph Hellwig   rename the generi...
921
  	.fsync			= noop_fsync,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
922
  	.get_unmapped_area	= hugetlb_get_unmapped_area,
70c3547e3   Mike Kravetz   hugetlbfs: add hu...
923
924
  	.llseek			= default_llseek,
  	.fallocate		= hugetlbfs_fallocate,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
925
  };
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
926
  static const struct inode_operations hugetlbfs_dir_inode_operations = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
927
928
929
930
931
932
933
934
935
936
937
  	.create		= hugetlbfs_create,
  	.lookup		= simple_lookup,
  	.link		= simple_link,
  	.unlink		= simple_unlink,
  	.symlink	= hugetlbfs_symlink,
  	.mkdir		= hugetlbfs_mkdir,
  	.rmdir		= simple_rmdir,
  	.mknod		= hugetlbfs_mknod,
  	.rename		= simple_rename,
  	.setattr	= hugetlbfs_setattr,
  };
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
938
  static const struct inode_operations hugetlbfs_inode_operations = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
939
940
  	.setattr	= hugetlbfs_setattr,
  };
ee9b6d61a   Josef 'Jeff' Sipek   [PATCH] Mark stru...
941
  static const struct super_operations hugetlbfs_ops = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
942
943
  	.alloc_inode    = hugetlbfs_alloc_inode,
  	.destroy_inode  = hugetlbfs_destroy_inode,
2bbbda308   Al Viro   switch hugetlbfs ...
944
  	.evict_inode	= hugetlbfs_evict_inode,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
945
  	.statfs		= hugetlbfs_statfs,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
946
  	.put_super	= hugetlbfs_put_super,
10f19a86a   Miklos Szeredi   mount options: fi...
947
  	.show_options	= generic_show_options,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
948
  };
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
  /*
   * How a size/min_size mount option was given: not at all (NO_SIZE), as a
   * byte count (SIZE_STD) or as a percentage of the pool (SIZE_PERCENT).
   */
  enum { NO_SIZE, SIZE_STD, SIZE_PERCENT };
  
  /*
   * Translate a size mount option into a number of huge pages of the pool
   * described by @h.
   *
   * @size_opt is a byte count when @val_type is SIZE_STD and a percentage
   * of the pool (h->max_huge_pages) when it is SIZE_PERCENT.  For NO_SIZE
   * the option was not given and -1 is returned.
   */
  static long long
  hugetlbfs_size_to_hpages(struct hstate *h, unsigned long long size_opt,
  								int val_type)
  {
  	unsigned long long bytes = size_opt;

  	if (val_type == NO_SIZE)
  		return -1;

  	if (val_type == SIZE_PERCENT) {
  		/* percentage of the pool, expressed in bytes */
  		bytes <<= huge_page_shift(h);
  		bytes *= h->max_huge_pages;
  		do_div(bytes, 100);
  	}

  	return bytes >> huge_page_shift(h);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
972
973
974
  static int
  hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
  {
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
975
976
977
  	char *p, *rest;
  	substring_t args[MAX_OPT_ARGS];
  	int option;
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
978
979
  	unsigned long long max_size_opt = 0, min_size_opt = 0;
  	int max_val_type = NO_SIZE, min_val_type = NO_SIZE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
980
981
982
  
  	if (!options)
  		return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
983

e73a75fa7   Randy Dunlap   hugetlbfs: use li...
984
985
  	while ((p = strsep(&options, ",")) != NULL) {
  		int token;
b4c07bce7   Lee Schermerhorn   hugetlbfs: handle...
986
987
  		if (!*p)
  			continue;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
988
989
990
991
992
993
  
  		token = match_token(p, tokens, args);
  		switch (token) {
  		case Opt_uid:
  			if (match_int(&args[0], &option))
   				goto bad_val;
a0eb3a05a   Eric W. Biederman   userns: Convert h...
994
995
996
  			pconfig->uid = make_kuid(current_user_ns(), option);
  			if (!uid_valid(pconfig->uid))
  				goto bad_val;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
997
998
999
1000
1001
  			break;
  
  		case Opt_gid:
  			if (match_int(&args[0], &option))
   				goto bad_val;
a0eb3a05a   Eric W. Biederman   userns: Convert h...
1002
1003
1004
  			pconfig->gid = make_kgid(current_user_ns(), option);
  			if (!gid_valid(pconfig->gid))
  				goto bad_val;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1005
1006
1007
1008
1009
  			break;
  
  		case Opt_mode:
  			if (match_octal(&args[0], &option))
   				goto bad_val;
75897d60a   Ken Chen   hugetlb: allow st...
1010
  			pconfig->mode = option & 01777U;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1011
1012
1013
  			break;
  
  		case Opt_size: {
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1014
1015
1016
  			/* memparse() will accept a K/M/G without a digit */
  			if (!isdigit(*args[0].from))
  				goto bad_val;
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1017
1018
  			max_size_opt = memparse(args[0].from, &rest);
  			max_val_type = SIZE_STD;
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1019
  			if (*rest == '%')
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1020
  				max_val_type = SIZE_PERCENT;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1021
1022
  			break;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1023

e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1024
1025
1026
1027
1028
1029
  		case Opt_nr_inodes:
  			/* memparse() will accept a K/M/G without a digit */
  			if (!isdigit(*args[0].from))
  				goto bad_val;
  			pconfig->nr_inodes = memparse(args[0].from, &rest);
  			break;
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1030
1031
1032
1033
1034
  		case Opt_pagesize: {
  			unsigned long ps;
  			ps = memparse(args[0].from, &rest);
  			pconfig->hstate = size_to_hstate(ps);
  			if (!pconfig->hstate) {
9b857d26d   Andrew Morton   fs/hugetlbfs/inod...
1035
1036
  				pr_err("Unsupported page size %lu MB
  ",
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1037
1038
1039
1040
1041
  					ps >> 20);
  				return -EINVAL;
  			}
  			break;
  		}
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
  		case Opt_min_size: {
  			/* memparse() will accept a K/M/G without a digit */
  			if (!isdigit(*args[0].from))
  				goto bad_val;
  			min_size_opt = memparse(args[0].from, &rest);
  			min_val_type = SIZE_STD;
  			if (*rest == '%')
  				min_val_type = SIZE_PERCENT;
  			break;
  		}
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1052
  		default:
9b857d26d   Andrew Morton   fs/hugetlbfs/inod...
1053
1054
  			pr_err("Bad mount option: \"%s\"
  ", p);
b4c07bce7   Lee Schermerhorn   hugetlbfs: handle...
1055
  			return -EINVAL;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1056
1057
  			break;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1058
  	}
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1059

7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
  	/*
  	 * Use huge page pool size (in hstate) to convert the size
  	 * options to number of huge pages.  If NO_SIZE, -1 is returned.
  	 */
  	pconfig->max_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
  						max_size_opt, max_val_type);
  	pconfig->min_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
  						min_size_opt, min_val_type);
  
  	/*
  	 * If max_size was specified, then min_size must be smaller
  	 */
  	if (max_val_type > NO_SIZE &&
  	    pconfig->min_hpages > pconfig->max_hpages) {
  		pr_err("minimum size can not be greater than maximum size
  ");
  		return -EINVAL;
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1077
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1078
  	return 0;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1079
1080
  
  bad_val:
9b857d26d   Andrew Morton   fs/hugetlbfs/inod...
1081
1082
  	pr_err("Bad value '%s' for mount option '%s'
  ", args[0].from, p);
c12ddba09   Akinobu Mita   hugetlbfs: return...
1083
   	return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1084
1085
1086
1087
1088
  }
  
  static int
  hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1089
1090
1091
  	int ret;
  	struct hugetlbfs_config config;
  	struct hugetlbfs_sb_info *sbinfo;
10f19a86a   Miklos Szeredi   mount options: fi...
1092
  	save_mount_options(sb, data);
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1093
  	config.max_hpages = -1; /* No limit on size by default */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1094
  	config.nr_inodes = -1; /* No limit on number of inodes by default */
77c70de15   David Howells   CRED: Wrap task c...
1095
1096
  	config.uid = current_fsuid();
  	config.gid = current_fsgid();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1097
  	config.mode = 0755;
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1098
  	config.hstate = &default_hstate;
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1099
  	config.min_hpages = -1; /* No default minimum size */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1100
  	ret = hugetlbfs_parse_options(data, &config);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1101
1102
1103
1104
1105
1106
1107
  	if (ret)
  		return ret;
  
  	sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL);
  	if (!sbinfo)
  		return -ENOMEM;
  	sb->s_fs_info = sbinfo;
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1108
  	sbinfo->hstate = config.hstate;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1109
  	spin_lock_init(&sbinfo->stat_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1110
1111
  	sbinfo->max_inodes = config.nr_inodes;
  	sbinfo->free_inodes = config.nr_inodes;
90481622d   David Gibson   hugepages: fix us...
1112
  	sbinfo->spool = NULL;
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1113
1114
1115
1116
1117
1118
1119
1120
1121
  	/*
  	 * Allocate and initialize subpool if maximum or minimum size is
  	 * specified.  Any needed reservations (for minimim size) are taken
  	 * taken when the subpool is created.
  	 */
  	if (config.max_hpages != -1 || config.min_hpages != -1) {
  		sbinfo->spool = hugepage_new_subpool(config.hstate,
  							config.max_hpages,
  							config.min_hpages);
90481622d   David Gibson   hugepages: fix us...
1122
1123
1124
  		if (!sbinfo->spool)
  			goto out_free;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1125
  	sb->s_maxbytes = MAX_LFS_FILESIZE;
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1126
1127
  	sb->s_blocksize = huge_page_size(config.hstate);
  	sb->s_blocksize_bits = huge_page_shift(config.hstate);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1128
1129
1130
  	sb->s_magic = HUGETLBFS_MAGIC;
  	sb->s_op = &hugetlbfs_ops;
  	sb->s_time_gran = 1;
48fde701a   Al Viro   switch open-coded...
1131
1132
  	sb->s_root = d_make_root(hugetlbfs_get_root(sb, &config));
  	if (!sb->s_root)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1133
  		goto out_free;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1134
1135
  	return 0;
  out_free:
6e6870d4f   Fabian Frederick   fs/hugetlbfs/inod...
1136
  	kfree(sbinfo->spool);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1137
1138
1139
  	kfree(sbinfo);
  	return -ENOMEM;
  }
3c26ff6e4   Al Viro   convert get_sb_no...
1140
1141
  static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
  	int flags, const char *dev_name, void *data)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1142
  {
3c26ff6e4   Al Viro   convert get_sb_no...
1143
  	return mount_nodev(fs_type, flags, data, hugetlbfs_fill_super);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1144
1145
1146
1147
  }
  
  static struct file_system_type hugetlbfs_fs_type = {
  	.name		= "hugetlbfs",
3c26ff6e4   Al Viro   convert get_sb_no...
1148
  	.mount		= hugetlbfs_mount,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1149
1150
  	.kill_sb	= kill_litter_super,
  };
42d7395fe   Andi Kleen   mm: support more ...
1151
  /* mounts used by hugetlb_file_setup(), indexed by hstate index (see get_hstate_idx()) */
  static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1152

ef1ff6b8c   From: Mel Gorman   hugetlbfs: do not...
1153
  static int can_do_hugetlb_shm(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1154
  {
a0eb3a05a   Eric W. Biederman   userns: Convert h...
1155
1156
1157
  	kgid_t shm_group;
  	shm_group = make_kgid(&init_user_ns, sysctl_hugetlb_shm_group);
  	return capable(CAP_IPC_LOCK) || in_group_p(shm_group);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1158
  }
42d7395fe   Andi Kleen   mm: support more ...
1159
1160
  static int get_hstate_idx(int page_size_log)
  {
af73e4d95   Naoya Horiguchi   hugetlbfs: fix mm...
1161
  	struct hstate *h = hstate_sizelog(page_size_log);
42d7395fe   Andi Kleen   mm: support more ...
1162

42d7395fe   Andi Kleen   mm: support more ...
1163
1164
1165
1166
  	if (!h)
  		return -1;
  	return h - hstates;
  }
be1d2cf5e   Fabian Frederick   fs/hugetlbfs/inod...
1167
  static const struct dentry_operations anon_ops = {
118b23022   Al Viro   cope with potenti...
1168
  	.d_dname = simple_dname
0df4d6e5b   Al Viro   hugetlb_file_setu...
1169
  };
af73e4d95   Naoya Horiguchi   hugetlbfs: fix mm...
1170
1171
1172
1173
1174
1175
  /*
   * Note that size should be aligned to proper hugepage size in caller side,
   * otherwise hugetlb_reserve_pages reserves one less hugepages than intended.
   */
  struct file *hugetlb_file_setup(const char *name, size_t size,
  				vm_flags_t acctflag, struct user_struct **user,
42d7395fe   Andi Kleen   mm: support more ...
1176
  				int creat_flags, int page_size_log)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1177
  {
39b652527   Anatol Pomozov   fs: Preserve erro...
1178
  	struct file *file = ERR_PTR(-ENOMEM);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1179
  	struct inode *inode;
2c48b9c45   Al Viro   switch alloc_file...
1180
  	struct path path;
0df4d6e5b   Al Viro   hugetlb_file_setu...
1181
  	struct super_block *sb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1182
  	struct qstr quick_string;
42d7395fe   Andi Kleen   mm: support more ...
1183
1184
1185
1186
1187
  	int hstate_idx;
  
  	hstate_idx = get_hstate_idx(page_size_log);
  	if (hstate_idx < 0)
  		return ERR_PTR(-ENODEV);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1188

353d5c30c   Hugh Dickins   mm: fix hugetlb b...
1189
  	*user = NULL;
42d7395fe   Andi Kleen   mm: support more ...
1190
  	if (!hugetlbfs_vfsmount[hstate_idx])
5bc98594d   Akinobu Mita   hugetlbfs: add NU...
1191
  		return ERR_PTR(-ENOENT);
ef1ff6b8c   From: Mel Gorman   hugetlbfs: do not...
1192
  	if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
353d5c30c   Hugh Dickins   mm: fix hugetlb b...
1193
1194
  		*user = current_user();
  		if (user_shm_lock(size, *user)) {
21a3c273f   David Rientjes   mm, hugetlb: add ...
1195
  			task_lock(current);
9b857d26d   Andrew Morton   fs/hugetlbfs/inod...
1196
1197
  			pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated
  ",
21a3c273f   David Rientjes   mm, hugetlb: add ...
1198
1199
  				current->comm, current->pid);
  			task_unlock(current);
353d5c30c   Hugh Dickins   mm: fix hugetlb b...
1200
1201
  		} else {
  			*user = NULL;
2584e5173   Ravikiran G Thirumalai   mm: reintroduce a...
1202
  			return ERR_PTR(-EPERM);
353d5c30c   Hugh Dickins   mm: fix hugetlb b...
1203
  		}
2584e5173   Ravikiran G Thirumalai   mm: reintroduce a...
1204
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1205

0df4d6e5b   Al Viro   hugetlb_file_setu...
1206
  	sb = hugetlbfs_vfsmount[hstate_idx]->mnt_sb;
9d66586f7   Eric W. Biederman   shm: fix the file...
1207
  	quick_string.name = name;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1208
1209
  	quick_string.len = strlen(quick_string.name);
  	quick_string.hash = 0;
0df4d6e5b   Al Viro   hugetlb_file_setu...
1210
  	path.dentry = d_alloc_pseudo(sb, &quick_string);
2c48b9c45   Al Viro   switch alloc_file...
1211
  	if (!path.dentry)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1212
  		goto out_shm_unlock;
0df4d6e5b   Al Viro   hugetlb_file_setu...
1213
  	d_set_d_op(path.dentry, &anon_ops);
42d7395fe   Andi Kleen   mm: support more ...
1214
  	path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]);
39b652527   Anatol Pomozov   fs: Preserve erro...
1215
  	file = ERR_PTR(-ENOSPC);
0df4d6e5b   Al Viro   hugetlb_file_setu...
1216
  	inode = hugetlbfs_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1217
  	if (!inode)
ce8d2cdf3   Dave Hansen   r/o bind mounts: ...
1218
  		goto out_dentry;
e1832f292   Stephen Smalley   ipc: use private ...
1219
1220
  	if (creat_flags == HUGETLB_SHMFS_INODE)
  		inode->i_flags |= S_PRIVATE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1221

39b652527   Anatol Pomozov   fs: Preserve erro...
1222
  	file = ERR_PTR(-ENOMEM);
af73e4d95   Naoya Horiguchi   hugetlbfs: fix mm...
1223
1224
1225
  	if (hugetlb_reserve_pages(inode, 0,
  			size >> huge_page_shift(hstate_inode(inode)), NULL,
  			acctflag))
b45b5bd65   David Gibson   [PATCH] hugepage:...
1226
  		goto out_inode;
2c48b9c45   Al Viro   switch alloc_file...
1227
  	d_instantiate(path.dentry, inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1228
  	inode->i_size = size;
6d6b77f16   Miklos Szeredi   filesystems: add ...
1229
  	clear_nlink(inode);
ce8d2cdf3   Dave Hansen   r/o bind mounts: ...
1230

2c48b9c45   Al Viro   switch alloc_file...
1231
  	file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
ce8d2cdf3   Dave Hansen   r/o bind mounts: ...
1232
  			&hugetlbfs_file_operations);
39b652527   Anatol Pomozov   fs: Preserve erro...
1233
  	if (IS_ERR(file))
b4d232e65   Al Viro   [PATCH] double ip...
1234
  		goto out_dentry; /* inode is already attached */
ce8d2cdf3   Dave Hansen   r/o bind mounts: ...
1235

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1236
  	return file;
b45b5bd65   David Gibson   [PATCH] hugepage:...
1237
1238
  out_inode:
  	iput(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1239
  out_dentry:
2c48b9c45   Al Viro   switch alloc_file...
1240
  	path_put(&path);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1241
  out_shm_unlock:
353d5c30c   Hugh Dickins   mm: fix hugetlb b...
1242
1243
1244
1245
  	if (*user) {
  		user_shm_unlock(size, *user);
  		*user = NULL;
  	}
39b652527   Anatol Pomozov   fs: Preserve erro...
1246
  	return file;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1247
1248
1249
1250
  }
  
  /*
   * init_hugetlbfs_fs - set up hugetlbfs at boot.
   *
   * Creates the inode slab cache, registers the filesystem type and then
   * creates one internal kernel mount per hstate.  The mounts are stored in
   * hugetlbfs_vfsmount[] in for_each_hstate() iteration order.  Only the
   * mount at default_hstate_idx is required; a failure for any other page
   * size is logged and leaves a NULL slot.
   *
   * Returns 0 on success, -ENOTSUPP when huge pages are not supported, or
   * another negative errno once everything set up so far has been undone.
   */
  static int __init init_hugetlbfs_fs(void)
  {
  	struct hstate *h;
  	int error;
  	int i;

  	if (!hugepages_supported()) {
  		pr_info("disabling because there are no supported hugepage sizes\n");
  		return -ENOTSUPP;
  	}

  	error = -ENOMEM;
  	hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache",
  					sizeof(struct hugetlbfs_inode_info),
  					0, SLAB_ACCOUNT, init_once);
  	if (hugetlbfs_inode_cachep == NULL)
  		goto out2;

  	error = register_filesystem(&hugetlbfs_fs_type);
  	if (error)
  		goto out;

  	i = 0;
  	for_each_hstate(h) {
  		char buf[50];
  		unsigned ps_kb = 1U << (h->order + PAGE_SHIFT - 10);

  		snprintf(buf, sizeof(buf), "pagesize=%uK", ps_kb);
  		hugetlbfs_vfsmount[i] = kern_mount_data(&hugetlbfs_fs_type,
  							buf);

  		if (IS_ERR(hugetlbfs_vfsmount[i])) {
  			pr_err("Cannot mount internal hugetlbfs for page size %uK\n",
  			       ps_kb);
  			/*
  			 * Only the default hstate decides the outcome, so
  			 * keep its errno instead of letting a later optional
  			 * failure overwrite it.
  			 */
  			if (i == default_hstate_idx)
  				error = PTR_ERR(hugetlbfs_vfsmount[i]);
  			hugetlbfs_vfsmount[i] = NULL;
  		}
  		i++;
  	}
  	/* Non default hstates are optional */
  	if (!IS_ERR_OR_NULL(hugetlbfs_vfsmount[default_hstate_idx]))
  		return 0;

  	/*
  	 * The required default mount is missing.  Drop whichever optional
  	 * mounts did succeed and unregister the filesystem type before the
  	 * inode cache is destroyed, so nothing is left referring to it.
  	 */
  	while (i-- > 0) {
  		if (hugetlbfs_vfsmount[i]) {
  			kern_unmount(hugetlbfs_vfsmount[i]);
  			hugetlbfs_vfsmount[i] = NULL;
  		}
  	}
  	(void)unregister_filesystem(&hugetlbfs_fs_type);

   out:
  	kmem_cache_destroy(hugetlbfs_inode_cachep);
   out2:
  	return error;
  }
3e89e1c5e   Paul Gortmaker   hugetlb: make mm ...
1296
  /* Register init_hugetlbfs_fs() as an fs_initcall. */
  fs_initcall(init_hugetlbfs_fs)