Blame view

fs/hugetlbfs/inode.c 40.7 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
  /*
   * hugetlbpage-backed filesystem.  Based on ramfs.
   *
6d49e352a   Nadia Yvette Chambers   propagate name ch...
4
   * Nadia Yvette Chambers, 2002
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
5
6
   *
   * Copyright (C) 2002 Linus Torvalds.
3e89e1c5e   Paul Gortmaker   hugetlb: make mm ...
7
   * License: GPL
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
8
   */
9b857d26d   Andrew Morton   fs/hugetlbfs/inod...
9
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
10
11
  #include <linux/thread_info.h>
  #include <asm/current.h>
174cd4b1e   Ingo Molnar   sched/headers: Pr...
12
  #include <linux/sched/signal.h>		/* remove ASAP */
70c3547e3   Mike Kravetz   hugetlbfs: add hu...
13
  #include <linux/falloc.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
14
15
16
  #include <linux/fs.h>
  #include <linux/mount.h>
  #include <linux/file.h>
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
17
  #include <linux/kernel.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
19
20
21
22
  #include <linux/writeback.h>
  #include <linux/pagemap.h>
  #include <linux/highmem.h>
  #include <linux/init.h>
  #include <linux/string.h>
16f7e0fe2   Randy Dunlap   [PATCH] capable/c...
23
  #include <linux/capability.h>
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
24
  #include <linux/ctype.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
26
27
  #include <linux/backing-dev.h>
  #include <linux/hugetlb.h>
  #include <linux/pagevec.h>
32021982a   David Howells   hugetlbfs: Conver...
28
  #include <linux/fs_parser.h>
036e08568   Benjamin Herrenschmidt   get_unmapped_area...
29
  #include <linux/mman.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
30
31
32
33
  #include <linux/slab.h>
  #include <linux/dnotify.h>
  #include <linux/statfs.h>
  #include <linux/security.h>
1fd7317d0   Nick Black   Move magic number...
34
  #include <linux/magic.h>
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
35
  #include <linux/migrate.h>
34d0640e2   Al Viro   switch hugetlbfs ...
36
  #include <linux/uio.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
37

7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
38
  #include <linux/uaccess.h>
885902531   Shijie Hu   hugetlbfs: get un...
39
  #include <linux/sched/mm.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
40

ee9b6d61a   Josef 'Jeff' Sipek   [PATCH] Mark stru...
41
  static const struct super_operations hugetlbfs_ops;
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
42
  static const struct address_space_operations hugetlbfs_aops;
4b6f5d20b   Arjan van de Ven   [PATCH] Make most...
43
  const struct file_operations hugetlbfs_file_operations;
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
44
45
  static const struct inode_operations hugetlbfs_dir_inode_operations;
  static const struct inode_operations hugetlbfs_inode_operations;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
46

32021982a   David Howells   hugetlbfs: Conver...
47
48
49
  /*
   * How a size-style mount value was expressed.
   * NOTE(review): meanings inferred from the enumerator names -- confirm
   * against the option parser.
   */
  enum hugetlbfs_size_type {
  	NO_SIZE,	/* no value supplied */
  	SIZE_STD,	/* presumably an absolute size */
  	SIZE_PERCENT,	/* presumably a percentage */
  };
  
  struct hugetlbfs_fs_context {
4a25220d4   David Howells   hugetlbfs: Implem...
50
  	struct hstate		*hstate;
32021982a   David Howells   hugetlbfs: Conver...
51
52
  	unsigned long long	max_size_opt;
  	unsigned long long	min_size_opt;
4a25220d4   David Howells   hugetlbfs: Implem...
53
54
55
  	long			max_hpages;
  	long			nr_inodes;
  	long			min_hpages;
32021982a   David Howells   hugetlbfs: Conver...
56
57
  	enum hugetlbfs_size_type max_val_type;
  	enum hugetlbfs_size_type min_val_type;
4a25220d4   David Howells   hugetlbfs: Implem...
58
59
60
  	kuid_t			uid;
  	kgid_t			gid;
  	umode_t			mode;
a1d776ee3   David Gibson   hugetlb: cleanup ...
61
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
62
  /*
   * NOTE(review): not referenced in this part of the file; presumably the
   * group id configured through the hugetlb_shm_group sysctl -- confirm.
   */
  int sysctl_hugetlb_shm_group;
32021982a   David Howells   hugetlbfs: Conver...
63
64
65
66
67
68
69
70
  /* Identifiers for the mount options listed in hugetlb_fs_parameters[]. */
  enum hugetlb_param {
  	Opt_gid,		/* "gid" */
  	Opt_min_size,		/* "min_size" */
  	Opt_mode,		/* "mode" */
  	Opt_nr_inodes,		/* "nr_inodes" */
  	Opt_pagesize,		/* "pagesize" */
  	Opt_size,		/* "size" */
  	Opt_uid,		/* "uid" */
  };
d7167b149   Al Viro   fs_parse: fold fs...
72
  static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
32021982a   David Howells   hugetlbfs: Conver...
73
74
75
76
77
78
79
80
81
  	fsparam_u32   ("gid",		Opt_gid),
  	fsparam_string("min_size",	Opt_min_size),
  	fsparam_u32   ("mode",		Opt_mode),
  	fsparam_string("nr_inodes",	Opt_nr_inodes),
  	fsparam_string("pagesize",	Opt_pagesize),
  	fsparam_string("size",		Opt_size),
  	fsparam_u32   ("uid",		Opt_uid),
  	{}
  };
70c3547e3   Mike Kravetz   hugetlbfs: add hu...
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
  #ifdef CONFIG_NUMA
  /*
   * Set vma->vm_policy to whatever mpol_shared_policy_lookup() returns for
   * @index in the shared policy kept in the hugetlbfs inode info.
   */
  static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma,
  					struct inode *inode, pgoff_t index)
  {
  	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
  
  	vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index);
  }
  
  /* Hand vma->vm_policy to mpol_cond_put(). */
  static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma)
  {
  	mpol_cond_put(vma->vm_policy);
  }
  #else
  /* Without CONFIG_NUMA both helpers compile to nothing. */
  static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma,
  					struct inode *inode, pgoff_t index)
  {
  }
  
  static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma)
  {
  }
  #endif
2e9b367c2   Adam Litke   [PATCH] hugetlb: ...
104
105
106
107
108
109
110
111
112
  /*
   * Drop one reference on every page held in @pvec with put_page(), then
   * reinitialise the pagevec so it can be reused.
   */
  static void huge_pagevec_release(struct pagevec *pvec)
  {
  	int idx = 0;
  
  	while (idx < pagevec_count(pvec)) {
  		put_page(pvec->pages[idx]);
  		idx++;
  	}
  
  	pagevec_reinit(pvec);
  }
63489f8e8   Mike Kravetz   hugetlbfs: check ...
113
114
115
116
117
118
119
120
121
  /*
   * Mask used when checking the page offset value passed in via system
   * calls.  This value will be converted to a loff_t which is signed.
   * Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the
   * value.  The extra bit (the "+ 1" added to PAGE_SHIFT) is to take the
   * sign bit into account.  A page offset with any of these bits set is
   * rejected by hugetlbfs_file_mmap().
   */
  #define PGOFF_LOFFT_MAX \
  	(((1UL << (PAGE_SHIFT + 1)) - 1) <<  (BITS_PER_LONG - (PAGE_SHIFT + 1)))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
122
123
  static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
  {
496ad9aa8   Al Viro   new helper: file_...
124
  	struct inode *inode = file_inode(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
125
126
  	loff_t len, vma_len;
  	int ret;
a55164389   Andi Kleen   hugetlb: modular ...
127
  	struct hstate *h = hstate_file(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
128

68589bc35   Hugh Dickins   [PATCH] hugetlb: ...
129
  	/*
dec4ad86c   David Gibson   hugepage: fix bro...
130
131
132
133
  	 * vma address alignment (but not the pgoff alignment) has
  	 * already been checked by prepare_hugepage_range.  If you add
  	 * any error returns here, do so after setting VM_HUGETLB, so
  	 * is_vm_hugetlb_page tests below unmap_region go the right
45e55300f   Peter Collingbourne   mm: remove unnece...
134
  	 * way when do_mmap unwinds (may be important on powerpc
dec4ad86c   David Gibson   hugepage: fix bro...
135
  	 * and ia64).
68589bc35   Hugh Dickins   [PATCH] hugetlb: ...
136
  	 */
a2fce9143   Naoya Horiguchi   hugetlbfs: stop s...
137
  	vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
68589bc35   Hugh Dickins   [PATCH] hugetlb: ...
138
  	vma->vm_ops = &hugetlb_vm_ops;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
139

045c7a3f5   Mike Kravetz   hugetlbfs: fix of...
140
  	/*
63489f8e8   Mike Kravetz   hugetlbfs: check ...
141
  	 * page based offset in vm_pgoff could be sufficiently large to
5df63c2a1   Mike Kravetz   hugetlbfs: fix bu...
142
143
144
  	 * overflow a loff_t when converted to byte offset.  This can
  	 * only happen on architectures where sizeof(loff_t) ==
  	 * sizeof(unsigned long).  So, only check in those instances.
045c7a3f5   Mike Kravetz   hugetlbfs: fix of...
145
  	 */
5df63c2a1   Mike Kravetz   hugetlbfs: fix bu...
146
147
148
149
  	if (sizeof(unsigned long) == sizeof(loff_t)) {
  		if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
  			return -EINVAL;
  	}
045c7a3f5   Mike Kravetz   hugetlbfs: fix of...
150

63489f8e8   Mike Kravetz   hugetlbfs: check ...
151
  	/* must be huge page aligned */
2b37c35e6   Becky Bruce   fs/hugetlbfs/inod...
152
  	if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
dec4ad86c   David Gibson   hugepage: fix bro...
153
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
154
  	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
045c7a3f5   Mike Kravetz   hugetlbfs: fix of...
155
156
157
158
  	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
  	/* check for overflow */
  	if (len < vma_len)
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
159

5955102c9   Al Viro   wrappers for ->i_...
160
  	inode_lock(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
161
  	file_accessed(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
162
163
  
  	ret = -ENOMEM;
a1e78772d   Mel Gorman   hugetlb: reserve ...
164
  	if (hugetlb_reserve_pages(inode,
a55164389   Andi Kleen   hugetlb: modular ...
165
  				vma->vm_pgoff >> huge_page_order(h),
5a6fe1259   Mel Gorman   Do not account fo...
166
167
  				len >> huge_page_shift(h), vma,
  				vma->vm_flags))
a43a8c39b   Kenneth W Chen   [PATCH] tightenin...
168
  		goto out;
b45b5bd65   David Gibson   [PATCH] hugepage:...
169

4c8872659   Adam Litke   [PATCH] hugetlb: ...
170
  	ret = 0;
b6174df5e   Zhang, Yanmin   [PATCH] mmap zero...
171
  	if (vma->vm_flags & VM_WRITE && inode->i_size < len)
045c7a3f5   Mike Kravetz   hugetlbfs: fix of...
172
  		i_size_write(inode, len);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
173
  out:
5955102c9   Al Viro   wrappers for ->i_...
174
  	inode_unlock(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
175
176
177
178
179
  
  	return ret;
  }
  
  /*
3e4e28c5a   Michel Lespinasse   mmap locking API:...
180
   * Called under mmap_write_lock(mm).
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
181
   */
d2ba27e80   Adrian Bunk   proper prototype ...
182
  #ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
183
  static unsigned long
885902531   Shijie Hu   hugetlbfs: get un...
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
  hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
  		unsigned long len, unsigned long pgoff, unsigned long flags)
  {
  	struct hstate *h = hstate_file(file);
  	struct vm_unmapped_area_info info;
  
  	info.flags = 0;
  	info.length = len;
  	info.low_limit = current->mm->mmap_base;
  	info.high_limit = TASK_SIZE;
  	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
  	info.align_offset = 0;
  	return vm_unmapped_area(&info);
  }
  
  static unsigned long
  hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
  		unsigned long len, unsigned long pgoff, unsigned long flags)
  {
  	struct hstate *h = hstate_file(file);
  	struct vm_unmapped_area_info info;
  
  	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
  	info.length = len;
  	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
  	info.high_limit = current->mm->mmap_base;
  	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
  	info.align_offset = 0;
  	addr = vm_unmapped_area(&info);
  
  	/*
  	 * A failed mmap() very likely causes application failure,
  	 * so fall back to the bottom-up function here. This scenario
  	 * can happen with large stack limits and large mmap()
  	 * allocations.
  	 */
  	if (unlikely(offset_in_page(addr))) {
  		VM_BUG_ON(addr != -ENOMEM);
  		info.flags = 0;
  		info.low_limit = current->mm->mmap_base;
  		info.high_limit = TASK_SIZE;
  		addr = vm_unmapped_area(&info);
  	}
  
  	return addr;
  }
  
  static unsigned long
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
232
233
234
235
236
  hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
  		unsigned long len, unsigned long pgoff, unsigned long flags)
  {
  	struct mm_struct *mm = current->mm;
  	struct vm_area_struct *vma;
a55164389   Andi Kleen   hugetlb: modular ...
237
  	struct hstate *h = hstate_file(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
238

a55164389   Andi Kleen   hugetlb: modular ...
239
  	if (len & ~huge_page_mask(h))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
240
241
242
  		return -EINVAL;
  	if (len > TASK_SIZE)
  		return -ENOMEM;
036e08568   Benjamin Herrenschmidt   get_unmapped_area...
243
  	if (flags & MAP_FIXED) {
a55164389   Andi Kleen   hugetlb: modular ...
244
  		if (prepare_hugepage_range(file, addr, len))
036e08568   Benjamin Herrenschmidt   get_unmapped_area...
245
246
247
  			return -EINVAL;
  		return addr;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
248
  	if (addr) {
a55164389   Andi Kleen   hugetlb: modular ...
249
  		addr = ALIGN(addr, huge_page_size(h));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
250
251
  		vma = find_vma(mm, addr);
  		if (TASK_SIZE - len >= addr &&
1be7107fb   Hugh Dickins   mm: larger stack ...
252
  		    (!vma || addr + len <= vm_start_gap(vma)))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
253
254
  			return addr;
  	}
885902531   Shijie Hu   hugetlbfs: get un...
255
256
257
258
259
260
261
262
263
264
  	/*
  	 * Use mm->get_unmapped_area value as a hint to use topdown routine.
  	 * If architectures have special needs, they should define their own
  	 * version of hugetlb_get_unmapped_area.
  	 */
  	if (mm->get_unmapped_area == arch_get_unmapped_area_topdown)
  		return hugetlb_get_unmapped_area_topdown(file, addr, len,
  				pgoff, flags);
  	return hugetlb_get_unmapped_area_bottomup(file, addr, len,
  			pgoff, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
265
266
  }
  #endif
34d0640e2   Al Viro   switch hugetlbfs ...
267
  static size_t
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
268
  hugetlbfs_read_actor(struct page *page, unsigned long offset,
34d0640e2   Al Viro   switch hugetlbfs ...
269
  			struct iov_iter *to, unsigned long size)
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
270
  {
34d0640e2   Al Viro   switch hugetlbfs ...
271
  	size_t copied = 0;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
272
  	int i, chunksize;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
273
  	/* Find which 4k chunk and offset with in that chunk */
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
274
275
  	i = offset >> PAGE_SHIFT;
  	offset = offset & ~PAGE_MASK;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
276
277
  
  	while (size) {
34d0640e2   Al Viro   switch hugetlbfs ...
278
  		size_t n;
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
279
  		chunksize = PAGE_SIZE;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
280
281
282
283
  		if (offset)
  			chunksize -= offset;
  		if (chunksize > size)
  			chunksize = size;
34d0640e2   Al Viro   switch hugetlbfs ...
284
285
286
287
  		n = copy_page_to_iter(&page[i], offset, chunksize, to);
  		copied += n;
  		if (n != chunksize)
  			return copied;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
288
289
  		offset = 0;
  		size -= chunksize;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
290
291
  		i++;
  	}
34d0640e2   Al Viro   switch hugetlbfs ...
292
  	return copied;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
293
294
295
296
297
  }
  
  /*
   * Support for read() - Find the page attached to f_mapping and copy out the
   * data. Its *very* similar to do_generic_mapping_read(), we can't use that
ea1754a08   Kirill A. Shutemov   mm, fs: remove re...
298
   * since it has PAGE_SIZE assumptions.
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
299
   */
34d0640e2   Al Viro   switch hugetlbfs ...
300
  static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
301
  {
34d0640e2   Al Viro   switch hugetlbfs ...
302
303
304
  	struct file *file = iocb->ki_filp;
  	struct hstate *h = hstate_file(file);
  	struct address_space *mapping = file->f_mapping;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
305
  	struct inode *inode = mapping->host;
34d0640e2   Al Viro   switch hugetlbfs ...
306
307
  	unsigned long index = iocb->ki_pos >> huge_page_shift(h);
  	unsigned long offset = iocb->ki_pos & ~huge_page_mask(h);
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
308
309
310
  	unsigned long end_index;
  	loff_t isize;
  	ssize_t retval = 0;
34d0640e2   Al Viro   switch hugetlbfs ...
311
  	while (iov_iter_count(to)) {
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
312
  		struct page *page;
34d0640e2   Al Viro   switch hugetlbfs ...
313
  		size_t nr, copied;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
314
315
  
  		/* nr is the maximum number of bytes to copy from this page */
a55164389   Andi Kleen   hugetlb: modular ...
316
  		nr = huge_page_size(h);
a05b0855f   Aneesh Kumar K.V   hugetlbfs: avoid ...
317
318
  		isize = i_size_read(inode);
  		if (!isize)
34d0640e2   Al Viro   switch hugetlbfs ...
319
  			break;
a05b0855f   Aneesh Kumar K.V   hugetlbfs: avoid ...
320
  		end_index = (isize - 1) >> huge_page_shift(h);
34d0640e2   Al Viro   switch hugetlbfs ...
321
322
323
  		if (index > end_index)
  			break;
  		if (index == end_index) {
a55164389   Andi Kleen   hugetlb: modular ...
324
  			nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
a05b0855f   Aneesh Kumar K.V   hugetlbfs: avoid ...
325
  			if (nr <= offset)
34d0640e2   Al Viro   switch hugetlbfs ...
326
  				break;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
327
328
329
330
  		}
  		nr = nr - offset;
  
  		/* Find the page */
a05b0855f   Aneesh Kumar K.V   hugetlbfs: avoid ...
331
  		page = find_lock_page(mapping, index);
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
332
333
334
335
336
  		if (unlikely(page == NULL)) {
  			/*
  			 * We have a HOLE, zero out the user-buffer for the
  			 * length of the hole or request.
  			 */
34d0640e2   Al Viro   switch hugetlbfs ...
337
  			copied = iov_iter_zero(nr, to);
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
338
  		} else {
a05b0855f   Aneesh Kumar K.V   hugetlbfs: avoid ...
339
  			unlock_page(page);
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
340
341
342
  			/*
  			 * We have the page, copy it to user space buffer.
  			 */
34d0640e2   Al Viro   switch hugetlbfs ...
343
  			copied = hugetlbfs_read_actor(page, offset, to, nr);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
344
  			put_page(page);
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
345
  		}
34d0640e2   Al Viro   switch hugetlbfs ...
346
347
348
349
350
351
  		offset += copied;
  		retval += copied;
  		if (copied != nr && iov_iter_count(to)) {
  			if (!retval)
  				retval = -EFAULT;
  			break;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
352
  		}
a55164389   Andi Kleen   hugetlb: modular ...
353
354
  		index += offset >> huge_page_shift(h);
  		offset &= ~huge_page_mask(h);
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
355
  	}
34d0640e2   Al Viro   switch hugetlbfs ...
356
  	iocb->ki_pos = ((loff_t)index << huge_page_shift(h)) + offset;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
357
358
  	return retval;
  }
800d15a53   Nick Piggin   implement simple ...
359
360
361
362
  /* write_begin is not supported: every call fails with -EINVAL. */
  static int hugetlbfs_write_begin(struct file *file,
  				 struct address_space *mapping,
  				 loff_t pos, unsigned len, unsigned flags,
  				 struct page **pagep, void **fsdata)
  {
  	return -EINVAL;
  }
800d15a53   Nick Piggin   implement simple ...
366
367
368
  /*
   * hugetlbfs_write_begin() always fails, so reaching write_end is treated
   * as a kernel bug.
   */
  static int hugetlbfs_write_end(struct file *file,
  			       struct address_space *mapping,
  			       loff_t pos, unsigned len, unsigned copied,
  			       struct page *page, void *fsdata)
  {
  	BUG();
  	return -EINVAL;
  }
b5cec28d3   Mike Kravetz   hugetlbfs: trunca...
373
  /*
   * Clear the dirty and uptodate flags of @page and delete it from the page
   * cache.  The only caller in this file holds the page lock.
   */
  static void remove_huge_page(struct page *page)
  {
  	ClearPageDirty(page);
  	ClearPageUptodate(page);
  
  	delete_from_page_cache(page);
  }
4aae8d1c0   Mike Kravetz   mm/hugetlbfs: unm...
379
  static void
f808c13fd   Davidlohr Bueso   lib/interval_tree...
380
  hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
4aae8d1c0   Mike Kravetz   mm/hugetlbfs: unm...
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
  {
  	struct vm_area_struct *vma;
  
  	/*
  	 * end == 0 indicates that the entire range after
  	 * start should be unmapped.
  	 */
  	vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
  		unsigned long v_offset;
  		unsigned long v_end;
  
  		/*
  		 * Can the expression below overflow on 32-bit arches?
  		 * No, because the interval tree returns us only those vmas
  		 * which overlap the truncated area starting at pgoff,
  		 * and no vma on a 32-bit arch can span beyond the 4GB.
  		 */
  		if (vma->vm_pgoff < start)
  			v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
  		else
  			v_offset = 0;
  
  		if (!end)
  			v_end = vma->vm_end;
  		else {
  			v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
  							+ vma->vm_start;
  			if (v_end > vma->vm_end)
  				v_end = vma->vm_end;
  		}
  
  		unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
  									NULL);
  	}
  }
b5cec28d3   Mike Kravetz   hugetlbfs: trunca...
416
417
418
419
  
  /*
   * remove_inode_hugepages handles two distinct cases: truncation and hole
   * punch.  There are subtle differences in operation for each case.
   *
   * truncation is indicated by end of range being LLONG_MAX
   *	In this case, we first scan the range and release found pages.
   *	After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
   *	maps and global counts.  Page faults can not race with truncation
   *	in this routine.  hugetlb_no_page() holds i_mmap_rwsem and prevents
   *	page faults in the truncated range by checking i_size.  i_size is
   *	modified while holding i_mmap_rwsem.
   * hole punch is indicated if end is not LLONG_MAX
   *	In the hole punch case we scan the range and release found pages.
   *	Only when releasing a page is the associated region/reserv map
   *	deleted.  The region/reserv map for ranges without associated
   *	pages are not modified.  Page faults can race with hole punch.
   *	This is indicated if we find a mapped page.
   * Note: If the passed end of range value is beyond the end of file, but
   * not LLONG_MAX this routine still performs a hole punch operation.
   *
   * @lstart and @lend are byte offsets; they are converted to huge page
   * indices and pages with index in [start, end) are removed.
   */
  static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
  				   loff_t lend)
  {
  	struct hstate *h = hstate_inode(inode);
  	struct address_space *mapping = &inode->i_data;
  	const pgoff_t start = lstart >> huge_page_shift(h);
  	const pgoff_t end = lend >> huge_page_shift(h);
  	struct pagevec pvec;
  	pgoff_t next, index;
  	int i, freed = 0;
  	bool truncate_op = (lend == LLONG_MAX);
  
  	pagevec_init(&pvec);
  	next = start;
  	while (next < end) {
  		/*
  		 * When no more pages are found, we are done.
  		 */
  		if (!pagevec_lookup_range(&pvec, mapping, &next, end - 1))
  			break;
  
  		for (i = 0; i < pagevec_count(&pvec); ++i) {
  			struct page *page = pvec.pages[i];
  			u32 hash;
  
  			index = page->index;
  			hash = hugetlb_fault_mutex_hash(mapping, index);
  			if (!truncate_op) {
  				/*
  				 * Only need to hold the fault mutex in the
  				 * hole punch case.  This prevents races with
  				 * page faults.  Races are not possible in the
  				 * case of truncation.
  				 */
  				mutex_lock(&hugetlb_fault_mutex_table[hash]);
  			}
  
  			/*
  			 * If page is mapped, it was faulted in after being
  			 * unmapped in caller.  Unmap (again) now after taking
  			 * the fault mutex.  The mutex will prevent faults
  			 * until we finish removing the page.
  			 *
  			 * This race can only happen in the hole punch case.
  			 * Getting here in a truncate operation is a bug.
  			 */
  			if (unlikely(page_mapped(page))) {
  				BUG_ON(truncate_op);
  
  				/*
  				 * The fault mutex is dropped and retaken so
  				 * that i_mmap_rwsem is acquired before it.
  				 */
  				mutex_unlock(&hugetlb_fault_mutex_table[hash]);
  				i_mmap_lock_write(mapping);
  				mutex_lock(&hugetlb_fault_mutex_table[hash]);
  				hugetlb_vmdelete_list(&mapping->i_mmap,
  					index * pages_per_huge_page(h),
  					(index + 1) * pages_per_huge_page(h));
  				i_mmap_unlock_write(mapping);
  			}
  
  			lock_page(page);
  			/*
  			 * We must free the huge page and remove from page
  			 * cache (remove_huge_page) BEFORE removing the
  			 * region/reserve map (hugetlb_unreserve_pages).  In
  			 * rare out of memory conditions, removal of the
  			 * region/reserve map could fail. Correspondingly,
  			 * the subpool and global reserve usage count can need
  			 * to be adjusted.
  			 */
  			VM_BUG_ON(PagePrivate(page));
  			remove_huge_page(page);
  			freed++;
  			if (!truncate_op) {
  				if (unlikely(hugetlb_unreserve_pages(inode,
  							index, index + 1, 1)))
  					hugetlb_fix_reserve_counts(inode);
  			}
  
  			unlock_page(page);
  			if (!truncate_op)
  				mutex_unlock(&hugetlb_fault_mutex_table[hash]);
  		}
  		huge_pagevec_release(&pvec);
  		cond_resched();
  	}
  
  	/* Truncation drops the reservations for the whole tail in one go. */
  	if (truncate_op)
  		(void)hugetlb_unreserve_pages(inode, start, LONG_MAX, freed);
  }
2bbbda308   Al Viro   switch hugetlbfs ...
526
  static void hugetlbfs_evict_inode(struct inode *inode)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
527
  {
9119a41e9   Joonsoo Kim   mm, hugetlb: unif...
528
  	struct resv_map *resv_map;
b5cec28d3   Mike Kravetz   hugetlbfs: trunca...
529
  	remove_inode_hugepages(inode, 0, LLONG_MAX);
f27a5136f   Mike Kravetz   hugetlbfs: always...
530
531
532
533
534
535
536
537
538
  
  	/*
  	 * Get the resv_map from the address space embedded in the inode.
  	 * This is the address space which points to any resv_map allocated
  	 * at inode creation time.  If this is a device special inode,
  	 * i_mapping may not point to the original address space.
  	 */
  	resv_map = (struct resv_map *)(&inode->i_data)->private_data;
  	/* Only regular and link inodes have associated reserve maps */
9119a41e9   Joonsoo Kim   mm, hugetlb: unif...
539
540
  	if (resv_map)
  		resv_map_release(&resv_map->refs);
dbd5768f8   Jan Kara   vfs: Rename end_w...
541
  	clear_inode(inode);
149f4211a   Christoph Hellwig   [PATCH] hugetlbfs...
542
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
543
544
  /*
   * Truncate @inode to @offset bytes.  @offset must be huge page aligned.
   *
   * i_size is updated and the mappings beyond @offset are torn down while
   * holding the i_mmap lock for writing; the huge pages past @offset are
   * removed afterwards.  Always returns 0.
   */
  static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
  {
  	struct hstate *h = hstate_inode(inode);
  	struct address_space *mapping = inode->i_mapping;
  	pgoff_t first_pgoff;
  
  	BUG_ON(offset & ~huge_page_mask(h));
  	first_pgoff = offset >> PAGE_SHIFT;
  
  	i_mmap_lock_write(mapping);
  	i_size_write(inode, offset);
  	/* end == 0: unmap everything from first_pgoff onwards. */
  	if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
  		hugetlb_vmdelete_list(&mapping->i_mmap, first_pgoff, 0);
  	i_mmap_unlock_write(mapping);
  
  	remove_inode_hugepages(inode, offset, LLONG_MAX);
  
  	return 0;
  }
70c3547e3   Mike Kravetz   hugetlbfs: add hu...
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
  /*
   * Punch a hole covering the whole huge pages inside
   * [@offset, @offset + @len).
   *
   * Returns 0 on success, including when the range contains no whole huge
   * page, and -EPERM if the file is sealed against writes.
   */
  static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
  {
  	struct hstate *h = hstate_inode(inode);
  	loff_t hpage_size = huge_page_size(h);
  	struct hugetlbfs_inode_info *info;
  	struct address_space *mapping;
  	loff_t first, last;
  	long ret = 0;
  
  	/*
  	 * For hole punch round up the beginning offset of the hole and
  	 * round down the end.
  	 */
  	first = round_up(offset, hpage_size);
  	last = round_down(offset + len, hpage_size);
  	if (last <= first)
  		return 0;
  
  	mapping = inode->i_mapping;
  	info = HUGETLBFS_I(inode);
  
  	inode_lock(inode);
  
  	/* protected by i_mutex */
  	if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
  		ret = -EPERM;
  		goto unlock;
  	}
  
  	/* Tear down any mappings of the hole, then free its pages. */
  	i_mmap_lock_write(mapping);
  	if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
  		hugetlb_vmdelete_list(&mapping->i_mmap,
  					first >> PAGE_SHIFT,
  					last  >> PAGE_SHIFT);
  	i_mmap_unlock_write(mapping);
  	remove_inode_hugepages(inode, first, last);
  
  unlock:
  	inode_unlock(inode);
  
  	return ret;
  }
  
  /*
   * fallocate() implementation for hugetlbfs files.
   *
   * Only FALLOC_FL_KEEP_SIZE and FALLOC_FL_PUNCH_HOLE are accepted in @mode;
   * hole punching is handed off to hugetlbfs_punch_hole().  Otherwise every
   * huge page index touched by [@offset, @offset + @len) that is not yet in
   * the page cache gets a zeroed huge page allocated and inserted.
   *
   * Returns 0 on success, -EOPNOTSUPP for unsupported mode bits, -EPERM when
   * the range would grow a file sealed with F_SEAL_GROW, -EINTR when a signal
   * is pending, or the error reported by inode_newsize_ok(),
   * alloc_huge_page() or huge_add_to_page_cache().
   */
  static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
  				loff_t len)
  {
  	struct inode *inode = file_inode(file);
  	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
  	struct address_space *mapping = inode->i_mapping;
  	struct hstate *h = hstate_inode(inode);
  	struct vm_area_struct pseudo_vma;
  	struct mm_struct *mm = current->mm;
  	loff_t hpage_size = huge_page_size(h);
  	unsigned long hpage_shift = huge_page_shift(h);
  	pgoff_t start, index, end;
  	loff_t range_end;
  	int error;
  	u32 hash;

  	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
  		return -EOPNOTSUPP;

  	if (mode & FALLOC_FL_PUNCH_HOLE)
  		return hugetlbfs_punch_hole(inode, offset, len);

  	/*
  	 * Default preallocate case.
  	 * For this range, start is rounded down and end is rounded up
  	 * as well as being converted to page offsets.
  	 */
  	range_end = offset + len;
  	start = offset >> hpage_shift;
  	end = (range_end + hpage_size - 1) >> hpage_shift;

  	inode_lock(inode);

  	/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
  	error = inode_newsize_ok(inode, range_end);
  	if (error)
  		goto out;

  	if ((info->seals & F_SEAL_GROW) && range_end > inode->i_size) {
  		error = -EPERM;
  		goto out;
  	}

  	/*
  	 * Initialize a pseudo vma as this is required by the huge page
  	 * allocation routines.  If NUMA is configured, use page index
  	 * as input to create an allocation policy.
  	 */
  	vma_init(&pseudo_vma, mm);
  	pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
  	pseudo_vma.vm_file = file;

  	for (index = start; index < end; index++) {
  		struct mutex *fault_mutex;
  		struct page *page;
  		unsigned long addr;

  		cond_resched();

  		/*
  		 * fallocate(2) manpage permits EINTR; we may have been
  		 * interrupted because we are using up too much memory.
  		 */
  		if (signal_pending(current)) {
  			error = -EINTR;
  			break;
  		}

  		/* Set numa allocation policy based on index */
  		hugetlb_set_vma_policy(&pseudo_vma, inode, index);

  		/*
  		 * This is supposed to be the vaddr where the page is being
  		 * faulted in, but we have no vaddr here, so use the offset
  		 * within the file (zero based) instead.
  		 */
  		addr = index * hpage_size;

  		/*
  		 * fault mutex taken here, protects against fault path
  		 * and hole punch.  inode_lock previously taken protects
  		 * against truncation.
  		 */
  		hash = hugetlb_fault_mutex_hash(mapping, index);
  		fault_mutex = &hugetlb_fault_mutex_table[hash];
  		mutex_lock(fault_mutex);

  		/* See if already present in mapping to avoid alloc/free */
  		page = find_get_page(mapping, index);
  		if (page) {
  			put_page(page);
  			mutex_unlock(fault_mutex);
  			hugetlb_drop_vma_policy(&pseudo_vma);
  			continue;
  		}

  		/* Allocate page (avoid_reserve == 0) and add to page cache */
  		page = alloc_huge_page(&pseudo_vma, addr, 0);
  		hugetlb_drop_vma_policy(&pseudo_vma);
  		if (IS_ERR(page)) {
  			mutex_unlock(fault_mutex);
  			error = PTR_ERR(page);
  			goto out;
  		}
  		clear_huge_page(page, addr, pages_per_huge_page(h));
  		__SetPageUptodate(page);
  		error = huge_add_to_page_cache(page, mapping, index);
  		if (unlikely(error)) {
  			put_page(page);
  			mutex_unlock(fault_mutex);
  			goto out;
  		}

  		mutex_unlock(fault_mutex);

  		/*
  		 * unlock_page because locked by add_to_page_cache()
  		 * put_page due to reference from alloc_huge_page()
  		 */
  		unlock_page(page);
  		put_page(page);
  	}

  	/* Reached after a full pass and also after the -EINTR break above. */
  	if (!(mode & FALLOC_FL_KEEP_SIZE) && range_end > inode->i_size)
  		i_size_write(inode, range_end);
  	inode->i_ctime = current_time(inode);
  out:
  	inode_unlock(inode);
  	return error;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
720
721
  static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
  {
2b0143b5c   David Howells   VFS: normal files...
722
  	struct inode *inode = d_inode(dentry);
a55164389   Andi Kleen   hugetlb: modular ...
723
  	struct hstate *h = hstate_inode(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
724
725
  	int error;
  	unsigned int ia_valid = attr->ia_valid;
ff62a3421   Marc-André Lureau   hugetlb: implemen...
726
  	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
727
728
  
  	BUG_ON(!inode);
31051c85b   Jan Kara   fs: Give dentry t...
729
  	error = setattr_prepare(dentry, attr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
730
  	if (error)
1025774ce   Christoph Hellwig   remove inode_setattr
731
  		return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
732
733
  
  	if (ia_valid & ATTR_SIZE) {
ff62a3421   Marc-André Lureau   hugetlb: implemen...
734
735
736
737
  		loff_t oldsize = inode->i_size;
  		loff_t newsize = attr->ia_size;
  
  		if (newsize & ~huge_page_mask(h))
1025774ce   Christoph Hellwig   remove inode_setattr
738
  			return -EINVAL;
ff62a3421   Marc-André Lureau   hugetlb: implemen...
739
740
741
742
743
  		/* protected by i_mutex */
  		if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
  		    (newsize > oldsize && (info->seals & F_SEAL_GROW)))
  			return -EPERM;
  		error = hugetlb_vmtruncate(inode, newsize);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
744
  		if (error)
1025774ce   Christoph Hellwig   remove inode_setattr
745
  			return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
746
  	}
1025774ce   Christoph Hellwig   remove inode_setattr
747
748
749
750
  
  	setattr_copy(inode, attr);
  	mark_inode_dirty(inode);
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
751
  }
7d54fa647   Al Viro   hugetlbfs: switch...
752
  static struct inode *hugetlbfs_get_root(struct super_block *sb,
32021982a   David Howells   hugetlbfs: Conver...
753
  					struct hugetlbfs_fs_context *ctx)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
754
755
  {
  	struct inode *inode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
756
757
758
  
  	inode = new_inode(sb);
  	if (inode) {
85fe4025c   Christoph Hellwig   fs: do not assign...
759
  		inode->i_ino = get_next_ino();
32021982a   David Howells   hugetlbfs: Conver...
760
761
762
  		inode->i_mode = S_IFDIR | ctx->mode;
  		inode->i_uid = ctx->uid;
  		inode->i_gid = ctx->gid;
078cd8279   Deepa Dinamani   fs: Replace CURRE...
763
  		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
7d54fa647   Al Viro   hugetlbfs: switch...
764
765
766
767
  		inode->i_op = &hugetlbfs_dir_inode_operations;
  		inode->i_fop = &simple_dir_operations;
  		/* directory inodes start off with i_nlink == 2 (for "." entry) */
  		inc_nlink(inode);
65ed76010   Aneesh Kumar K.V   hugetlbfs: lockde...
768
  		lockdep_annotate_inode_mutex_key(inode);
7d54fa647   Al Viro   hugetlbfs: switch...
769
770
771
  	}
  	return inode;
  }
b610ded71   Michal Hocko   hugetlb: fix lock...
772
  /*
c8c06efa8   Davidlohr Bueso   mm: convert i_mma...
773
   * Hugetlbfs is not reclaimable; therefore its i_mmap_rwsem will never
b610ded71   Michal Hocko   hugetlb: fix lock...
774
   * be taken from reclaim -- unlike regular filesystems. This needs an
88f306b68   Kirill A. Shutemov   mm: fix locking o...
775
   * annotation because huge_pmd_share() does an allocation under hugetlb's
c8c06efa8   Davidlohr Bueso   mm: convert i_mma...
776
   * i_mmap_rwsem.
b610ded71   Michal Hocko   hugetlb: fix lock...
777
   */
c8c06efa8   Davidlohr Bueso   mm: convert i_mma...
778
  /* Lockdep class set on each inode's i_mmap_rwsem by hugetlbfs_get_inode(). */
  static struct lock_class_key hugetlbfs_i_mmap_rwsem_key;
b610ded71   Michal Hocko   hugetlb: fix lock...
779

7d54fa647   Al Viro   hugetlbfs: switch...
780
781
  static struct inode *hugetlbfs_get_inode(struct super_block *sb,
  					struct inode *dir,
18df22524   Al Viro   hugetlbfs: propag...
782
  					umode_t mode, dev_t dev)
7d54fa647   Al Viro   hugetlbfs: switch...
783
784
  {
  	struct inode *inode;
58b6e5e8f   Mike Kravetz   hugetlbfs: fix me...
785
  	struct resv_map *resv_map = NULL;
9119a41e9   Joonsoo Kim   mm, hugetlb: unif...
786

58b6e5e8f   Mike Kravetz   hugetlbfs: fix me...
787
788
789
790
791
792
793
794
795
  	/*
  	 * Reserve maps are only needed for inodes that can have associated
  	 * page allocations.
  	 */
  	if (S_ISREG(mode) || S_ISLNK(mode)) {
  		resv_map = resv_map_alloc();
  		if (!resv_map)
  			return NULL;
  	}
7d54fa647   Al Viro   hugetlbfs: switch...
796
797
798
  
  	inode = new_inode(sb);
  	if (inode) {
ff62a3421   Marc-André Lureau   hugetlb: implemen...
799
  		struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
7d54fa647   Al Viro   hugetlbfs: switch...
800
801
  		inode->i_ino = get_next_ino();
  		inode_init_owner(inode, dir, mode);
c8c06efa8   Davidlohr Bueso   mm: convert i_mma...
802
803
  		lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
  				&hugetlbfs_i_mmap_rwsem_key);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
804
  		inode->i_mapping->a_ops = &hugetlbfs_aops;
078cd8279   Deepa Dinamani   fs: Replace CURRE...
805
  		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
9119a41e9   Joonsoo Kim   mm, hugetlb: unif...
806
  		inode->i_mapping->private_data = resv_map;
ff62a3421   Marc-André Lureau   hugetlb: implemen...
807
  		info->seals = F_SEAL_SEAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
808
809
810
811
812
813
814
815
816
817
818
819
820
  		switch (mode & S_IFMT) {
  		default:
  			init_special_inode(inode, mode, dev);
  			break;
  		case S_IFREG:
  			inode->i_op = &hugetlbfs_inode_operations;
  			inode->i_fop = &hugetlbfs_file_operations;
  			break;
  		case S_IFDIR:
  			inode->i_op = &hugetlbfs_dir_inode_operations;
  			inode->i_fop = &simple_dir_operations;
  
  			/* directory inodes start off with i_nlink == 2 (for "." entry) */
d8c76e6f4   Dave Hansen   [PATCH] r/o bind ...
821
  			inc_nlink(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
822
823
824
  			break;
  		case S_IFLNK:
  			inode->i_op = &page_symlink_inode_operations;
21fc61c73   Al Viro   don't put symlink...
825
  			inode_nohighmem(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
826
827
  			break;
  		}
e096d0c7e   Josh Boyer   lockdep: Add help...
828
  		lockdep_annotate_inode_mutex_key(inode);
58b6e5e8f   Mike Kravetz   hugetlbfs: fix me...
829
830
831
832
  	} else {
  		if (resv_map)
  			kref_put(&resv_map->refs, resv_map_release);
  	}
9119a41e9   Joonsoo Kim   mm, hugetlb: unif...
833

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
834
835
836
837
838
839
  	return inode;
  }
  
  /*
   * File creation. Allocate an inode, and we're done..
   */
1ab5b82f5   Piotr Sarna   hugetlbfs: add O_...
840
841
842
843
844
  static int do_hugetlbfs_mknod(struct inode *dir,
  			struct dentry *dentry,
  			umode_t mode,
  			dev_t dev,
  			bool tmpfile)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
845
846
847
  {
  	struct inode *inode;
  	int error = -ENOSPC;
7d54fa647   Al Viro   hugetlbfs: switch...
848
849
  
  	inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
850
  	if (inode) {
078cd8279   Deepa Dinamani   fs: Replace CURRE...
851
  		dir->i_ctime = dir->i_mtime = current_time(dir);
1ab5b82f5   Piotr Sarna   hugetlbfs: add O_...
852
853
854
855
856
857
  		if (tmpfile) {
  			d_tmpfile(dentry, inode);
  		} else {
  			d_instantiate(dentry, inode);
  			dget(dentry);/* Extra count - pin the dentry in core */
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
858
859
860
861
  		error = 0;
  	}
  	return error;
  }
1ab5b82f5   Piotr Sarna   hugetlbfs: add O_...
862
863
864
865
866
  /* ->mknod: create a named (non-tmpfile) inode of type @mode in @dir. */
  static int hugetlbfs_mknod(struct inode *dir,
  			struct dentry *dentry, umode_t mode, dev_t dev)
  {
  	const bool tmpfile = false;

  	return do_hugetlbfs_mknod(dir, dentry, mode, dev, tmpfile);
  }
18bb1db3e   Al Viro   switch vfs_mkdir(...
867
  /*
   * ->mkdir: create a directory in @dir and, on success, bump the parent's
   * link count.  Returns whatever hugetlbfs_mknod() returned.
   */
  static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
  {
  	int err = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0);

  	if (err)
  		return err;

  	inc_nlink(dir);
  	return 0;
  }
ebfc3b49a   Al Viro   don't pass nameid...
874
  /* ->create: make a regular file in @dir; @excl is not consulted here. */
  static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
  {
  	const umode_t file_mode = mode | S_IFREG;

  	return hugetlbfs_mknod(dir, dentry, file_mode, 0);
  }
1ab5b82f5   Piotr Sarna   hugetlbfs: add O_...
878
879
880
881
882
  /* ->tmpfile: create a regular file that is attached via d_tmpfile(). */
  static int hugetlbfs_tmpfile(struct inode *dir,
  			struct dentry *dentry, umode_t mode)
  {
  	const bool tmpfile = true;

  	return do_hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0, tmpfile);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
883
884
885
886
887
  static int hugetlbfs_symlink(struct inode *dir,
  			struct dentry *dentry, const char *symname)
  {
  	struct inode *inode;
  	int error = -ENOSPC;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
888

7d54fa647   Al Viro   hugetlbfs: switch...
889
  	inode = hugetlbfs_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
890
891
892
893
894
895
896
897
898
  	if (inode) {
  		int l = strlen(symname)+1;
  		error = page_symlink(inode, symname, l);
  		if (!error) {
  			d_instantiate(dentry, inode);
  			dget(dentry);
  		} else
  			iput(inode);
  	}
078cd8279   Deepa Dinamani   fs: Replace CURRE...
899
  	dir->i_ctime = dir->i_mtime = current_time(dir);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
900
901
902
903
904
  
  	return error;
  }
  
  /*
   * ->set_page_dirty: the dirty flag is kept on the compound head, so mark
   * the head page of @page dirty.  Always returns 0.
   */
  static int hugetlbfs_set_page_dirty(struct page *page)
  {
  	SetPageDirty(compound_head(page));

  	return 0;
  }
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
914
  static int hugetlbfs_migrate_page(struct address_space *mapping,
b969c4ab9   Mel Gorman   mm: compaction: d...
915
  				struct page *newpage, struct page *page,
a6bc32b89   Mel Gorman   mm: compaction: i...
916
  				enum migrate_mode mode)
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
917
918
919
920
  {
  	int rc;
  
  	rc = migrate_huge_page_move_mapping(mapping, newpage, page);
78bd52097   Rafael Aquini   mm: adjust addres...
921
  	if (rc != MIGRATEPAGE_SUCCESS)
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
922
  		return rc;
cb6acd01e   Mike Kravetz   hugetlbfs: fix ra...
923
924
925
926
927
928
929
930
931
932
933
  
  	/*
  	 * page_private is subpool pointer in hugetlb pages.  Transfer to
  	 * new page.  PagePrivate is not associated with page_private for
  	 * hugetlb pages and can not be set here as only page_huge_active
  	 * pages can be migrated.
  	 */
  	if (page_private(page)) {
  		set_page_private(newpage, page_private(page));
  		set_page_private(page, 0);
  	}
2916ecc0f   Jérôme Glisse   mm/migrate: new m...
934
935
936
937
  	if (mode != MIGRATE_SYNC_NO_COPY)
  		migrate_page_copy(newpage, page);
  	else
  		migrate_page_states(newpage, page);
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
938

78bd52097   Rafael Aquini   mm: adjust addres...
939
  	return MIGRATEPAGE_SUCCESS;
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
940
  }
78bb92034   Naoya Horiguchi   mm: hwpoison: dis...
941
942
943
944
  static int hugetlbfs_error_remove_page(struct address_space *mapping,
  				struct page *page)
  {
  	struct inode *inode = mapping->host;
ab615a5b8   Mike Kravetz   fs/hugetlbfs/inod...
945
  	pgoff_t index = page->index;
78bb92034   Naoya Horiguchi   mm: hwpoison: dis...
946
947
  
  	remove_huge_page(page);
ab615a5b8   Mike Kravetz   fs/hugetlbfs/inod...
948
949
  	if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1)))
  		hugetlb_fix_reserve_counts(inode);
78bb92034   Naoya Horiguchi   mm: hwpoison: dis...
950
951
  	return 0;
  }
4a25220d4   David Howells   hugetlbfs: Implem...
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
  /*
   * Display the mount options in /proc/mounts.
   *
   * uid, gid, mode and nr_inodes are only printed when they differ from the
   * values tested below (root uid/gid, 0755, -1); pagesize is always printed,
   * in MB when it is at least 1 MB and in KB otherwise.  size and min_size
   * are printed in bytes when a subpool exists and the limit is not -1.
   */
  static int hugetlbfs_show_options(struct seq_file *m, struct dentry *root)
  {
  	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(root->d_sb);
  	struct hugepage_subpool *spool = sbinfo->spool;
  	unsigned long hpage_size = huge_page_size(sbinfo->hstate);
  	unsigned hpage_shift = huge_page_shift(sbinfo->hstate);
  	char unit = 'K';

  	if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
  		seq_printf(m, ",uid=%u",
  			   from_kuid_munged(&init_user_ns, sbinfo->uid));
  	if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
  		seq_printf(m, ",gid=%u",
  			   from_kgid_munged(&init_user_ns, sbinfo->gid));
  	if (sbinfo->mode != 0755)
  		seq_printf(m, ",mode=%o", sbinfo->mode);
  	if (sbinfo->max_inodes != -1)
  		seq_printf(m, ",nr_inodes=%lu", sbinfo->max_inodes);

  	/* Scale bytes to KB, then on to MB when large enough. */
  	hpage_size /= 1024;
  	if (hpage_size >= 1024) {
  		hpage_size /= 1024;
  		unit = 'M';
  	}
  	seq_printf(m, ",pagesize=%lu%c", hpage_size, unit);

  	if (!spool)
  		return 0;

  	/* Subpool limits are kept in huge pages; report them in bytes. */
  	if (spool->max_hpages != -1)
  		seq_printf(m, ",size=%llu",
  			   (unsigned long long)spool->max_hpages << hpage_shift);
  	if (spool->min_hpages != -1)
  		seq_printf(m, ",min_size=%llu",
  			   (unsigned long long)spool->min_hpages << hpage_shift);

  	return 0;
  }
726c33422   David Howells   [PATCH] VFS: Perm...
991
  static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
992
  {
726c33422   David Howells   [PATCH] VFS: Perm...
993
  	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
2b0143b5c   David Howells   VFS: normal files...
994
  	struct hstate *h = hstate_inode(d_inode(dentry));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
995
996
  
  	buf->f_type = HUGETLBFS_MAGIC;
a55164389   Andi Kleen   hugetlb: modular ...
997
  	buf->f_bsize = huge_page_size(h);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
998
999
  	if (sbinfo) {
  		spin_lock(&sbinfo->stat_lock);
74a8a65c5   David Gibson   [PATCH] Fix huget...
1000
1001
  		/* If no limits set, just report 0 for max/free/used
  		 * blocks, like simple_statfs() */
90481622d   David Gibson   hugepages: fix us...
1002
1003
1004
1005
1006
1007
1008
1009
1010
  		if (sbinfo->spool) {
  			long free_pages;
  
  			spin_lock(&sbinfo->spool->lock);
  			buf->f_blocks = sbinfo->spool->max_hpages;
  			free_pages = sbinfo->spool->max_hpages
  				- sbinfo->spool->used_hpages;
  			buf->f_bavail = buf->f_bfree = free_pages;
  			spin_unlock(&sbinfo->spool->lock);
74a8a65c5   David Gibson   [PATCH] Fix huget...
1011
1012
1013
  			buf->f_files = sbinfo->max_inodes;
  			buf->f_ffree = sbinfo->free_inodes;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
  		spin_unlock(&sbinfo->stat_lock);
  	}
  	buf->f_namelen = NAME_MAX;
  	return 0;
  }
  
  static void hugetlbfs_put_super(struct super_block *sb)
  {
  	struct hugetlbfs_sb_info *sbi = HUGETLBFS_SB(sb);
  
  	if (sbi) {
  		sb->s_fs_info = NULL;
90481622d   David Gibson   hugepages: fix us...
1026
1027
1028
  
  		if (sbi->spool)
  			hugepage_put_subpool(sbi->spool);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1029
1030
1031
  		kfree(sbi);
  	}
  }
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
  /*
   * Take one inode out of the mount's free-inode budget.
   *
   * Returns 1 if the caller may allocate an inode and 0 if the budget is
   * exhausted.  A negative free_inodes bypasses the accounting altogether
   * (presumably "no limit" - confirm against the mount setup code).
   */
  static inline int hugetlbfs_dec_free_inodes(struct hugetlbfs_sb_info *sbinfo)
  {
  	int granted = 1;

  	if (sbinfo->free_inodes < 0)
  		return 1;

  	spin_lock(&sbinfo->stat_lock);
  	if (unlikely(!sbinfo->free_inodes))
  		granted = 0;
  	else
  		sbinfo->free_inodes--;
  	spin_unlock(&sbinfo->stat_lock);

  	return granted;
  }
  
  /*
   * Return one inode to the mount's free-inode budget.  Nothing is done when
   * free_inodes is negative, mirroring hugetlbfs_dec_free_inodes().
   */
  static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo)
  {
  	if (sbinfo->free_inodes < 0)
  		return;

  	spin_lock(&sbinfo->stat_lock);
  	sbinfo->free_inodes++;
  	spin_unlock(&sbinfo->stat_lock);
  }
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
1055
  /* Slab cache from which struct hugetlbfs_inode_info objects are allocated. */
  static struct kmem_cache *hugetlbfs_inode_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1056
1057
1058
  
  static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
  {
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
1059
  	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1060
  	struct hugetlbfs_inode_info *p;
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
1061
1062
  	if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo)))
  		return NULL;
e94b17660   Christoph Lameter   [PATCH] slab: rem...
1063
  	p = kmem_cache_alloc(hugetlbfs_inode_cachep, GFP_KERNEL);
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
1064
1065
  	if (unlikely(!p)) {
  		hugetlbfs_inc_free_inodes(sbinfo);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1066
  		return NULL;
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
1067
  	}
4742a35d9   Mike Kravetz   hugetlbfs: initia...
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
  
  	/*
  	 * Any time after allocation, hugetlbfs_destroy_inode can be called
  	 * for the inode.  mpol_free_shared_policy is unconditionally called
  	 * as part of hugetlbfs_destroy_inode.  So, initialize policy here
  	 * in case of a quick call to destroy.
  	 *
  	 * Note that the policy is initialized even if we are creating a
  	 * private inode.  This simplifies hugetlbfs_destroy_inode.
  	 */
  	mpol_shared_policy_init(&p->policy, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1079
1080
  	return &p->vfs_inode;
  }
b62de3225   Al Viro   hugetlb: make use...
1081
  static void hugetlbfs_free_inode(struct inode *inode)
fa0d7e3de   Nick Piggin   fs: icache RCU fr...
1082
  {
fa0d7e3de   Nick Piggin   fs: icache RCU fr...
1083
1084
  	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1085
1086
  static void hugetlbfs_destroy_inode(struct inode *inode)
  {
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
1087
  	hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1088
  	mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1089
  }
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
1090
  static const struct address_space_operations hugetlbfs_aops = {
800d15a53   Nick Piggin   implement simple ...
1091
1092
  	.write_begin	= hugetlbfs_write_begin,
  	.write_end	= hugetlbfs_write_end,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1093
  	.set_page_dirty	= hugetlbfs_set_page_dirty,
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
1094
  	.migratepage    = hugetlbfs_migrate_page,
78bb92034   Naoya Horiguchi   mm: hwpoison: dis...
1095
  	.error_remove_page	= hugetlbfs_error_remove_page,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1096
  };
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
1097

51cc50685   Alexey Dobriyan   SL*B: drop kmem c...
1098
  static void init_once(void *foo)
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
1099
1100
  {
  	struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
a35afb830   Christoph Lameter   Remove SLAB_CTOR_...
1101
  	inode_init_once(&ei->vfs_inode);
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
1102
  }
4b6f5d20b   Arjan van de Ven   [PATCH] Make most...
1103
  const struct file_operations hugetlbfs_file_operations = {
34d0640e2   Al Viro   switch hugetlbfs ...
1104
  	.read_iter		= hugetlbfs_read_iter,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1105
  	.mmap			= hugetlbfs_file_mmap,
1b061d924   Christoph Hellwig   rename the generi...
1106
  	.fsync			= noop_fsync,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1107
  	.get_unmapped_area	= hugetlb_get_unmapped_area,
70c3547e3   Mike Kravetz   hugetlbfs: add hu...
1108
1109
  	.llseek			= default_llseek,
  	.fallocate		= hugetlbfs_fallocate,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1110
  };
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
1111
  static const struct inode_operations hugetlbfs_dir_inode_operations = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
  	.create		= hugetlbfs_create,
  	.lookup		= simple_lookup,
  	.link		= simple_link,
  	.unlink		= simple_unlink,
  	.symlink	= hugetlbfs_symlink,
  	.mkdir		= hugetlbfs_mkdir,
  	.rmdir		= simple_rmdir,
  	.mknod		= hugetlbfs_mknod,
  	.rename		= simple_rename,
  	.setattr	= hugetlbfs_setattr,
1ab5b82f5   Piotr Sarna   hugetlbfs: add O_...
1122
  	.tmpfile	= hugetlbfs_tmpfile,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1123
  };
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
1124
  static const struct inode_operations hugetlbfs_inode_operations = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1125
1126
  	.setattr	= hugetlbfs_setattr,
  };
ee9b6d61a   Josef 'Jeff' Sipek   [PATCH] Mark stru...
1127
  static const struct super_operations hugetlbfs_ops = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1128
  	.alloc_inode    = hugetlbfs_alloc_inode,
b62de3225   Al Viro   hugetlb: make use...
1129
  	.free_inode     = hugetlbfs_free_inode,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1130
  	.destroy_inode  = hugetlbfs_destroy_inode,
2bbbda308   Al Viro   switch hugetlbfs ...
1131
  	.evict_inode	= hugetlbfs_evict_inode,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1132
  	.statfs		= hugetlbfs_statfs,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1133
  	.put_super	= hugetlbfs_put_super,
4a25220d4   David Howells   hugetlbfs: Implem...
1134
  	.show_options	= hugetlbfs_show_options,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1135
  };
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1136
1137
1138
1139
1140
  /*
   * Convert size option passed from command line to number of huge pages
   * in the pool specified by hstate.  Size option could be in bytes
   * (val_type == SIZE_STD) or percentage of the pool (val_type == SIZE_PERCENT).
   */
4a25220d4   David Howells   hugetlbfs: Implem...
1141
  static long
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1142
  hugetlbfs_size_to_hpages(struct hstate *h, unsigned long long size_opt,
4a25220d4   David Howells   hugetlbfs: Implem...
1143
  			 enum hugetlbfs_size_type val_type)
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
  {
  	if (val_type == NO_SIZE)
  		return -1;
  
  	if (val_type == SIZE_PERCENT) {
  		size_opt <<= huge_page_shift(h);
  		size_opt *= h->max_huge_pages;
  		do_div(size_opt, 100);
  	}
  
  	size_opt >>= huge_page_shift(h);
  	return size_opt;
  }
32021982a   David Howells   hugetlbfs: Conver...
1157
1158
1159
1160
  /*
   * Parse one mount parameter.
   */
  static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1161
  {
32021982a   David Howells   hugetlbfs: Conver...
1162
1163
1164
1165
1166
  	struct hugetlbfs_fs_context *ctx = fc->fs_private;
  	struct fs_parse_result result;
  	char *rest;
  	unsigned long ps;
  	int opt;
d7167b149   Al Viro   fs_parse: fold fs...
1167
  	opt = fs_parse(fc, hugetlb_fs_parameters, param, &result);
32021982a   David Howells   hugetlbfs: Conver...
1168
1169
1170
1171
1172
1173
1174
1175
  	if (opt < 0)
  		return opt;
  
  	switch (opt) {
  	case Opt_uid:
  		ctx->uid = make_kuid(current_user_ns(), result.uint_32);
  		if (!uid_valid(ctx->uid))
  			goto bad_val;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1176
  		return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1177

32021982a   David Howells   hugetlbfs: Conver...
1178
1179
1180
1181
1182
  	case Opt_gid:
  		ctx->gid = make_kgid(current_user_ns(), result.uint_32);
  		if (!gid_valid(ctx->gid))
  			goto bad_val;
  		return 0;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1183

32021982a   David Howells   hugetlbfs: Conver...
1184
1185
1186
  	case Opt_mode:
  		ctx->mode = result.uint_32 & 01777U;
  		return 0;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1187

32021982a   David Howells   hugetlbfs: Conver...
1188
1189
1190
1191
1192
1193
1194
1195
1196
  	case Opt_size:
  		/* memparse() will accept a K/M/G without a digit */
  		if (!isdigit(param->string[0]))
  			goto bad_val;
  		ctx->max_size_opt = memparse(param->string, &rest);
  		ctx->max_val_type = SIZE_STD;
  		if (*rest == '%')
  			ctx->max_val_type = SIZE_PERCENT;
  		return 0;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1197

32021982a   David Howells   hugetlbfs: Conver...
1198
1199
1200
1201
1202
1203
  	case Opt_nr_inodes:
  		/* memparse() will accept a K/M/G without a digit */
  		if (!isdigit(param->string[0]))
  			goto bad_val;
  		ctx->nr_inodes = memparse(param->string, &rest);
  		return 0;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1204

32021982a   David Howells   hugetlbfs: Conver...
1205
1206
1207
1208
1209
1210
1211
  	case Opt_pagesize:
  		ps = memparse(param->string, &rest);
  		ctx->hstate = size_to_hstate(ps);
  		if (!ctx->hstate) {
  			pr_err("Unsupported page size %lu MB
  ", ps >> 20);
  			return -EINVAL;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1212
  		}
32021982a   David Howells   hugetlbfs: Conver...
1213
  		return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1214

32021982a   David Howells   hugetlbfs: Conver...
1215
1216
1217
1218
1219
1220
1221
1222
1223
  	case Opt_min_size:
  		/* memparse() will accept a K/M/G without a digit */
  		if (!isdigit(param->string[0]))
  			goto bad_val;
  		ctx->min_size_opt = memparse(param->string, &rest);
  		ctx->min_val_type = SIZE_STD;
  		if (*rest == '%')
  			ctx->min_val_type = SIZE_PERCENT;
  		return 0;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1224

32021982a   David Howells   hugetlbfs: Conver...
1225
1226
1227
  	default:
  		return -EINVAL;
  	}
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1228

32021982a   David Howells   hugetlbfs: Conver...
1229
  bad_val:
b5db30cfb   Al Viro   hugetlbfs: switch...
1230
1231
  	return invalfc(fc, "Bad value '%s' for mount option '%s'
  ",
32021982a   David Howells   hugetlbfs: Conver...
1232
1233
  		      param->string, param->key);
  }
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1234

32021982a   David Howells   hugetlbfs: Conver...
1235
1236
1237
1238
1239
1240
  /*
   * Validate the parsed options.
   */
  static int hugetlbfs_validate(struct fs_context *fc)
  {
  	struct hugetlbfs_fs_context *ctx = fc->fs_private;
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1241

7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1242
1243
1244
1245
  	/*
  	 * Use huge page pool size (in hstate) to convert the size
  	 * options to number of huge pages.  If NO_SIZE, -1 is returned.
  	 */
32021982a   David Howells   hugetlbfs: Conver...
1246
1247
1248
1249
1250
1251
  	ctx->max_hpages = hugetlbfs_size_to_hpages(ctx->hstate,
  						   ctx->max_size_opt,
  						   ctx->max_val_type);
  	ctx->min_hpages = hugetlbfs_size_to_hpages(ctx->hstate,
  						   ctx->min_size_opt,
  						   ctx->min_val_type);
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1252
1253
1254
1255
  
  	/*
  	 * If max_size was specified, then min_size must be smaller
  	 */
32021982a   David Howells   hugetlbfs: Conver...
1256
1257
1258
1259
  	if (ctx->max_val_type > NO_SIZE &&
  	    ctx->min_hpages > ctx->max_hpages) {
  		pr_err("Minimum size can not be greater than maximum size
  ");
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1260
  		return -EINVAL;
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1261
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1262
1263
1264
1265
  	return 0;
  }
  
  static int
32021982a   David Howells   hugetlbfs: Conver...
1266
  hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1267
  {
32021982a   David Howells   hugetlbfs: Conver...
1268
  	struct hugetlbfs_fs_context *ctx = fc->fs_private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1269
  	struct hugetlbfs_sb_info *sbinfo;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1270
1271
1272
1273
1274
  	sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL);
  	if (!sbinfo)
  		return -ENOMEM;
  	sb->s_fs_info = sbinfo;
  	spin_lock_init(&sbinfo->stat_lock);
32021982a   David Howells   hugetlbfs: Conver...
1275
1276
1277
1278
1279
1280
1281
  	sbinfo->hstate		= ctx->hstate;
  	sbinfo->max_inodes	= ctx->nr_inodes;
  	sbinfo->free_inodes	= ctx->nr_inodes;
  	sbinfo->spool		= NULL;
  	sbinfo->uid		= ctx->uid;
  	sbinfo->gid		= ctx->gid;
  	sbinfo->mode		= ctx->mode;
4a25220d4   David Howells   hugetlbfs: Implem...
1282

7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1283
1284
1285
1286
1287
  	/*
  	 * Allocate and initialize subpool if maximum or minimum size is
  	 * specified.  Any needed reservations (for minimim size) are taken
  	 * taken when the subpool is created.
  	 */
32021982a   David Howells   hugetlbfs: Conver...
1288
1289
1290
1291
  	if (ctx->max_hpages != -1 || ctx->min_hpages != -1) {
  		sbinfo->spool = hugepage_new_subpool(ctx->hstate,
  						     ctx->max_hpages,
  						     ctx->min_hpages);
90481622d   David Gibson   hugepages: fix us...
1292
1293
1294
  		if (!sbinfo->spool)
  			goto out_free;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1295
  	sb->s_maxbytes = MAX_LFS_FILESIZE;
32021982a   David Howells   hugetlbfs: Conver...
1296
1297
  	sb->s_blocksize = huge_page_size(ctx->hstate);
  	sb->s_blocksize_bits = huge_page_shift(ctx->hstate);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1298
1299
1300
  	sb->s_magic = HUGETLBFS_MAGIC;
  	sb->s_op = &hugetlbfs_ops;
  	sb->s_time_gran = 1;
15568299b   Mike Kravetz   hugetlbfs: preven...
1301
1302
1303
1304
1305
1306
  
  	/*
  	 * Due to the special and limited functionality of hugetlbfs, it does
  	 * not work well as a stacking filesystem.
  	 */
  	sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
32021982a   David Howells   hugetlbfs: Conver...
1307
  	sb->s_root = d_make_root(hugetlbfs_get_root(sb, ctx));
48fde701a   Al Viro   switch open-coded...
1308
  	if (!sb->s_root)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1309
  		goto out_free;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1310
1311
  	return 0;
  out_free:
6e6870d4f   Fabian Frederick   fs/hugetlbfs/inod...
1312
  	kfree(sbinfo->spool);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1313
1314
1315
  	kfree(sbinfo);
  	return -ENOMEM;
  }
32021982a   David Howells   hugetlbfs: Conver...
1316
1317
1318
1319
1320
  static int hugetlbfs_get_tree(struct fs_context *fc)
  {
  	int err = hugetlbfs_validate(fc);
  	if (err)
  		return err;
2ac295d4f   Al Viro   convenience helpe...
1321
  	return get_tree_nodev(fc, hugetlbfs_fill_super);
32021982a   David Howells   hugetlbfs: Conver...
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
  }
  
  /* ->free() handler: release the hugetlbfs-private data hanging off @fc. */
  static void hugetlbfs_fs_context_free(struct fs_context *fc)
  {
  	struct hugetlbfs_fs_context *ctx = fc->fs_private;
  
  	kfree(ctx);
  }
  
  /* Mount-context callbacks installed by hugetlbfs_init_fs_context(). */
  static const struct fs_context_operations hugetlbfs_fs_context_ops = {
  	.parse_param	= hugetlbfs_parse_param,	/* one mount option */
  	.get_tree	= hugetlbfs_get_tree,		/* validate + create sb */
  	.free		= hugetlbfs_fs_context_free,	/* drop private ctx */
  };
  
  static int hugetlbfs_init_fs_context(struct fs_context *fc)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1336
  {
32021982a   David Howells   hugetlbfs: Conver...
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
  	struct hugetlbfs_fs_context *ctx;
  
  	ctx = kzalloc(sizeof(struct hugetlbfs_fs_context), GFP_KERNEL);
  	if (!ctx)
  		return -ENOMEM;
  
  	ctx->max_hpages	= -1; /* No limit on size by default */
  	ctx->nr_inodes	= -1; /* No limit on number of inodes by default */
  	ctx->uid	= current_fsuid();
  	ctx->gid	= current_fsgid();
  	ctx->mode	= 0755;
  	ctx->hstate	= &default_hstate;
  	ctx->min_hpages	= -1; /* No default minimum size */
  	ctx->max_val_type = NO_SIZE;
  	ctx->min_val_type = NO_SIZE;
  	fc->fs_private = ctx;
  	fc->ops	= &hugetlbfs_fs_context_ops;
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1355
1356
1357
  }
  
  static struct file_system_type hugetlbfs_fs_type = {
32021982a   David Howells   hugetlbfs: Conver...
1358
1359
  	.name			= "hugetlbfs",
  	.init_fs_context	= hugetlbfs_init_fs_context,
d7167b149   Al Viro   fs_parse: fold fs...
1360
  	.parameters		= hugetlb_fs_parameters,
32021982a   David Howells   hugetlbfs: Conver...
1361
  	.kill_sb		= kill_litter_super,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1362
  };
42d7395fe   Andi Kleen   mm: support more ...
1363
  /*
   * Kernel-internal hugetlbfs mounts, indexed by hstate index.  Filled in by
   * init_hugetlbfs_fs(); a slot is NULL when the mount for that hstate could
   * not be created.  hugetlb_file_setup() creates its files on these mounts.
   */
  static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1364

ef1ff6b8c   From: Mel Gorman   hugetlbfs: do not...
1365
  static int can_do_hugetlb_shm(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1366
  {
a0eb3a05a   Eric W. Biederman   userns: Convert h...
1367
1368
1369
  	kgid_t shm_group;
  	shm_group = make_kgid(&init_user_ns, sysctl_hugetlb_shm_group);
  	return capable(CAP_IPC_LOCK) || in_group_p(shm_group);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1370
  }
42d7395fe   Andi Kleen   mm: support more ...
1371
1372
  static int get_hstate_idx(int page_size_log)
  {
af73e4d95   Naoya Horiguchi   hugetlbfs: fix mm...
1373
  	struct hstate *h = hstate_sizelog(page_size_log);
42d7395fe   Andi Kleen   mm: support more ...
1374

42d7395fe   Andi Kleen   mm: support more ...
1375
1376
1377
1378
  	if (!h)
  		return -1;
  	return h - hstates;
  }
af73e4d95   Naoya Horiguchi   hugetlbfs: fix mm...
1379
1380
1381
1382
1383
1384
  /*
   * Create an unlinked hugetlbfs file of @size bytes on the internal mount
   * matching @page_size_log and reserve huge pages for it.
   *
   * @name:          name handed to alloc_file_pseudo()
   * @size:          file size in bytes (see alignment note below)
   * @acctflag:      flags passed through to hugetlb_reserve_pages()
   * @user:          out-parameter; set to the current user when the request
   *                 was charged with user_shm_lock(), NULL otherwise
   *                 (including on every error return)
   * @creat_flags:   HUGETLB_SHMFS_INODE marks the inode S_PRIVATE and enables
   *                 the permission / mlock-limit check
   * @page_size_log: selects the hstate, and therefore the internal mount
   *
   * Returns the new file, or an ERR_PTR:
   *   -ENODEV  no hstate matches @page_size_log
   *   -ENOENT  no internal mount exists for that hstate
   *   -EPERM   caller may not use hugetlb SHM and user_shm_lock() refused
   *   -ENOSPC  no inode could be allocated
   *   -ENOMEM  the huge page reservation failed
   *   or the error returned by alloc_file_pseudo().
   *
   * Note that size should be aligned to proper hugepage size in caller side,
   * otherwise hugetlb_reserve_pages reserves one less hugepages than intended.
   */
  struct file *hugetlb_file_setup(const char *name, size_t size,
  				vm_flags_t acctflag, struct user_struct **user,
  				int creat_flags, int page_size_log)
  {
  	struct inode *inode;
  	struct vfsmount *mnt;
  	int hstate_idx;
  	struct file *file;
  
  	/* Define the out-parameter before any return, including -ENODEV */
  	*user = NULL;
  
  	hstate_idx = get_hstate_idx(page_size_log);
  	if (hstate_idx < 0)
  		return ERR_PTR(-ENODEV);
  
  	mnt = hugetlbfs_vfsmount[hstate_idx];
  	if (!mnt)
  		return ERR_PTR(-ENOENT);
  
  	if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
  		/* Fall back to charging the request via user_shm_lock() */
  		*user = current_user();
  		if (user_shm_lock(size, *user)) {
  			task_lock(current);
  			pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n",
  				current->comm, current->pid);
  			task_unlock(current);
  		} else {
  			*user = NULL;
  			return ERR_PTR(-EPERM);
  		}
  	}
  
  	file = ERR_PTR(-ENOSPC);
  	inode = hugetlbfs_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0);
  	if (!inode)
  		goto out;
  	if (creat_flags == HUGETLB_SHMFS_INODE)
  		inode->i_flags |= S_PRIVATE;
  
  	inode->i_size = size;
  	clear_nlink(inode);
  
  	if (hugetlb_reserve_pages(inode, 0,
  			size >> huge_page_shift(hstate_inode(inode)), NULL,
  			acctflag))
  		file = ERR_PTR(-ENOMEM);
  	else
  		file = alloc_file_pseudo(inode, mnt, name, O_RDWR,
  					&hugetlbfs_file_operations);
  	if (!IS_ERR(file))
  		return file;
  
  	iput(inode);
  out:
  	/* Undo the user_shm_lock() charge taken above, if any */
  	if (*user) {
  		user_shm_unlock(size, *user);
  		*user = NULL;
  	}
  	return file;
  }
32021982a   David Howells   hugetlbfs: Conver...
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
  /*
   * Create the kernel-internal (SB_KERNMOUNT) hugetlbfs mount for hstate @h.
   *
   * Returns the mount, or an ERR_PTR taken from fs_context_for_mount() or
   * fc_mount().  A failure is logged here, so callers need not report it.
   */
  static struct vfsmount *__init mount_one_hugetlbfs(struct hstate *h)
  {
  	struct fs_context *fc;
  	struct vfsmount *mnt;
  
  	fc = fs_context_for_mount(&hugetlbfs_fs_type, SB_KERNMOUNT);
  	if (IS_ERR(fc)) {
  		mnt = ERR_CAST(fc);
  	} else {
  		struct hugetlbfs_fs_context *ctx = fc->fs_private;
  
  		/* Select the page size for this mount before creating it */
  		ctx->hstate = h;
  		mnt = fc_mount(fc);
  		put_fs_context(fc);
  	}
  	if (IS_ERR(mnt))
  		pr_err("Cannot mount internal hugetlbfs for page size %uK\n",
  		       1U << (h->order + PAGE_SHIFT - 10));
  	return mnt;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1461
1462
  static int __init init_hugetlbfs_fs(void)
  {
32021982a   David Howells   hugetlbfs: Conver...
1463
  	struct vfsmount *mnt;
42d7395fe   Andi Kleen   mm: support more ...
1464
  	struct hstate *h;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1465
  	int error;
42d7395fe   Andi Kleen   mm: support more ...
1466
  	int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1467

457c1b27e   Nishanth Aravamudan   hugetlb: ensure h...
1468
  	if (!hugepages_supported()) {
9b857d26d   Andrew Morton   fs/hugetlbfs/inod...
1469
1470
  		pr_info("disabling because there are no supported hugepage sizes
  ");
457c1b27e   Nishanth Aravamudan   hugetlb: ensure h...
1471
1472
  		return -ENOTSUPP;
  	}
d1d5e05ff   Hillf Danton   hugetlbfs: return...
1473
  	error = -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1474
1475
  	hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache",
  					sizeof(struct hugetlbfs_inode_info),
5d097056c   Vladimir Davydov   kmemcg: account c...
1476
  					0, SLAB_ACCOUNT, init_once);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1477
  	if (hugetlbfs_inode_cachep == NULL)
8fc312b32   Mike Kravetz   mm/hugetlbfs: fix...
1478
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1479
1480
1481
  
  	error = register_filesystem(&hugetlbfs_fs_type);
  	if (error)
8fc312b32   Mike Kravetz   mm/hugetlbfs: fix...
1482
  		goto out_free;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1483

8fc312b32   Mike Kravetz   mm/hugetlbfs: fix...
1484
1485
1486
1487
1488
1489
1490
1491
1492
  	/* default hstate mount is required */
  	mnt = mount_one_hugetlbfs(&hstates[default_hstate_idx]);
  	if (IS_ERR(mnt)) {
  		error = PTR_ERR(mnt);
  		goto out_unreg;
  	}
  	hugetlbfs_vfsmount[default_hstate_idx] = mnt;
  
  	/* other hstates are optional */
42d7395fe   Andi Kleen   mm: support more ...
1493
1494
  	i = 0;
  	for_each_hstate(h) {
15f0ec941   Jan Stancek   mm/hugetlbfs: fix...
1495
1496
  		if (i == default_hstate_idx) {
  			i++;
8fc312b32   Mike Kravetz   mm/hugetlbfs: fix...
1497
  			continue;
15f0ec941   Jan Stancek   mm/hugetlbfs: fix...
1498
  		}
8fc312b32   Mike Kravetz   mm/hugetlbfs: fix...
1499

32021982a   David Howells   hugetlbfs: Conver...
1500
  		mnt = mount_one_hugetlbfs(h);
8fc312b32   Mike Kravetz   mm/hugetlbfs: fix...
1501
1502
1503
1504
  		if (IS_ERR(mnt))
  			hugetlbfs_vfsmount[i] = NULL;
  		else
  			hugetlbfs_vfsmount[i] = mnt;
42d7395fe   Andi Kleen   mm: support more ...
1505
1506
  		i++;
  	}
32021982a   David Howells   hugetlbfs: Conver...
1507
1508
  
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1509

8fc312b32   Mike Kravetz   mm/hugetlbfs: fix...
1510
1511
1512
   out_unreg:
  	(void)unregister_filesystem(&hugetlbfs_fs_type);
   out_free:
d1d5e05ff   Hillf Danton   hugetlbfs: return...
1513
  	kmem_cache_destroy(hugetlbfs_inode_cachep);
8fc312b32   Mike Kravetz   mm/hugetlbfs: fix...
1514
   out:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1515
1516
  	return error;
  }
3e89e1c5e   Paul Gortmaker   hugetlb: make mm ...
1517
  fs_initcall(init_hugetlbfs_fs)