Blame view

fs/hugetlbfs/inode.c 34.5 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
  /*
   * hugetlbpage-backed filesystem.  Based on ramfs.
   *
6d49e352a   Nadia Yvette Chambers   propagate name ch...
4
   * Nadia Yvette Chambers, 2002
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
5
6
   *
   * Copyright (C) 2002 Linus Torvalds.
3e89e1c5e   Paul Gortmaker   hugetlb: make mm ...
7
   * License: GPL
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
8
   */
9b857d26d   Andrew Morton   fs/hugetlbfs/inod...
9
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
10
11
12
  #include <linux/thread_info.h>
  #include <asm/current.h>
  #include <linux/sched.h>		/* remove ASAP */
70c3547e3   Mike Kravetz   hugetlbfs: add hu...
13
  #include <linux/falloc.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
14
15
16
  #include <linux/fs.h>
  #include <linux/mount.h>
  #include <linux/file.h>
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
17
  #include <linux/kernel.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
19
20
21
22
  #include <linux/writeback.h>
  #include <linux/pagemap.h>
  #include <linux/highmem.h>
  #include <linux/init.h>
  #include <linux/string.h>
16f7e0fe2   Randy Dunlap   [PATCH] capable/c...
23
  #include <linux/capability.h>
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
24
  #include <linux/ctype.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
26
27
  #include <linux/backing-dev.h>
  #include <linux/hugetlb.h>
  #include <linux/pagevec.h>
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
28
  #include <linux/parser.h>
036e08568   Benjamin Herrenschmidt   get_unmapped_area...
29
  #include <linux/mman.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
30
31
32
33
  #include <linux/slab.h>
  #include <linux/dnotify.h>
  #include <linux/statfs.h>
  #include <linux/security.h>
1fd7317d0   Nick Black   Move magic number...
34
  #include <linux/magic.h>
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
35
  #include <linux/migrate.h>
34d0640e2   Al Viro   switch hugetlbfs ...
36
  #include <linux/uio.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
37
38
  
  #include <asm/uaccess.h>
ee9b6d61a   Josef 'Jeff' Sipek   [PATCH] Mark stru...
39
  /*
   * Forward declarations of the operation tables used by this filesystem.
   * Only hugetlbfs_file_operations is non-static.  The definitions are not
   * visible in this part of the file.
   */
  static const struct super_operations hugetlbfs_ops;
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
40
  static const struct address_space_operations hugetlbfs_aops;
4b6f5d20b   Arjan van de Ven   [PATCH] Make most...
41
  const struct file_operations hugetlbfs_file_operations;
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
42
43
  static const struct inode_operations hugetlbfs_dir_inode_operations;
  static const struct inode_operations hugetlbfs_inode_operations;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
44

a1d776ee3   David Gibson   hugetlb: cleanup ...
45
  /*
   * Settings handed to hugetlbfs_get_root() when a superblock's root inode
   * is created.  uid, gid and mode are copied onto the root inode there.
   * NOTE(review): the remaining fields presumably mirror the size=,
   * nr_inodes=, pagesize= and min_size= mount options - confirm against the
   * option parser, which is not visible in this part of the file.
   */
  struct hugetlbfs_config {
a0eb3a05a   Eric W. Biederman   userns: Convert h...
46
47
  	kuid_t   uid;	/* owner of the root inode */
  	kgid_t   gid;	/* group of the root inode */
a1d776ee3   David Gibson   hugetlb: cleanup ...
48
  	umode_t mode;	/* permission bits, OR-ed with S_IFDIR for the root */
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
49
  	long	max_hpages;
a1d776ee3   David Gibson   hugetlb: cleanup ...
50
51
  	long	nr_inodes;
  	struct hstate *hstate;
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
52
  	long    min_hpages;
a1d776ee3   David Gibson   hugetlb: cleanup ...
53
54
55
56
57
58
59
60
61
62
63
  };
  
  /*
   * Per-inode hugetlbfs state.  The generic VFS inode is embedded as
   * vfs_inode; HUGETLBFS_I() recovers this structure from a pointer to it.
   */
  struct hugetlbfs_inode_info {
  	struct shared_policy policy;	/* consulted by hugetlb_set_vma_policy() under CONFIG_NUMA */
  	struct inode vfs_inode;		/* embedded VFS inode */
  };
  
  /*
   * Convert a pointer to the embedded VFS inode back into a pointer to the
   * hugetlbfs_inode_info that contains it.
   */
  static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
  {
  	struct hugetlbfs_inode_info *info;

  	info = container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
  	return info;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
64
  /*
   * Non-static, so visible to other translation units.  Not referenced in
   * the visible part of this file.
   * NOTE(review): by its name this looks like a sysctl-controlled group id
   * for hugetlb shared memory - confirm against its users.
   */
  int sysctl_hugetlb_shm_group;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
65
66
67
  /*
   * Identifiers for the mount options listed in the tokens table.
   * Opt_err is the identifier attached to that table's final NULL-pattern
   * entry.
   */
  enum {
  	Opt_size, Opt_nr_inodes,
  	Opt_mode, Opt_uid, Opt_gid,
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
68
  	Opt_pagesize, Opt_min_size,
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
69
70
  	Opt_err,
  };
a447c0932   Steven Whitehouse   vfs: Use const fo...
71
  /*
   * Mount option patterns, each paired with its Opt_* identifier.  The
   * conversion in each pattern gives the expected argument form: %s for
   * size, nr_inodes, pagesize and min_size, %o for mode, %u for uid and gid.
   * The table ends with a NULL pattern tagged Opt_err.
   */
  static const match_table_t tokens = {
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
72
73
74
75
76
  	{Opt_size,	"size=%s"},
  	{Opt_nr_inodes,	"nr_inodes=%s"},
  	{Opt_mode,	"mode=%o"},
  	{Opt_uid,	"uid=%u"},
  	{Opt_gid,	"gid=%u"},
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
77
  	{Opt_pagesize,	"pagesize=%s"},
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
78
  	{Opt_min_size,	"min_size=%s"},
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
79
80
  	{Opt_err,	NULL},
  };
70c3547e3   Mike Kravetz   hugetlbfs: add hu...
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
  #ifdef CONFIG_NUMA
  /*
   * Set vma->vm_policy to the shared memory policy that the inode records
   * for page offset 'index'.  Pair with hugetlb_drop_vma_policy().
   */
  static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma,
  					struct inode *inode, pgoff_t index)
  {
  	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);

  	vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index);
  }

  /* Hand the policy stored in vma->vm_policy to mpol_cond_put(). */
  static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma)
  {
  	mpol_cond_put(vma->vm_policy);
  }
  #else
  /* Without CONFIG_NUMA both helpers are empty. */
  static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma,
  					struct inode *inode, pgoff_t index)
  {
  }

  static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma)
  {
  }
  #endif
2e9b367c2   Adam Litke   [PATCH] hugetlb: ...
103
104
105
106
107
108
109
110
111
  /*
   * Drop one reference on every page currently held in the pagevec, then
   * reinitialise the pagevec so it can be filled again.
   */
  static void huge_pagevec_release(struct pagevec *pvec)
  {
  	int idx = 0;

  	while (idx < pagevec_count(pvec)) {
  		put_page(pvec->pages[idx]);
  		idx++;
  	}

  	pagevec_reinit(pvec);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
112
113
  static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
  {
496ad9aa8   Al Viro   new helper: file_...
114
  	struct inode *inode = file_inode(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
115
116
  	loff_t len, vma_len;
  	int ret;
a55164389   Andi Kleen   hugetlb: modular ...
117
  	struct hstate *h = hstate_file(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
118

68589bc35   Hugh Dickins   [PATCH] hugetlb: ...
119
  	/*
dec4ad86c   David Gibson   hugepage: fix bro...
120
121
122
123
124
125
  	 * vma address alignment (but not the pgoff alignment) has
  	 * already been checked by prepare_hugepage_range.  If you add
  	 * any error returns here, do so after setting VM_HUGETLB, so
  	 * is_vm_hugetlb_page tests below unmap_region go the right
  	 * way when do_mmap_pgoff unwinds (may be important on powerpc
  	 * and ia64).
68589bc35   Hugh Dickins   [PATCH] hugetlb: ...
126
  	 */
a2fce9143   Naoya Horiguchi   hugetlbfs: stop s...
127
  	vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
68589bc35   Hugh Dickins   [PATCH] hugetlb: ...
128
  	vma->vm_ops = &hugetlb_vm_ops;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
129

2b37c35e6   Becky Bruce   fs/hugetlbfs/inod...
130
  	if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
dec4ad86c   David Gibson   hugepage: fix bro...
131
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
132
  	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
5955102c9   Al Viro   wrappers for ->i_...
133
  	inode_lock(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
134
  	file_accessed(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
135
136
137
  
  	ret = -ENOMEM;
  	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
138

a1e78772d   Mel Gorman   hugetlb: reserve ...
139
  	if (hugetlb_reserve_pages(inode,
a55164389   Andi Kleen   hugetlb: modular ...
140
  				vma->vm_pgoff >> huge_page_order(h),
5a6fe1259   Mel Gorman   Do not account fo...
141
142
  				len >> huge_page_shift(h), vma,
  				vma->vm_flags))
a43a8c39b   Kenneth W Chen   [PATCH] tightenin...
143
  		goto out;
b45b5bd65   David Gibson   [PATCH] hugepage:...
144

4c8872659   Adam Litke   [PATCH] hugetlb: ...
145
  	ret = 0;
b6174df5e   Zhang, Yanmin   [PATCH] mmap zero...
146
  	if (vma->vm_flags & VM_WRITE && inode->i_size < len)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
147
148
  		inode->i_size = len;
  out:
5955102c9   Al Viro   wrappers for ->i_...
149
  	inode_unlock(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
150
151
152
153
154
  
  	return ret;
  }
  
  /*
508034a32   Hugh Dickins   [PATCH] mm: unmap...
155
   * Called under down_write(mmap_sem).
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
156
   */
d2ba27e80   Adrian Bunk   proper prototype ...
157
  #ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
158
159
160
161
162
163
  static unsigned long
  hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
  		unsigned long len, unsigned long pgoff, unsigned long flags)
  {
  	struct mm_struct *mm = current->mm;
  	struct vm_area_struct *vma;
a55164389   Andi Kleen   hugetlb: modular ...
164
  	struct hstate *h = hstate_file(file);
086593559   Michel Lespinasse   mm: use vm_unmapp...
165
  	struct vm_unmapped_area_info info;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
166

a55164389   Andi Kleen   hugetlb: modular ...
167
  	if (len & ~huge_page_mask(h))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
168
169
170
  		return -EINVAL;
  	if (len > TASK_SIZE)
  		return -ENOMEM;
036e08568   Benjamin Herrenschmidt   get_unmapped_area...
171
  	if (flags & MAP_FIXED) {
a55164389   Andi Kleen   hugetlb: modular ...
172
  		if (prepare_hugepage_range(file, addr, len))
036e08568   Benjamin Herrenschmidt   get_unmapped_area...
173
174
175
  			return -EINVAL;
  		return addr;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
176
  	if (addr) {
a55164389   Andi Kleen   hugetlb: modular ...
177
  		addr = ALIGN(addr, huge_page_size(h));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
178
179
180
181
182
  		vma = find_vma(mm, addr);
  		if (TASK_SIZE - len >= addr &&
  		    (!vma || addr + len <= vma->vm_start))
  			return addr;
  	}
086593559   Michel Lespinasse   mm: use vm_unmapp...
183
184
185
186
187
188
189
  	info.flags = 0;
  	info.length = len;
  	info.low_limit = TASK_UNMAPPED_BASE;
  	info.high_limit = TASK_SIZE;
  	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
  	info.align_offset = 0;
  	return vm_unmapped_area(&info);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
190
191
  }
  #endif
34d0640e2   Al Viro   switch hugetlbfs ...
192
  static size_t
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
193
  hugetlbfs_read_actor(struct page *page, unsigned long offset,
34d0640e2   Al Viro   switch hugetlbfs ...
194
  			struct iov_iter *to, unsigned long size)
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
195
  {
34d0640e2   Al Viro   switch hugetlbfs ...
196
  	size_t copied = 0;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
197
  	int i, chunksize;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
198
199
200
201
202
  	/* Find which 4k chunk and offset with in that chunk */
  	i = offset >> PAGE_CACHE_SHIFT;
  	offset = offset & ~PAGE_CACHE_MASK;
  
  	while (size) {
34d0640e2   Al Viro   switch hugetlbfs ...
203
  		size_t n;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
204
205
206
207
208
  		chunksize = PAGE_CACHE_SIZE;
  		if (offset)
  			chunksize -= offset;
  		if (chunksize > size)
  			chunksize = size;
34d0640e2   Al Viro   switch hugetlbfs ...
209
210
211
212
  		n = copy_page_to_iter(&page[i], offset, chunksize, to);
  		copied += n;
  		if (n != chunksize)
  			return copied;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
213
214
  		offset = 0;
  		size -= chunksize;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
215
216
  		i++;
  	}
34d0640e2   Al Viro   switch hugetlbfs ...
217
  	return copied;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
218
219
220
221
222
223
224
  }
  
  /*
   * Support for read() - Find the page attached to f_mapping and copy out the
   * data. It is *very* similar to do_generic_mapping_read(), but we can't use
   * that since it has PAGE_CACHE_SIZE assumptions.
   */
34d0640e2   Al Viro   switch hugetlbfs ...
225
  static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
226
  {
34d0640e2   Al Viro   switch hugetlbfs ...
227
228
229
  	struct file *file = iocb->ki_filp;
  	struct hstate *h = hstate_file(file);
  	struct address_space *mapping = file->f_mapping;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
230
  	struct inode *inode = mapping->host;
34d0640e2   Al Viro   switch hugetlbfs ...
231
232
  	unsigned long index = iocb->ki_pos >> huge_page_shift(h);
  	unsigned long offset = iocb->ki_pos & ~huge_page_mask(h);
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
233
234
235
  	unsigned long end_index;
  	loff_t isize;
  	ssize_t retval = 0;
34d0640e2   Al Viro   switch hugetlbfs ...
236
  	while (iov_iter_count(to)) {
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
237
  		struct page *page;
34d0640e2   Al Viro   switch hugetlbfs ...
238
  		size_t nr, copied;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
239
240
  
  		/* nr is the maximum number of bytes to copy from this page */
a55164389   Andi Kleen   hugetlb: modular ...
241
  		nr = huge_page_size(h);
a05b0855f   Aneesh Kumar K.V   hugetlbfs: avoid ...
242
243
  		isize = i_size_read(inode);
  		if (!isize)
34d0640e2   Al Viro   switch hugetlbfs ...
244
  			break;
a05b0855f   Aneesh Kumar K.V   hugetlbfs: avoid ...
245
  		end_index = (isize - 1) >> huge_page_shift(h);
34d0640e2   Al Viro   switch hugetlbfs ...
246
247
248
  		if (index > end_index)
  			break;
  		if (index == end_index) {
a55164389   Andi Kleen   hugetlb: modular ...
249
  			nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
a05b0855f   Aneesh Kumar K.V   hugetlbfs: avoid ...
250
  			if (nr <= offset)
34d0640e2   Al Viro   switch hugetlbfs ...
251
  				break;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
252
253
254
255
  		}
  		nr = nr - offset;
  
  		/* Find the page */
a05b0855f   Aneesh Kumar K.V   hugetlbfs: avoid ...
256
  		page = find_lock_page(mapping, index);
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
257
258
259
260
261
  		if (unlikely(page == NULL)) {
  			/*
  			 * We have a HOLE, zero out the user-buffer for the
  			 * length of the hole or request.
  			 */
34d0640e2   Al Viro   switch hugetlbfs ...
262
  			copied = iov_iter_zero(nr, to);
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
263
  		} else {
a05b0855f   Aneesh Kumar K.V   hugetlbfs: avoid ...
264
  			unlock_page(page);
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
265
266
267
  			/*
  			 * We have the page, copy it to user space buffer.
  			 */
34d0640e2   Al Viro   switch hugetlbfs ...
268
  			copied = hugetlbfs_read_actor(page, offset, to, nr);
a05b0855f   Aneesh Kumar K.V   hugetlbfs: avoid ...
269
  			page_cache_release(page);
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
270
  		}
34d0640e2   Al Viro   switch hugetlbfs ...
271
272
273
274
275
276
  		offset += copied;
  		retval += copied;
  		if (copied != nr && iov_iter_count(to)) {
  			if (!retval)
  				retval = -EFAULT;
  			break;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
277
  		}
a55164389   Andi Kleen   hugetlb: modular ...
278
279
  		index += offset >> huge_page_shift(h);
  		offset &= ~huge_page_mask(h);
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
280
  	}
34d0640e2   Al Viro   switch hugetlbfs ...
281
  	iocb->ki_pos = ((loff_t)index << huge_page_shift(h)) + offset;
e63e1e5a6   Badari Pulavarty   hugetlbfs read() ...
282
283
  	return retval;
  }
800d15a53   Nick Piggin   implement simple ...
284
285
286
287
  /*
   * write_begin handler: always fails with -EINVAL, whatever arguments it
   * is given.
   */
  static int hugetlbfs_write_begin(struct file *file,
  			struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned flags,
  			struct page **pagep, void **fsdata)
  {
  	return -EINVAL;
  }
800d15a53   Nick Piggin   implement simple ...
291
292
293
  /*
   * write_end handler: hugetlbfs_write_begin() never succeeds, so reaching
   * this function is treated as a kernel bug.
   */
  static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
  			loff_t pos, unsigned len, unsigned copied,
  			struct page *page, void *fsdata)
  {
  	BUG();

  	return -EINVAL;
  }
b5cec28d3   Mike Kravetz   hugetlbfs: trunca...
298
  /*
   * Clear the dirty and uptodate flags of a huge page and take it out of
   * the page cache.
   */
  static void remove_huge_page(struct page *page)
  {
  	ClearPageDirty(page);
  	ClearPageUptodate(page);

  	delete_from_page_cache(page);
  }
4aae8d1c0   Mike Kravetz   mm/hugetlbfs: unm...
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
  /*
   * Unmap part of a file from every vma in 'root' that maps it.
   *
   * 'start' and 'end' are file offsets in units of PAGE_SIZE.  The range is
   * [start, end): 'end' is exclusive.  end == 0 means "everything from
   * start onwards".  For each overlapping vma the file range is translated
   * to the matching virtual address range, clamped to the vma, and handed
   * to unmap_hugepage_range().
   */
  static void
  hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end)
  {
  	struct vm_area_struct *vma;

  	/*
  	 * end == 0 indicates that the entire range after
  	 * start should be unmapped.
  	 *
  	 * A non-zero end is exclusive, whereas the interval tree walk takes
  	 * an inclusive last offset, hence end - 1.  Passing end itself would
  	 * also visit a vma that starts exactly at end and issue a
  	 * zero-length unmap for it.
  	 */
  	vma_interval_tree_foreach(vma, root, start, end ? end - 1 : ULONG_MAX) {
  		unsigned long v_offset;
  		unsigned long v_end;

  		/*
  		 * Can the expression below overflow on 32-bit arches?
  		 * No, because the interval tree returns us only those vmas
  		 * which overlap the truncated area starting at pgoff,
  		 * and no vma on a 32-bit arch can span beyond the 4GB.
  		 */
  		if (vma->vm_pgoff < start)
  			v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
  		else
  			v_offset = 0;

  		if (!end)
  			v_end = vma->vm_end;
  		else {
  			v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
  							+ vma->vm_start;
  			if (v_end > vma->vm_end)
  				v_end = vma->vm_end;
  		}

  		unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
  									NULL);
  	}
  }
b5cec28d3   Mike Kravetz   hugetlbfs: trunca...
341
342
343
344
  
  /*
   * remove_inode_hugepages handles two distinct cases: truncation and hole
   * punch.  There are subtle differences in operation for each case.
4aae8d1c0   Mike Kravetz   mm/hugetlbfs: unm...
345
   *
b5cec28d3   Mike Kravetz   hugetlbfs: trunca...
346
347
348
   * truncation is indicated by end of range being LLONG_MAX
   *	In this case, we first scan the range and release found pages.
   *	After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
1817889e3   Mike Kravetz   mm/hugetlbfs: fix...
349
350
351
352
353
   *	maps and global counts.  Page faults can not race with truncation
   *	in this routine.  hugetlb_no_page() prevents page faults in the
   *	truncated range.  It checks i_size before allocation, and again after
   *	with the page table lock for the page held.  The same lock must be
   *	acquired to unmap a page.
b5cec28d3   Mike Kravetz   hugetlbfs: trunca...
354
355
356
357
   * hole punch is indicated if end is not LLONG_MAX
   *	In the hole punch case we scan the range and release found pages.
   *	Only when releasing a page is the associated region/reserv map
   *	deleted.  The region/reserv map for ranges without associated
1817889e3   Mike Kravetz   mm/hugetlbfs: fix...
358
359
   *	pages are not modified.  Page faults can race with hole punch.
   *	This is indicated if we find a mapped page.
b5cec28d3   Mike Kravetz   hugetlbfs: trunca...
360
361
362
363
364
   * Note: If the passed end of range value is beyond the end of file, but
   * not LLONG_MAX this routine still performs a hole punch operation.
   */
  /*
   * Remove the huge pages of 'inode' that back the byte range
   * [lstart, lend) from the page cache.  lend == LLONG_MAX selects the
   * truncate behaviour; any other value is treated as a hole punch.
   *
   * Hole punch: the reservation for each page is released as the page is
   * removed.  Truncate: reservations from 'lstart' onwards are released in
   * one call after all pages have been removed.
   */
  static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
  				   loff_t lend)
  {
  	struct hstate *h = hstate_inode(inode);
  	struct address_space *mapping = &inode->i_data;
  	const pgoff_t start = lstart >> huge_page_shift(h);
  	const pgoff_t end = lend >> huge_page_shift(h);
  	const bool truncate_op = (lend == LLONG_MAX);
  	struct vm_area_struct pseudo_vma;
  	struct pagevec pvec;
  	pgoff_t next = start;
  	long batch = PAGEVEC_SIZE;
  	int nr_freed = 0;
  	int idx;

  	/* Zeroed stand-in vma; only needed to compute the fault mutex hash. */
  	memset(&pseudo_vma, 0, sizeof(pseudo_vma));
  	pseudo_vma.vm_flags = VM_HUGETLB | VM_MAYSHARE | VM_SHARED;

  	pagevec_init(&pvec, 0);

  	while (next < end) {
  		/* Don't grab more pages than the number left in the range. */
  		if (end - next < batch)
  			batch = end - next;

  		/* When no more pages are found, we are done. */
  		if (!pagevec_lookup(&pvec, mapping, next, batch))
  			break;

  		for (idx = 0; idx < pagevec_count(&pvec); ++idx) {
  			struct page *page = pvec.pages[idx];
  			bool rsv_on_error;
  			u32 hash;

  			/*
  			 * The page (index) could be beyond end.  This is
  			 * only possible in the punch hole case as end is
  			 * max page offset in the truncate case.
  			 */
  			next = page->index;
  			if (next >= end)
  				break;

  			hash = hugetlb_fault_mutex_hash(h, current->mm,
  							&pseudo_vma,
  							mapping, next, 0);
  			mutex_lock(&hugetlb_fault_mutex_table[hash]);

  			/*
  			 * If page is mapped, it was faulted in after being
  			 * unmapped in caller.  Unmap (again) now after taking
  			 * the fault mutex.  The mutex will prevent faults
  			 * until we finish removing the page.
  			 *
  			 * This race can only happen in the hole punch case.
  			 * Getting here in a truncate operation is a bug.
  			 */
  			if (unlikely(page_mapped(page))) {
  				pgoff_t first;

  				BUG_ON(truncate_op);

  				/* Base-page offset of this huge page. */
  				first = next * pages_per_huge_page(h);

  				i_mmap_lock_write(mapping);
  				hugetlb_vmdelete_list(&mapping->i_mmap, first,
  					first + pages_per_huge_page(h));
  				i_mmap_unlock_write(mapping);
  			}

  			lock_page(page);
  			/*
  			 * We must free the huge page and remove from page
  			 * cache (remove_huge_page) BEFORE removing the
  			 * region/reserve map (hugetlb_unreserve_pages).  In
  			 * rare out of memory conditions, removal of the
  			 * region/reserve map could fail.  Before free'ing
  			 * the page, note PagePrivate which is used in case
  			 * of error.
  			 */
  			rsv_on_error = !PagePrivate(page);
  			remove_huge_page(page);
  			nr_freed++;
  			if (!truncate_op &&
  			    unlikely(hugetlb_unreserve_pages(inode, next,
  							     next + 1, 1)))
  				hugetlb_fix_reserve_counts(inode, rsv_on_error);

  			unlock_page(page);
  			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
  		}

  		++next;
  		huge_pagevec_release(&pvec);
  		cond_resched();
  	}

  	if (truncate_op)
  		(void)hugetlb_unreserve_pages(inode, start, LONG_MAX, nr_freed);
  }
2bbbda308   Al Viro   switch hugetlbfs ...
460
  static void hugetlbfs_evict_inode(struct inode *inode)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
461
  {
9119a41e9   Joonsoo Kim   mm, hugetlb: unif...
462
  	struct resv_map *resv_map;
b5cec28d3   Mike Kravetz   hugetlbfs: trunca...
463
  	remove_inode_hugepages(inode, 0, LLONG_MAX);
9119a41e9   Joonsoo Kim   mm, hugetlb: unif...
464
465
466
467
  	resv_map = (struct resv_map *)inode->i_mapping->private_data;
  	/* root inode doesn't have the resv_map, so we should check it */
  	if (resv_map)
  		resv_map_release(&resv_map->refs);
dbd5768f8   Jan Kara   vfs: Rename end_w...
468
  	clear_inode(inode);
149f4211a   Christoph Hellwig   [PATCH] hugetlbfs...
469
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
470
471
  /*
   * Set the file size to 'offset', which must be huge page aligned (BUG
   * otherwise).  Mappings from that offset onwards are unmapped and the
   * huge pages from that offset onwards are removed.  Always returns 0.
   */
  static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
  {
  	struct hstate *h = hstate_inode(inode);
  	struct address_space *mapping = inode->i_mapping;
  	pgoff_t first_pgoff;

  	BUG_ON(offset & ~huge_page_mask(h));
  	first_pgoff = offset >> PAGE_SHIFT;

  	i_size_write(inode, offset);

  	/* end == 0: unmap everything from first_pgoff onwards. */
  	i_mmap_lock_write(mapping);
  	if (!RB_EMPTY_ROOT(&mapping->i_mmap))
  		hugetlb_vmdelete_list(&mapping->i_mmap, first_pgoff, 0);
  	i_mmap_unlock_write(mapping);

  	remove_inode_hugepages(inode, offset, LLONG_MAX);

  	return 0;
  }
70c3547e3   Mike Kravetz   hugetlbfs: add hu...
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
  /*
   * Punch a hole covering the whole huge pages that lie inside
   * [offset, offset + len).  Partial huge pages at either edge are left
   * alone: the start is rounded up and the end rounded down to a huge page
   * boundary.  If no whole huge page remains, nothing is done.
   *
   * Always returns 0.
   */
  static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
  {
  	struct hstate *h = hstate_inode(inode);
  	loff_t hpage_size = huge_page_size(h);
  	struct address_space *mapping = inode->i_mapping;
  	loff_t first_byte = round_up(offset, hpage_size);
  	loff_t end_byte = round_down(offset + len, hpage_size);

  	/* Nothing to do unless at least one whole huge page is covered. */
  	if (end_byte <= first_byte)
  		return 0;

  	inode_lock(inode);

  	/* Unmap the range from every mapping, then drop the pages. */
  	i_mmap_lock_write(mapping);
  	if (!RB_EMPTY_ROOT(&mapping->i_mmap))
  		hugetlb_vmdelete_list(&mapping->i_mmap,
  					first_byte >> PAGE_SHIFT,
  					end_byte >> PAGE_SHIFT);
  	i_mmap_unlock_write(mapping);

  	remove_inode_hugepages(inode, first_byte, end_byte);

  	inode_unlock(inode);

  	return 0;
  }
  
  /*
   * fallocate handler for hugetlbfs.
   *
   * Only FALLOC_FL_KEEP_SIZE and FALLOC_FL_PUNCH_HOLE are accepted; any
   * other mode bit yields -EOPNOTSUPP.  FALLOC_FL_PUNCH_HOLE is delegated
   * to hugetlbfs_punch_hole().  Otherwise huge pages are preallocated,
   * zeroed and added to the page cache for every huge page touched by
   * [offset, offset + len) that is not already present.  Unless
   * FALLOC_FL_KEEP_SIZE is set, i_size is extended to offset + len.
   *
   * Returns 0 on success, -EINTR if a signal is pending, or the error
   * reported by inode_newsize_ok(), alloc_huge_page() or
   * huge_add_to_page_cache().
   */
  static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
  				loff_t len)
  {
  	struct inode *inode = file_inode(file);
  	struct address_space *mapping = inode->i_mapping;
  	struct hstate *h = hstate_inode(inode);
  	struct vm_area_struct pseudo_vma;
  	struct mm_struct *mm = current->mm;
  	loff_t hpage_size = huge_page_size(h);
  	unsigned long hpage_shift = huge_page_shift(h);
  	pgoff_t start, index, end;
  	int error;
  	u32 hash;

  	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
  		return -EOPNOTSUPP;

  	if (mode & FALLOC_FL_PUNCH_HOLE)
  		return hugetlbfs_punch_hole(inode, offset, len);

  	/*
  	 * Default preallocate case.
  	 * For this range, start is rounded down and end is rounded up
  	 * as well as being converted to page offsets.
  	 */
  	start = offset >> hpage_shift;
  	end = (offset + len + hpage_size - 1) >> hpage_shift;

  	inode_lock(inode);

  	/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
  	error = inode_newsize_ok(inode, offset + len);
  	if (error)
  		goto out;

  	/*
  	 * Initialize a pseudo vma as this is required by the huge page
  	 * allocation routines.  If NUMA is configured, use page index
  	 * as input to create an allocation policy.
  	 */
  	memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
  	pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
  	pseudo_vma.vm_file = file;

  	for (index = start; index < end; index++) {
  		/*
  		 * This is supposed to be the vaddr where the page is being
  		 * faulted in, but we have no vaddr here.
  		 */
  		struct page *page;
  		unsigned long addr;
  		int avoid_reserve = 0;

  		cond_resched();

  		/*
  		 * fallocate(2) manpage permits EINTR; we may have been
  		 * interrupted because we are using up too much memory.
  		 */
  		if (signal_pending(current)) {
  			error = -EINTR;
  			break;
  		}

  		/* Set numa allocation policy based on index */
  		hugetlb_set_vma_policy(&pseudo_vma, inode, index);

  		/* addr is the offset within the file (zero based) */
  		addr = index * hpage_size;

  		/* mutex taken here, fault path and hole punch */
  		hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping,
  						index, addr);
  		mutex_lock(&hugetlb_fault_mutex_table[hash]);

  		/* See if already present in mapping to avoid alloc/free */
  		page = find_get_page(mapping, index);
  		if (page) {
  			put_page(page);
  			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
  			hugetlb_drop_vma_policy(&pseudo_vma);
  			continue;
  		}

  		/* Allocate page and add to page cache */
  		page = alloc_huge_page(&pseudo_vma, addr, avoid_reserve);
  		hugetlb_drop_vma_policy(&pseudo_vma);
  		if (IS_ERR(page)) {
  			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
  			error = PTR_ERR(page);
  			goto out;
  		}
  		clear_huge_page(page, addr, pages_per_huge_page(h));
  		__SetPageUptodate(page);
  		error = huge_add_to_page_cache(page, mapping, index);
  		if (unlikely(error)) {
  			put_page(page);
  			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
  			goto out;
  		}

  		mutex_unlock(&hugetlb_fault_mutex_table[hash]);

  		/*
  		 * unlock_page because locked by add_to_page_cache()
  		 * put_page due to reference from alloc_huge_page()
  		 *
  		 * Unlock first, while our own reference is still held, so
  		 * that the page is never touched after we have given up
  		 * that reference.
  		 */
  		unlock_page(page);
  		put_page(page);
  	}

  	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
  		i_size_write(inode, offset + len);
  	inode->i_ctime = CURRENT_TIME;
  out:
  	inode_unlock(inode);
  	return error;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
633
634
  static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
  {
2b0143b5c   David Howells   VFS: normal files...
635
  	struct inode *inode = d_inode(dentry);
a55164389   Andi Kleen   hugetlb: modular ...
636
  	struct hstate *h = hstate_inode(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
637
638
639
640
641
642
643
  	int error;
  	unsigned int ia_valid = attr->ia_valid;
  
  	BUG_ON(!inode);
  
  	error = inode_change_ok(inode, attr);
  	if (error)
1025774ce   Christoph Hellwig   remove inode_setattr
644
  		return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
645
646
647
  
  	if (ia_valid & ATTR_SIZE) {
  		error = -EINVAL;
1025774ce   Christoph Hellwig   remove inode_setattr
648
649
650
  		if (attr->ia_size & ~huge_page_mask(h))
  			return -EINVAL;
  		error = hugetlb_vmtruncate(inode, attr->ia_size);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
651
  		if (error)
1025774ce   Christoph Hellwig   remove inode_setattr
652
  			return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
653
  	}
1025774ce   Christoph Hellwig   remove inode_setattr
654
655
656
657
  
  	setattr_copy(inode, attr);
  	mark_inode_dirty(inode);
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
658
  }
7d54fa647   Al Viro   hugetlbfs: switch...
659
660
  static struct inode *hugetlbfs_get_root(struct super_block *sb,
  					struct hugetlbfs_config *config)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
661
662
  {
  	struct inode *inode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
663
664
665
666
  
  	inode = new_inode(sb);
  	if (inode) {
  		struct hugetlbfs_inode_info *info;
85fe4025c   Christoph Hellwig   fs: do not assign...
667
  		inode->i_ino = get_next_ino();
7d54fa647   Al Viro   hugetlbfs: switch...
668
669
670
671
672
673
674
675
676
677
  		inode->i_mode = S_IFDIR | config->mode;
  		inode->i_uid = config->uid;
  		inode->i_gid = config->gid;
  		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
  		info = HUGETLBFS_I(inode);
  		mpol_shared_policy_init(&info->policy, NULL);
  		inode->i_op = &hugetlbfs_dir_inode_operations;
  		inode->i_fop = &simple_dir_operations;
  		/* directory inodes start off with i_nlink == 2 (for "." entry) */
  		inc_nlink(inode);
65ed76010   Aneesh Kumar K.V   hugetlbfs: lockde...
678
  		lockdep_annotate_inode_mutex_key(inode);
7d54fa647   Al Viro   hugetlbfs: switch...
679
680
681
  	}
  	return inode;
  }
b610ded71   Michal Hocko   hugetlb: fix lock...
682
  /*
c8c06efa8   Davidlohr Bueso   mm: convert i_mma...
683
   * Hugetlbfs is not reclaimable; therefore its i_mmap_rwsem will never
b610ded71   Michal Hocko   hugetlb: fix lock...
684
   * be taken from reclaim -- unlike regular filesystems. This needs an
88f306b68   Kirill A. Shutemov   mm: fix locking o...
685
   * annotation because huge_pmd_share() does an allocation under hugetlb's
c8c06efa8   Davidlohr Bueso   mm: convert i_mma...
686
   * i_mmap_rwsem.
b610ded71   Michal Hocko   hugetlb: fix lock...
687
   */
c8c06efa8   Davidlohr Bueso   mm: convert i_mma...
688
  /* Lockdep class set on i_mapping->i_mmap_rwsem by hugetlbfs_get_inode(). */
  static struct lock_class_key hugetlbfs_i_mmap_rwsem_key;
b610ded71   Michal Hocko   hugetlb: fix lock...
689

7d54fa647   Al Viro   hugetlbfs: switch...
690
691
  static struct inode *hugetlbfs_get_inode(struct super_block *sb,
  					struct inode *dir,
18df22524   Al Viro   hugetlbfs: propag...
692
  					umode_t mode, dev_t dev)
7d54fa647   Al Viro   hugetlbfs: switch...
693
694
  {
  	struct inode *inode;
9119a41e9   Joonsoo Kim   mm, hugetlb: unif...
695
696
697
698
699
  	struct resv_map *resv_map;
  
  	resv_map = resv_map_alloc();
  	if (!resv_map)
  		return NULL;
7d54fa647   Al Viro   hugetlbfs: switch...
700
701
702
703
704
705
  
  	inode = new_inode(sb);
  	if (inode) {
  		struct hugetlbfs_inode_info *info;
  		inode->i_ino = get_next_ino();
  		inode_init_owner(inode, dir, mode);
c8c06efa8   Davidlohr Bueso   mm: convert i_mma...
706
707
  		lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
  				&hugetlbfs_i_mmap_rwsem_key);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
708
  		inode->i_mapping->a_ops = &hugetlbfs_aops;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
709
  		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
9119a41e9   Joonsoo Kim   mm, hugetlb: unif...
710
  		inode->i_mapping->private_data = resv_map;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
711
  		info = HUGETLBFS_I(inode);
6bfde05bf   Eric B Munson   hugetlbfs: allow ...
712
713
714
  		/*
  		 * The policy is initialized here even if we are creating a
  		 * private inode because initialization simply creates an
4a8c7bb59   Nathan Zimmer   mm/mempolicy.c: c...
715
  		 * an empty rb tree and calls rwlock_init(), later when we
6bfde05bf   Eric B Munson   hugetlbfs: allow ...
716
717
718
  		 * call mpol_free_shared_policy() it will just return because
  		 * the rb tree will still be empty.
  		 */
71fe804b6   Lee Schermerhorn   mempolicy: use st...
719
  		mpol_shared_policy_init(&info->policy, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
720
721
722
723
724
725
726
727
728
729
730
731
732
  		switch (mode & S_IFMT) {
  		default:
  			init_special_inode(inode, mode, dev);
  			break;
  		case S_IFREG:
  			inode->i_op = &hugetlbfs_inode_operations;
  			inode->i_fop = &hugetlbfs_file_operations;
  			break;
  		case S_IFDIR:
  			inode->i_op = &hugetlbfs_dir_inode_operations;
  			inode->i_fop = &simple_dir_operations;
  
  			/* directory inodes start off with i_nlink == 2 (for "." entry) */
d8c76e6f4   Dave Hansen   [PATCH] r/o bind ...
733
  			inc_nlink(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
734
735
736
  			break;
  		case S_IFLNK:
  			inode->i_op = &page_symlink_inode_operations;
21fc61c73   Al Viro   don't put symlink...
737
  			inode_nohighmem(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
738
739
  			break;
  		}
e096d0c7e   Josh Boyer   lockdep: Add help...
740
  		lockdep_annotate_inode_mutex_key(inode);
9119a41e9   Joonsoo Kim   mm, hugetlb: unif...
741
742
  	} else
  		kref_put(&resv_map->refs, resv_map_release);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
743
744
745
746
747
748
749
  	return inode;
  }
  
  /*
   * File creation: allocate an inode, bind it to the dentry and take an
   * extra dentry reference so the entry stays pinned in core.
   *
   * Returns 0 on success, -ENOSPC if no inode could be obtained.
   */
  static int hugetlbfs_mknod(struct inode *dir,
  			struct dentry *dentry, umode_t mode, dev_t dev)
  {
  	struct inode *inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev);

  	if (!inode)
  		return -ENOSPC;

  	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
  	d_instantiate(dentry, inode);
  	dget(dentry);	/* Extra count - pin the dentry in core */
  	return 0;
  }
18bb1db3e   Al Viro   switch vfs_mkdir(...
764
  /*
   * Create a directory: make the node with S_IFDIR set and, on success,
   * bump the parent's link count.
   */
  static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
  {
  	int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0);

  	if (retval)
  		return retval;

  	inc_nlink(dir);
  	return 0;
  }
ebfc3b49a   Al Viro   don't pass nameid...
771
  /* Create a regular file; @excl is not used here. */
  static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
  {
  	umode_t file_mode = mode | S_IFREG;

  	return hugetlbfs_mknod(dir, dentry, file_mode, 0);
  }
  
  static int hugetlbfs_symlink(struct inode *dir,
  			struct dentry *dentry, const char *symname)
  {
  	struct inode *inode;
  	int error = -ENOSPC;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
781

7d54fa647   Al Viro   hugetlbfs: switch...
782
  	inode = hugetlbfs_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
  	if (inode) {
  		int l = strlen(symname)+1;
  		error = page_symlink(inode, symname, l);
  		if (!error) {
  			d_instantiate(dentry, inode);
  			dget(dentry);
  		} else
  			iput(inode);
  	}
  	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
  
  	return error;
  }
  
  /*
   * Mark a huge page dirty.  The flag is set on the compound head page,
   * whichever page of the compound page was passed in.  Always returns 0.
   */
  static int hugetlbfs_set_page_dirty(struct page *page)
  {
  	SetPageDirty(compound_head(page));
  	return 0;
  }
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
807
  static int hugetlbfs_migrate_page(struct address_space *mapping,
b969c4ab9   Mel Gorman   mm: compaction: d...
808
  				struct page *newpage, struct page *page,
a6bc32b89   Mel Gorman   mm: compaction: i...
809
  				enum migrate_mode mode)
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
810
811
812
813
  {
  	int rc;
  
  	rc = migrate_huge_page_move_mapping(mapping, newpage, page);
78bd52097   Rafael Aquini   mm: adjust addres...
814
  	if (rc != MIGRATEPAGE_SUCCESS)
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
815
816
  		return rc;
  	migrate_page_copy(newpage, page);
78bd52097   Rafael Aquini   mm: adjust addres...
817
  	return MIGRATEPAGE_SUCCESS;
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
818
  }
726c33422   David Howells   [PATCH] VFS: Perm...
819
  static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
820
  {
726c33422   David Howells   [PATCH] VFS: Perm...
821
  	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
2b0143b5c   David Howells   VFS: normal files...
822
  	struct hstate *h = hstate_inode(d_inode(dentry));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
823
824
  
  	buf->f_type = HUGETLBFS_MAGIC;
a55164389   Andi Kleen   hugetlb: modular ...
825
  	buf->f_bsize = huge_page_size(h);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
826
827
  	if (sbinfo) {
  		spin_lock(&sbinfo->stat_lock);
74a8a65c5   David Gibson   [PATCH] Fix huget...
828
829
  		/* If no limits set, just report 0 for max/free/used
  		 * blocks, like simple_statfs() */
90481622d   David Gibson   hugepages: fix us...
830
831
832
833
834
835
836
837
838
  		if (sbinfo->spool) {
  			long free_pages;
  
  			spin_lock(&sbinfo->spool->lock);
  			buf->f_blocks = sbinfo->spool->max_hpages;
  			free_pages = sbinfo->spool->max_hpages
  				- sbinfo->spool->used_hpages;
  			buf->f_bavail = buf->f_bfree = free_pages;
  			spin_unlock(&sbinfo->spool->lock);
74a8a65c5   David Gibson   [PATCH] Fix huget...
839
840
841
  			buf->f_files = sbinfo->max_inodes;
  			buf->f_ffree = sbinfo->free_inodes;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
842
843
844
845
846
847
848
849
850
851
852
853
  		spin_unlock(&sbinfo->stat_lock);
  	}
  	buf->f_namelen = NAME_MAX;
  	return 0;
  }
  
  static void hugetlbfs_put_super(struct super_block *sb)
  {
  	struct hugetlbfs_sb_info *sbi = HUGETLBFS_SB(sb);
  
  	if (sbi) {
  		sb->s_fs_info = NULL;
90481622d   David Gibson   hugepages: fix us...
854
855
856
  
  		if (sbi->spool)
  			hugepage_put_subpool(sbi->spool);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
857
858
859
  		kfree(sbi);
  	}
  }
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
  /*
   * Take one inode from the free count.  A negative free_inodes means no
   * accounting is done and the call always succeeds.
   *
   * Returns 1 if an inode may be allocated, 0 if the count is exhausted.
   */
  static inline int hugetlbfs_dec_free_inodes(struct hugetlbfs_sb_info *sbinfo)
  {
  	int granted = 1;

  	if (sbinfo->free_inodes < 0)
  		return 1;

  	spin_lock(&sbinfo->stat_lock);
  	if (likely(sbinfo->free_inodes))
  		sbinfo->free_inodes--;
  	else
  		granted = 0;
  	spin_unlock(&sbinfo->stat_lock);

  	return granted;
  }
  
  /*
   * Give one inode back to the free count; does nothing when free_inodes
   * is negative (no accounting).
   */
  static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo)
  {
  	if (sbinfo->free_inodes < 0)
  		return;

  	spin_lock(&sbinfo->stat_lock);
  	sbinfo->free_inodes++;
  	spin_unlock(&sbinfo->stat_lock);
  }
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
883
  /* Slab cache backing the struct hugetlbfs_inode_info allocations below. */
  static struct kmem_cache *hugetlbfs_inode_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
884
885
886
  
  static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
  {
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
887
  	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
888
  	struct hugetlbfs_inode_info *p;
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
889
890
  	if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo)))
  		return NULL;
e94b17660   Christoph Lameter   [PATCH] slab: rem...
891
  	p = kmem_cache_alloc(hugetlbfs_inode_cachep, GFP_KERNEL);
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
892
893
  	if (unlikely(!p)) {
  		hugetlbfs_inc_free_inodes(sbinfo);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
894
  		return NULL;
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
895
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
896
897
  	return &p->vfs_inode;
  }
fa0d7e3de   Nick Piggin   fs: icache RCU fr...
898
899
900
  static void hugetlbfs_i_callback(struct rcu_head *head)
  {
  	struct inode *inode = container_of(head, struct inode, i_rcu);
fa0d7e3de   Nick Piggin   fs: icache RCU fr...
901
902
  	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
903
904
  static void hugetlbfs_destroy_inode(struct inode *inode)
  {
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
905
  	hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
906
  	mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
fa0d7e3de   Nick Piggin   fs: icache RCU fr...
907
  	call_rcu(&inode->i_rcu, hugetlbfs_i_callback);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
908
  }
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
909
  static const struct address_space_operations hugetlbfs_aops = {
800d15a53   Nick Piggin   implement simple ...
910
911
  	.write_begin	= hugetlbfs_write_begin,
  	.write_end	= hugetlbfs_write_end,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
912
  	.set_page_dirty	= hugetlbfs_set_page_dirty,
290408d4a   Naoya Horiguchi   hugetlb: hugepage...
913
  	.migratepage    = hugetlbfs_migrate_page,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
914
  };
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
915

51cc50685   Alexey Dobriyan   SL*B: drop kmem c...
916
  static void init_once(void *foo)
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
917
918
  {
  	struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
a35afb830   Christoph Lameter   Remove SLAB_CTOR_...
919
  	inode_init_once(&ei->vfs_inode);
96527980d   Christoph Hellwig   [PATCH] hugetlbfs...
920
  }
4b6f5d20b   Arjan van de Ven   [PATCH] Make most...
921
  const struct file_operations hugetlbfs_file_operations = {
34d0640e2   Al Viro   switch hugetlbfs ...
922
  	.read_iter		= hugetlbfs_read_iter,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
923
  	.mmap			= hugetlbfs_file_mmap,
1b061d924   Christoph Hellwig   rename the generi...
924
  	.fsync			= noop_fsync,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
925
  	.get_unmapped_area	= hugetlb_get_unmapped_area,
70c3547e3   Mike Kravetz   hugetlbfs: add hu...
926
927
  	.llseek			= default_llseek,
  	.fallocate		= hugetlbfs_fallocate,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
928
  };
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
929
  static const struct inode_operations hugetlbfs_dir_inode_operations = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
930
931
932
933
934
935
936
937
938
939
940
  	.create		= hugetlbfs_create,
  	.lookup		= simple_lookup,
  	.link		= simple_link,
  	.unlink		= simple_unlink,
  	.symlink	= hugetlbfs_symlink,
  	.mkdir		= hugetlbfs_mkdir,
  	.rmdir		= simple_rmdir,
  	.mknod		= hugetlbfs_mknod,
  	.rename		= simple_rename,
  	.setattr	= hugetlbfs_setattr,
  };
92e1d5be9   Arjan van de Ven   [PATCH] mark stru...
941
  static const struct inode_operations hugetlbfs_inode_operations = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
942
943
  	.setattr	= hugetlbfs_setattr,
  };
ee9b6d61a   Josef 'Jeff' Sipek   [PATCH] Mark stru...
944
  static const struct super_operations hugetlbfs_ops = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
945
946
  	.alloc_inode    = hugetlbfs_alloc_inode,
  	.destroy_inode  = hugetlbfs_destroy_inode,
2bbbda308   Al Viro   switch hugetlbfs ...
947
  	.evict_inode	= hugetlbfs_evict_inode,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
948
  	.statfs		= hugetlbfs_statfs,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
949
  	.put_super	= hugetlbfs_put_super,
10f19a86a   Miklos Szeredi   mount options: fi...
950
  	.show_options	= generic_show_options,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
951
  };
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
  /*
   * How a size mount option was supplied: not at all, as a plain size, or
   * as a percentage (value followed by '%').
   */
  enum { NO_SIZE, SIZE_STD, SIZE_PERCENT };
  
  /*
   * Translate a size mount option into a count of huge pages for the pool
   * described by @h.  With val_type == SIZE_STD the option is a byte count;
   * with val_type == SIZE_PERCENT it is a percentage of the pool's
   * max_huge_pages.  NO_SIZE yields -1.
   */
  static long long
  hugetlbfs_size_to_hpages(struct hstate *h, unsigned long long size_opt,
  								int val_type)
  {
  	switch (val_type) {
  	case NO_SIZE:
  		return -1;
  	case SIZE_PERCENT:
  		/* Scale the percentage up to bytes before dividing by 100. */
  		size_opt <<= huge_page_shift(h);
  		size_opt *= h->max_huge_pages;
  		do_div(size_opt, 100);
  		break;
  	default:
  		break;
  	}

  	/* Bytes to whole huge pages. */
  	size_opt >>= huge_page_shift(h);
  	return size_opt;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
975
976
977
  static int
  hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
  {
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
978
979
980
  	char *p, *rest;
  	substring_t args[MAX_OPT_ARGS];
  	int option;
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
981
982
  	unsigned long long max_size_opt = 0, min_size_opt = 0;
  	int max_val_type = NO_SIZE, min_val_type = NO_SIZE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
983
984
985
  
  	if (!options)
  		return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
986

e73a75fa7   Randy Dunlap   hugetlbfs: use li...
987
988
  	while ((p = strsep(&options, ",")) != NULL) {
  		int token;
b4c07bce7   Lee Schermerhorn   hugetlbfs: handle...
989
990
  		if (!*p)
  			continue;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
991
992
993
994
995
996
  
  		token = match_token(p, tokens, args);
  		switch (token) {
  		case Opt_uid:
  			if (match_int(&args[0], &option))
   				goto bad_val;
a0eb3a05a   Eric W. Biederman   userns: Convert h...
997
998
999
  			pconfig->uid = make_kuid(current_user_ns(), option);
  			if (!uid_valid(pconfig->uid))
  				goto bad_val;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1000
1001
1002
1003
1004
  			break;
  
  		case Opt_gid:
  			if (match_int(&args[0], &option))
   				goto bad_val;
a0eb3a05a   Eric W. Biederman   userns: Convert h...
1005
1006
1007
  			pconfig->gid = make_kgid(current_user_ns(), option);
  			if (!gid_valid(pconfig->gid))
  				goto bad_val;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1008
1009
1010
1011
1012
  			break;
  
  		case Opt_mode:
  			if (match_octal(&args[0], &option))
   				goto bad_val;
75897d60a   Ken Chen   hugetlb: allow st...
1013
  			pconfig->mode = option & 01777U;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1014
1015
1016
  			break;
  
  		case Opt_size: {
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1017
1018
1019
  			/* memparse() will accept a K/M/G without a digit */
  			if (!isdigit(*args[0].from))
  				goto bad_val;
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1020
1021
  			max_size_opt = memparse(args[0].from, &rest);
  			max_val_type = SIZE_STD;
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1022
  			if (*rest == '%')
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1023
  				max_val_type = SIZE_PERCENT;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1024
1025
  			break;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1026

e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1027
1028
1029
1030
1031
1032
  		case Opt_nr_inodes:
  			/* memparse() will accept a K/M/G without a digit */
  			if (!isdigit(*args[0].from))
  				goto bad_val;
  			pconfig->nr_inodes = memparse(args[0].from, &rest);
  			break;
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1033
1034
1035
1036
1037
  		case Opt_pagesize: {
  			unsigned long ps;
  			ps = memparse(args[0].from, &rest);
  			pconfig->hstate = size_to_hstate(ps);
  			if (!pconfig->hstate) {
9b857d26d   Andrew Morton   fs/hugetlbfs/inod...
1038
1039
  				pr_err("Unsupported page size %lu MB
  ",
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1040
1041
1042
1043
1044
  					ps >> 20);
  				return -EINVAL;
  			}
  			break;
  		}
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
  		case Opt_min_size: {
  			/* memparse() will accept a K/M/G without a digit */
  			if (!isdigit(*args[0].from))
  				goto bad_val;
  			min_size_opt = memparse(args[0].from, &rest);
  			min_val_type = SIZE_STD;
  			if (*rest == '%')
  				min_val_type = SIZE_PERCENT;
  			break;
  		}
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1055
  		default:
9b857d26d   Andrew Morton   fs/hugetlbfs/inod...
1056
1057
  			pr_err("Bad mount option: \"%s\"
  ", p);
b4c07bce7   Lee Schermerhorn   hugetlbfs: handle...
1058
  			return -EINVAL;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1059
1060
  			break;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1061
  	}
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1062

7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
  	/*
  	 * Use huge page pool size (in hstate) to convert the size
  	 * options to number of huge pages.  If NO_SIZE, -1 is returned.
  	 */
  	pconfig->max_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
  						max_size_opt, max_val_type);
  	pconfig->min_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
  						min_size_opt, min_val_type);
  
  	/*
  	 * If max_size was specified, then min_size must be smaller
  	 */
  	if (max_val_type > NO_SIZE &&
  	    pconfig->min_hpages > pconfig->max_hpages) {
  		pr_err("minimum size can not be greater than maximum size
  ");
  		return -EINVAL;
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1080
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1081
  	return 0;
e73a75fa7   Randy Dunlap   hugetlbfs: use li...
1082
1083
  
  bad_val:
9b857d26d   Andrew Morton   fs/hugetlbfs/inod...
1084
1085
  	pr_err("Bad value '%s' for mount option '%s'
  ", args[0].from, p);
c12ddba09   Akinobu Mita   hugetlbfs: return...
1086
   	return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1087
1088
1089
1090
1091
  }
  
  static int
  hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1092
1093
1094
  	int ret;
  	struct hugetlbfs_config config;
  	struct hugetlbfs_sb_info *sbinfo;
10f19a86a   Miklos Szeredi   mount options: fi...
1095
  	save_mount_options(sb, data);
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1096
  	config.max_hpages = -1; /* No limit on size by default */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1097
  	config.nr_inodes = -1; /* No limit on number of inodes by default */
77c70de15   David Howells   CRED: Wrap task c...
1098
1099
  	config.uid = current_fsuid();
  	config.gid = current_fsgid();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1100
  	config.mode = 0755;
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1101
  	config.hstate = &default_hstate;
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1102
  	config.min_hpages = -1; /* No default minimum size */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1103
  	ret = hugetlbfs_parse_options(data, &config);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1104
1105
1106
1107
1108
1109
1110
  	if (ret)
  		return ret;
  
  	sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL);
  	if (!sbinfo)
  		return -ENOMEM;
  	sb->s_fs_info = sbinfo;
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1111
  	sbinfo->hstate = config.hstate;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1112
  	spin_lock_init(&sbinfo->stat_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1113
1114
  	sbinfo->max_inodes = config.nr_inodes;
  	sbinfo->free_inodes = config.nr_inodes;
90481622d   David Gibson   hugepages: fix us...
1115
  	sbinfo->spool = NULL;
7ca02d0ae   Mike Kravetz   hugetlbfs: accept...
1116
1117
1118
1119
1120
1121
1122
1123
1124
  	/*
  	 * Allocate and initialize subpool if maximum or minimum size is
  	 * specified.  Any needed reservations (for minimim size) are taken
  	 * taken when the subpool is created.
  	 */
  	if (config.max_hpages != -1 || config.min_hpages != -1) {
  		sbinfo->spool = hugepage_new_subpool(config.hstate,
  							config.max_hpages,
  							config.min_hpages);
90481622d   David Gibson   hugepages: fix us...
1125
1126
1127
  		if (!sbinfo->spool)
  			goto out_free;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1128
  	sb->s_maxbytes = MAX_LFS_FILESIZE;
a137e1cc6   Andi Kleen   hugetlbfs: per mo...
1129
1130
  	sb->s_blocksize = huge_page_size(config.hstate);
  	sb->s_blocksize_bits = huge_page_shift(config.hstate);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1131
1132
1133
  	sb->s_magic = HUGETLBFS_MAGIC;
  	sb->s_op = &hugetlbfs_ops;
  	sb->s_time_gran = 1;
48fde701a   Al Viro   switch open-coded...
1134
1135
  	sb->s_root = d_make_root(hugetlbfs_get_root(sb, &config));
  	if (!sb->s_root)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1136
  		goto out_free;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1137
1138
  	return 0;
  out_free:
6e6870d4f   Fabian Frederick   fs/hugetlbfs/inod...
1139
  	kfree(sbinfo->spool);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1140
1141
1142
  	kfree(sbinfo);
  	return -ENOMEM;
  }
3c26ff6e4   Al Viro   convert get_sb_no...
1143
1144
  static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
  	int flags, const char *dev_name, void *data)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1145
  {
3c26ff6e4   Al Viro   convert get_sb_no...
1146
  	return mount_nodev(fs_type, flags, data, hugetlbfs_fill_super);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1147
1148
1149
1150
  }
  
  static struct file_system_type hugetlbfs_fs_type = {
  	.name		= "hugetlbfs",
3c26ff6e4   Al Viro   convert get_sb_no...
1151
  	.mount		= hugetlbfs_mount,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1152
1153
  	.kill_sb	= kill_litter_super,
  };
42d7395fe   Andi Kleen   mm: support more ...
1154
  /*
   * One mount per huge page size, indexed by the value get_hstate_idx()
   * returns; hugetlb_file_setup() fails with -ENOENT on an empty slot.
   */
  static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1155

ef1ff6b8c   From: Mel Gorman   hugetlbfs: do not...
1156
  static int can_do_hugetlb_shm(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1157
  {
a0eb3a05a   Eric W. Biederman   userns: Convert h...
1158
1159
1160
  	kgid_t shm_group;
  	shm_group = make_kgid(&init_user_ns, sysctl_hugetlb_shm_group);
  	return capable(CAP_IPC_LOCK) || in_group_p(shm_group);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1161
  }
42d7395fe   Andi Kleen   mm: support more ...
1162
1163
  static int get_hstate_idx(int page_size_log)
  {
af73e4d95   Naoya Horiguchi   hugetlbfs: fix mm...
1164
  	struct hstate *h = hstate_sizelog(page_size_log);
42d7395fe   Andi Kleen   mm: support more ...
1165

42d7395fe   Andi Kleen   mm: support more ...
1166
1167
1168
1169
  	if (!h)
  		return -1;
  	return h - hstates;
  }
be1d2cf5e   Fabian Frederick   fs/hugetlbfs/inod...
1170
  static const struct dentry_operations anon_ops = {
118b23022   Al Viro   cope with potenti...
1171
  	.d_dname = simple_dname
0df4d6e5b   Al Viro   hugetlb_file_setu...
1172
  };
af73e4d95   Naoya Horiguchi   hugetlbfs: fix mm...
1173
1174
1175
1176
1177
1178
  /*
   * Create an unlinked hugetlbfs file of @size bytes on the internal mount
   * for the huge page size selected by @page_size_log, with huge pages
   * reserved for the whole file.
   *
   * Note that size should be aligned to proper hugepage size in caller side,
   * otherwise hugetlb_reserve_pages reserves one less hugepages than intended.
   *
   * *@user is always written: it is set to the current user when the file
   * was charged against that user's mlock limit (HUGETLB_SHMFS_INODE
   * without the needed privilege), and to NULL otherwise, including on
   * every failure.
   *
   * Returns the new file or an ERR_PTR():
   *   -ENODEV  no hstate matches @page_size_log
   *   -ENOENT  no internal mount exists for that hstate
   *   -EPERM   unprivileged caller and user_shm_lock() refused
   *   -ENOSPC  no inode could be allocated
   *   -ENOMEM  dentry allocation or page reservation failed
   *   or the error returned by alloc_file().
   */
  struct file *hugetlb_file_setup(const char *name, size_t size,
  				vm_flags_t acctflag, struct user_struct **user,
  				int creat_flags, int page_size_log)
  {
  	struct file *file = ERR_PTR(-ENOMEM);
  	struct inode *inode;
  	struct path path;
  	struct super_block *sb;
  	struct qstr quick_string;
  	int hstate_idx;

  	hstate_idx = get_hstate_idx(page_size_log);
  	if (hstate_idx < 0)
  		return ERR_PTR(-ENODEV);

  	*user = NULL;
  	if (!hugetlbfs_vfsmount[hstate_idx])
  		return ERR_PTR(-ENOENT);

  	if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
  		/* Fall back to charging the caller's mlock limit. */
  		*user = current_user();
  		if (!user_shm_lock(size, *user)) {
  			*user = NULL;
  			return ERR_PTR(-EPERM);
  		}
  		task_lock(current);
  		pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n",
  			current->comm, current->pid);
  		task_unlock(current);
  	}

  	sb = hugetlbfs_vfsmount[hstate_idx]->mnt_sb;
  	quick_string.name = name;
  	quick_string.len = strlen(quick_string.name);
  	quick_string.hash = 0;
  	path.dentry = d_alloc_pseudo(sb, &quick_string);
  	if (!path.dentry)
  		goto out_shm_unlock;
  	d_set_d_op(path.dentry, &anon_ops);
  	path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]);

  	file = ERR_PTR(-ENOSPC);
  	inode = hugetlbfs_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0);
  	if (!inode)
  		goto out_dentry;
  	if (creat_flags == HUGETLB_SHMFS_INODE)
  		inode->i_flags |= S_PRIVATE;

  	file = ERR_PTR(-ENOMEM);
  	if (hugetlb_reserve_pages(inode, 0,
  			size >> huge_page_shift(hstate_inode(inode)), NULL,
  			acctflag))
  		goto out_inode;

  	d_instantiate(path.dentry, inode);
  	inode->i_size = size;
  	clear_nlink(inode);

  	file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
  			&hugetlbfs_file_operations);
  	if (IS_ERR(file))
  		goto out_dentry; /* inode is already attached */

  	return file;

  out_inode:
  	iput(inode);
  out_dentry:
  	path_put(&path);
  out_shm_unlock:
  	/* Undo the mlock charge taken above, if any. */
  	if (*user) {
  		user_shm_unlock(size, *user);
  		*user = NULL;
  	}
  	return file;
  }
  
  /*
   * init_hugetlbfs_fs - boot-time setup of hugetlbfs.
   *
   * Creates the hugetlbfs inode slab cache, registers the filesystem type and
   * then creates one internal kernel mount per hstate, stored in
   * hugetlbfs_vfsmount[] in for_each_hstate() order.  A slot is set to NULL
   * when its mount fails.  Only the mount for the default hstate is required;
   * the others are optional.
   *
   * On failure everything set up so far is undone in reverse order: internal
   * mounts are dropped, the filesystem type is unregistered and the inode
   * cache is destroyed, so no registered filesystem is left pointing at a
   * destroyed cache.
   *
   * Returns 0 on success, -ENOTSUPP when hugepages_supported() is false,
   * -ENOMEM when the inode cache cannot be created, the error from
   * register_filesystem(), or the error from mounting the default hstate.
   */
  static int __init init_hugetlbfs_fs(void)
  {
  	struct hstate *h;
  	int error;
  	int i;

  	if (!hugepages_supported()) {
  		pr_info("disabling because there are no supported hugepage sizes\n");
  		return -ENOTSUPP;
  	}

  	error = -ENOMEM;
  	hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache",
  					sizeof(struct hugetlbfs_inode_info),
  					0, SLAB_ACCOUNT, init_once);
  	if (hugetlbfs_inode_cachep == NULL)
  		goto out;

  	error = register_filesystem(&hugetlbfs_fs_type);
  	if (error)
  		goto out_free;

  	i = 0;
  	for_each_hstate(h) {
  		char buf[50];
  		/* Huge page size in KiB, used for the "pagesize=" mount option. */
  		unsigned ps_kb = 1U << (h->order + PAGE_SHIFT - 10);

  		snprintf(buf, sizeof(buf), "pagesize=%uK", ps_kb);
  		hugetlbfs_vfsmount[i] = kern_mount_data(&hugetlbfs_fs_type,
  							buf);

  		if (IS_ERR(hugetlbfs_vfsmount[i])) {
  			pr_err("Cannot mount internal hugetlbfs for page size %uK\n",
  				ps_kb);
  			/*
  			 * Only the default hstate is mandatory, so only its
  			 * failure decides the value returned to the caller.
  			 */
  			if (i == default_hstate_idx)
  				error = PTR_ERR(hugetlbfs_vfsmount[i]);
  			hugetlbfs_vfsmount[i] = NULL;
  		}
  		i++;
  	}
  	/* Non default hstates are optional */
  	if (hugetlbfs_vfsmount[default_hstate_idx])
  		return 0;

  	/*
  	 * The required default mount is missing: drop whatever optional
  	 * mounts did succeed before the inode cache they use goes away.
  	 */
  	while (i-- > 0) {
  		if (hugetlbfs_vfsmount[i]) {
  			kern_unmount(hugetlbfs_vfsmount[i]);
  			hugetlbfs_vfsmount[i] = NULL;
  		}
  	}
  	(void)unregister_filesystem(&hugetlbfs_fs_type);
   out_free:
  	kmem_cache_destroy(hugetlbfs_inode_cachep);
   out:
  	return error;
  }
3e89e1c5e   Paul Gortmaker   hugetlb: make mm ...
1299
  /* Hook init_hugetlbfs_fs() into kernel start-up via fs_initcall(). */
  fs_initcall(init_hugetlbfs_fs)