Merge tag 'for-linus-v3.7-rc1' of git://oss.sgi.com/xfs/xfs

Pull xfs update from Ben Myers:
 "Several enhancements and cleanups:
   - make inode32 and inode64 remountable options
   - SEEK_HOLE/SEEK_DATA enhancements
   - cleanup struct declarations in xfs_mount.h"

* tag 'for-linus-v3.7-rc1' of git://oss.sgi.com/xfs/xfs:
  xfs: Make inode32 a remountable option
  xfs: add inode64->inode32 transition into xfs_set_inode32()
  xfs: Fix mp->m_maxagi update during inode64 remount
  xfs: reduce code duplication handling inode32/64 options
  xfs: make inode64 as the default allocation mode
  xfs: Fix m_agirotor reset during AG selection
  Make inode64 a remountable option
  xfs: stop the sync worker before xfs_unmountfs
  xfs: xfs_seek_hole() refinement with hole searching from page cache for unwritten extents
  xfs: xfs_seek_data() refinement with unwritten extents check up from page cache
  xfs: Introduce a helper routine to probe data or hole offset from page cache
  xfs: Remove type argument from xfs_seek_data()/xfs_seek_hole()
  xfs: fix race while discarding buffers [V4]
  xfs: check for possible overflow in xfs_ioc_trim
  xfs: unlock the AGI buffer when looping in xfs_dialloc
  xfs: kill struct declarations in xfs_mount.h
  xfs: fix uninitialised variable in xfs_rtbuf_get()

Merge tag 'for-linus-v3.7-rc1' of git://oss.sgi.com/xfs/xfs
Pull xfs update from Ben Myers:
 "Several enhancements and cleanups:
   - make inode32 and inode64 remountable options
   - SEEK_HOLE/SEEK_DATA enhancements
   - cleanup struct declarations in xfs_mount.h"

* tag 'for-linus-v3.7-rc1' of git://oss.sgi.com/xfs/xfs:
  xfs: Make inode32 a remountable option
  xfs: add inode64->inode32 transition into xfs_set_inode32()
  xfs: Fix mp->m_maxagi update during inode64 remount
  xfs: reduce code duplication handling inode32/64 options
  xfs: make inode64 as the default allocation mode
  xfs: Fix m_agirotor reset during AG selection
  Make inode64 a remountable option
  xfs: stop the sync worker before xfs_unmountfs
  xfs: xfs_seek_hole() refinement with hole searching from page cache for unwritten extents
  xfs: xfs_seek_data() refinement with unwritten extents check up from page cache
  xfs: Introduce a helper routine to probe data or hole offset from page cache
  xfs: Remove type argument from xfs_seek_data()/xfs_seek_hole()
  xfs: fix race while discarding buffers [V4]
  xfs: check for possible overflow in xfs_ioc_trim
  xfs: unlock the AGI buffer when looping in xfs_dialloc
  xfs: kill struct declarations in xfs_mount.h
  xfs: fix uninitialised variable in xfs_rtbuf_get()
Linus Torvalds
2 parents aab174f0df 2ea0392983
Showing 7 changed files Side-by-side Diff
fs/xfs/xfs_file.c
fs/xfs/xfs_ialloc.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_super.c
fs/xfs/xfs_super.h
fs/xfs/xfs_trace.h
@@ -36,6 +36,7 @@
  
 #include <linux/dcache.h>
 #include <linux/falloc.h>
+#include <linux/pagevec.h>
  
 static const struct vm_operations_struct xfs_file_vm_ops;
  
  
  
@@ -959,17 +960,232 @@
 	return block_page_mkwrite(vma, vmf, xfs_get_blocks);
 }
  
+/*
+ * Selects which kind of offset xfs_seek_data() or xfs_seek_hole() asks
+ * the page cache lookup helpers to search for: the start of a hole
+ * (HOLE_OFF) or the start of data (DATA_OFF).
+ */
+enum {
+	HOLE_OFF = 0,
+	DATA_OFF,
+};
+
+/*
+ * Lookup the desired type of offset from the given page.
+ *
+ * The page's buffer heads are walked in order, starting at the page's
+ * own file offset and advancing by each buffer's size.  For DATA_OFF
+ * the first buffer that is unwritten or uptodate matches; for HOLE_OFF
+ * the first buffer that is neither matches.  page_buffers() is called
+ * unconditionally, so the caller is expected to have checked that the
+ * page has buffers attached.
+ *
+ * On success, return true and the offset argument will point to the
+ * start of the region that was found.  Otherwise this function will
+ * return false and keep the offset argument unchanged.
+ */
+STATIC bool
+xfs_lookup_buffer_offset(
+	struct page		*page,
+	loff_t			*offset,
+	unsigned int		type)
+{
+	loff_t			lastoff = page_offset(page);
+	bool			found = false;
+	struct buffer_head	*bh, *head;
+
+	bh = head = page_buffers(page);
+	do {
+		/*
+		 * Unwritten extents that have data in the page
+		 * cache covering them can be identified by the
+		 * BH_Unwritten state flag.  Pages with multiple
+		 * buffers might have a mix of holes, data and
+		 * unwritten extents - any buffer with valid
+		 * data in it should have the BH_Uptodate flag
+		 * set on it.
+		 */
+		if (buffer_unwritten(bh) ||
+		    buffer_uptodate(bh)) {
+			if (type == DATA_OFF)
+				found = true;
+		} else {
+			if (type == HOLE_OFF)
+				found = true;
+		}
+
+		if (found) {
+			*offset = lastoff;
+			break;
+		}
+		lastoff += bh->b_size;
+	} while ((bh = bh->b_this_page) != head);
+
+	return found;
+}
+
+/*
+ * This routine is called to find and return a data or hole offset
+ * from the page cache for unwritten extents, according to the desired
+ * type for xfs_seek_data() or xfs_seek_hole().
+ *
+ * The argument offset tells where to start searching the page cache.
+ * Map is used to figure out the end point of the range in which to
+ * look up pages.
+ *
+ * Return true if the desired type of offset was found, and the argument
+ * offset is filled with that address.  Otherwise, return false and keep
+ * offset unchanged.
+ *
+ * NOTE(review): the lookup count is min(end - index, PAGEVEC_SIZE),
+ * which is zero when the search starts in the same page that holds the
+ * end of the extent; no page appears to be examined in that case.
+ * Confirm that this is the intended behaviour.
+ */
+STATIC bool
+xfs_find_get_desired_pgoff(
+	struct inode		*inode,
+	struct xfs_bmbt_irec	*map,
+	unsigned int		type,
+	loff_t			*offset)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct pagevec		pvec;
+	pgoff_t			index;
+	pgoff_t			end;
+	loff_t			endoff;
+	loff_t			startoff = *offset;
+	loff_t			lastoff = startoff;
+	bool			found = false;
+
+	pagevec_init(&pvec, 0);
+
+	index = startoff >> PAGE_CACHE_SHIFT;
+	endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
+	end = endoff >> PAGE_CACHE_SHIFT;
+	do {
+		int		want;
+		unsigned	nr_pages;
+		unsigned int	i;
+
+		want = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
+		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
+					  want);
+		/*
+		 * No page mapped into the given range.  If we are searching
+		 * for holes and this is the first time through the loop, it
+		 * means that the given offset landed in a hole; return it.
+		 *
+		 * Otherwise we have already stepped through some block
+		 * buffers looking for holes but they all contain data.  In
+		 * this case, the last offset has already been updated to
+		 * point to the end of the last mapped page; if it does not
+		 * reach the end point of the search, there must be a hole
+		 * between them.
+		 */
+		if (nr_pages == 0) {
+			/* Data search found nothing */
+			if (type == DATA_OFF)
+				break;
+
+			ASSERT(type == HOLE_OFF);
+			if (lastoff == startoff || lastoff < endoff) {
+				found = true;
+				*offset = lastoff;
+			}
+			break;
+		}
+
+		/*
+		 * At least one page was found.  If this is the first time we
+		 * step into the loop, and if the first page's offset is
+		 * greater than the given search offset, a hole was found.
+		 * The offset argument is not written here: it still holds
+		 * the start offset, which is where the hole begins.
+		 */
+		if (type == HOLE_OFF && lastoff == startoff &&
+		    lastoff < page_offset(pvec.pages[0])) {
+			found = true;
+			break;
+		}
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page	*page = pvec.pages[i];
+			loff_t		b_offset;
+
+			/*
+			 * At this point, the page may be truncated or
+			 * invalidated (changing page->mapping to NULL),
+			 * or even swizzled back from swapper_space to tmpfs
+			 * file mapping. However, page->index will not change
+			 * because we have a reference on the page.
+			 *
+			 * The search is done if the page index is out of
+			 * range.  If the current offset has not reached the
+			 * end of the specified search range, there should be
+			 * a hole between them.
+			 */
+			if (page->index > end) {
+				if (type == HOLE_OFF && lastoff < endoff) {
+					*offset = lastoff;
+					found = true;
+				}
+				goto out;
+			}
+
+			lock_page(page);
+			/*
+			 * Page truncated or invalidated (page->mapping ==
+			 * NULL).  We can freely skip it and proceed to check
+			 * the next page.
+			 */
+			if (unlikely(page->mapping != inode->i_mapping)) {
+				unlock_page(page);
+				continue;
+			}
+
+			if (!page_has_buffers(page)) {
+				unlock_page(page);
+				continue;
+			}
+
+			found = xfs_lookup_buffer_offset(page, &b_offset, type);
+			if (found) {
+				/*
+				 * The found offset may be less than the start
+				 * point of the search if this is the first
+				 * time we come here, so clamp it to startoff.
+				 */
+				*offset = max_t(loff_t, startoff, b_offset);
+				unlock_page(page);
+				goto out;
+			}
+
+			/*
+			 * We are either searching for data and found none,
+			 * or searching for a hole and found only data
+			 * buffers.  In either case the next page may contain
+			 * what we want, so advance the last offset to it.
+			 */
+			lastoff = page_offset(page) + PAGE_SIZE;
+			unlock_page(page);
+		}
+
+		/*
+		 * Fewer pages were returned than we asked for, so the search
+		 * is done.  In this case nothing was found when searching
+		 * for data, but when searching for a hole there is one
+		 * behind the last offset.
+		 */
+		if (nr_pages < want) {
+			if (type == HOLE_OFF) {
+				*offset = lastoff;
+				found = true;
+			}
+			break;
+		}
+
+		index = pvec.pages[i - 1]->index + 1;
+		pagevec_release(&pvec);
+	} while (index <= end);
+
+out:
+	pagevec_release(&pvec);
+	return found;
+}
+
 STATIC loff_t
 xfs_seek_data(
 	struct file		*file,
-	loff_t			start,
-	u32			type)
+	loff_t			start)
 {
 	struct inode		*inode = file->f_mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_bmbt_irec	map[2];
-	int			nmap = 2;
 	loff_t			uninitialized_var(offset);
 	xfs_fsize_t		isize;
 	xfs_fileoff_t		fsbno;
  
  
  
  
  
  
@@ -985,36 +1201,74 @@
 		goto out_unlock;
 	}
  
-	fsbno = XFS_B_TO_FSBT(mp, start);
-
 	/*
 	 * Try to read extents from the first block indicated
 	 * by fsbno to the end block of the file.
 	 */
+	fsbno = XFS_B_TO_FSBT(mp, start);
 	end = XFS_B_TO_FSB(mp, isize);
+	for (;;) {
+		struct xfs_bmbt_irec	map[2];
+		int			nmap = 2;
+		unsigned int		i;
  
-	error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
-			       XFS_BMAPI_ENTIRE);
-	if (error)
-		goto out_unlock;
+		error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
+				       XFS_BMAPI_ENTIRE);
+		if (error)
+			goto out_unlock;
  
-	/*
-	 * Treat unwritten extent as data extent since it might
-	 * contains dirty data in page cache.
-	 */
-	if (map[0].br_startblock != HOLESTARTBLOCK) {
-		offset = max_t(loff_t, start,
-			       XFS_FSB_TO_B(mp, map[0].br_startoff));
-	} else {
+		/* No extents at given offset, must be beyond EOF */
+		if (nmap == 0) {
+			error = ENXIO;
+			goto out_unlock;
+		}
+
+		for (i = 0; i < nmap; i++) {
+			offset = max_t(loff_t, start,
+				       XFS_FSB_TO_B(mp, map[i].br_startoff));
+
+			/* Landed in a data extent */
+			if (map[i].br_startblock == DELAYSTARTBLOCK ||
+			    (map[i].br_state == XFS_EXT_NORM &&
+			     !isnullstartblock(map[i].br_startblock)))
+				goto out;
+
+			/*
+			 * Landed in an unwritten extent, try to search data
+			 * from page cache.
+			 */
+			if (map[i].br_state == XFS_EXT_UNWRITTEN) {
+				if (xfs_find_get_desired_pgoff(inode, &map[i],
+							DATA_OFF, &offset))
+					goto out;
+			}
+		}
+
+		/*
+		 * map[0] is hole or its an unwritten extent but
+		 * without data in page cache.  Probably means that
+		 * we are reading after EOF if nothing in map[1].
+		 */
 		if (nmap == 1) {
 			error = ENXIO;
 			goto out_unlock;
 		}
  
-		offset = max_t(loff_t, start,
-			       XFS_FSB_TO_B(mp, map[1].br_startoff));
+		ASSERT(i > 1);
+
+		/*
+		 * Nothing was found, proceed to the next round of search
+		 * if reading offset not beyond or hit EOF.
+		 */
+		fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
+		start = XFS_FSB_TO_B(mp, fsbno);
+		if (start >= isize) {
+			error = ENXIO;
+			goto out_unlock;
+		}
 	}
  
+out:
 	if (offset != file->f_pos)
 		file->f_pos = offset;
  
  
  
@@ -1029,16 +1283,15 @@
 STATIC loff_t
 xfs_seek_hole(
 	struct file		*file,
-	loff_t			start,
-	u32			type)
+	loff_t			start)
 {
 	struct inode		*inode = file->f_mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
 	loff_t			uninitialized_var(offset);
-	loff_t			holeoff;
 	xfs_fsize_t		isize;
 	xfs_fileoff_t		fsbno;
+	xfs_filblks_t		end;
 	uint			lock;
 	int			error;
  
  
  
  
  
@@ -1054,21 +1307,77 @@
 	}
  
 	fsbno = XFS_B_TO_FSBT(mp, start);
-	error = xfs_bmap_first_unused(NULL, ip, 1, &fsbno, XFS_DATA_FORK);
-	if (error)
-		goto out_unlock;
+	end = XFS_B_TO_FSB(mp, isize);
  
-	holeoff = XFS_FSB_TO_B(mp, fsbno);
-	if (holeoff <= start)
-		offset = start;
-	else {
+	for (;;) {
+		struct xfs_bmbt_irec	map[2];
+		int			nmap = 2;
+		unsigned int		i;
+
+		error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
+				       XFS_BMAPI_ENTIRE);
+		if (error)
+			goto out_unlock;
+
+		/* No extents at given offset, must be beyond EOF */
+		if (nmap == 0) {
+			error = ENXIO;
+			goto out_unlock;
+		}
+
+		for (i = 0; i < nmap; i++) {
+			offset = max_t(loff_t, start,
+				       XFS_FSB_TO_B(mp, map[i].br_startoff));
+
+			/* Landed in a hole */
+			if (map[i].br_startblock == HOLESTARTBLOCK)
+				goto out;
+
+			/*
+			 * Landed in an unwritten extent, try to search hole
+			 * from page cache.
+			 */
+			if (map[i].br_state == XFS_EXT_UNWRITTEN) {
+				if (xfs_find_get_desired_pgoff(inode, &map[i],
+							HOLE_OFF, &offset))
+					goto out;
+			}
+		}
+
 		/*
-		 * xfs_bmap_first_unused() could return a value bigger than
-		 * isize if there are no more holes past the supplied offset.
+		 * map[0] contains data or its unwritten but contains
+		 * data in page cache, probably means that we are
+		 * reading after EOF.  We should fix offset to point
+		 * to the end of the file(i.e., there is an implicit
+		 * hole at the end of any file).
 		 */
-		offset = min_t(loff_t, holeoff, isize);
+		if (nmap == 1) {
+			offset = isize;
+			break;
+		}
+
+		ASSERT(i > 1);
+
+		/*
+		 * Both mappings contains data, proceed to the next round of
+		 * search if the current reading offset not beyond or hit EOF.
+		 */
+		fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
+		start = XFS_FSB_TO_B(mp, fsbno);
+		if (start >= isize) {
+			offset = isize;
+			break;
+		}
 	}
  
+out:
+	/*
+	 * At this point, we must have found a hole.  However, the returned
+	 * offset may be bigger than the file size as it may be aligned to
+	 * page boundary for unwritten extents, we need to deal with this
+	 * situation in particular.
+	 */
+	offset = min_t(loff_t, offset, isize);
 	if (offset != file->f_pos)
 		file->f_pos = offset;
  
  
@@ -1092,9 +1401,9 @@
 	case SEEK_SET:
 		return generic_file_llseek(file, offset, origin);
 	case SEEK_DATA:
-		return xfs_seek_data(file, offset, origin);
+		return xfs_seek_data(file, offset);
 	case SEEK_HOLE:
-		return xfs_seek_hole(file, offset, origin);
+		return xfs_seek_hole(file, offset);
 	default:
 		return -EINVAL;
 	}
@@ -431,7 +431,7 @@
  
 	spin_lock(&mp->m_agirotor_lock);
 	agno = mp->m_agirotor;
-	if (++mp->m_agirotor == mp->m_maxagi)
+	if (++mp->m_agirotor >= mp->m_maxagi)
 		mp->m_agirotor = 0;
 	spin_unlock(&mp->m_agirotor_lock);
  
@@ -440,7 +440,7 @@
 	xfs_agnumber_t	agcount,
 	xfs_agnumber_t	*maxagi)
 {
-	xfs_agnumber_t	index, max_metadata;
+	xfs_agnumber_t	index;
 	xfs_agnumber_t	first_initialised = 0;
 	xfs_perag_t	*pag;
 	xfs_agino_t	agino;
@@ -500,43 +500,10 @@
 	else
 		mp->m_flags &= ~XFS_MOUNT_32BITINODES;
  
-	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
-		/*
-		 * Calculate how much should be reserved for inodes to meet
-		 * the max inode percentage.
-		 */
-		if (mp->m_maxicount) {
-			__uint64_t	icount;
-
-			icount = sbp->sb_dblocks * sbp->sb_imax_pct;
-			do_div(icount, 100);
-			icount += sbp->sb_agblocks - 1;
-			do_div(icount, sbp->sb_agblocks);
-			max_metadata = icount;
-		} else {
-			max_metadata = agcount;
-		}
-
-		for (index = 0; index < agcount; index++) {
-			ino = XFS_AGINO_TO_INO(mp, index, agino);
-			if (ino > XFS_MAXINUMBER_32) {
-				index++;
-				break;
-			}
-
-			pag = xfs_perag_get(mp, index);
-			pag->pagi_inodeok = 1;
-			if (index < max_metadata)
-				pag->pagf_metadata = 1;
-			xfs_perag_put(pag);
-		}
-	} else {
-		for (index = 0; index < agcount; index++) {
-			pag = xfs_perag_get(mp, index);
-			pag->pagi_inodeok = 1;
-			xfs_perag_put(pag);
-		}
-	}
+	if (mp->m_flags & XFS_MOUNT_32BITINODES)
+		index = xfs_set_inode32(mp);
+	else
+		index = xfs_set_inode64(mp);
  
 	if (maxagi)
 		*maxagi = index;
@@ -54,12 +54,7 @@
 #include "xfs_sync.h"
  
 struct xlog;
-struct xfs_mount_args;
 struct xfs_inode;
-struct xfs_bmbt_irec;
-struct xfs_bmap_free;
-struct xfs_extdelta;
-struct xfs_swapext;
 struct xfs_mru_cache;
 struct xfs_nameops;
 struct xfs_ail;
@@ -88,6 +88,8 @@
 					 * unwritten extent conversion */
 #define MNTOPT_NOBARRIER "nobarrier"	/* .. disable */
 #define MNTOPT_64BITINODE   "inode64"	/* inodes can be allocated anywhere */
+#define MNTOPT_32BITINODE   "inode32"	/* inode allocation limited to
+					 * XFS_MAXINUMBER_32 */
 #define MNTOPT_IKEEP	"ikeep"		/* do not free empty inode clusters */
 #define MNTOPT_NOIKEEP	"noikeep"	/* free empty inode clusters */
 #define MNTOPT_LARGEIO	   "largeio"	/* report large I/O sizes in stat() */
  
@@ -120,12 +122,18 @@
  * in the future, too.
  */
 enum {
-	Opt_barrier, Opt_nobarrier, Opt_err
+	Opt_barrier,
+	Opt_nobarrier,
+	Opt_inode64,
+	Opt_inode32,
+	Opt_err
 };
  
 static const match_table_t tokens = {
 	{Opt_barrier, "barrier"},
 	{Opt_nobarrier, "nobarrier"},
+	{Opt_inode64, "inode64"},
+	{Opt_inode32, "inode32"},
 	{Opt_err, NULL}
 };
  
  
@@ -197,7 +205,9 @@
 	 */
 	mp->m_flags |= XFS_MOUNT_BARRIER;
 	mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+#if !XFS_BIG_INUMS
 	mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+#endif
  
 	/*
 	 * These can be overridden by the mount option parsing.
@@ -294,6 +304,8 @@
 				return EINVAL;
 			}
 			dswidth = simple_strtoul(value, &eov, 10);
+		} else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
+			mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
 		} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
 			mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
 #if !XFS_BIG_INUMS
@@ -492,6 +504,7 @@
 		{ XFS_MOUNT_FILESTREAMS,	"," MNTOPT_FILESTREAM },
 		{ XFS_MOUNT_GRPID,		"," MNTOPT_GRPID },
 		{ XFS_MOUNT_DISCARD,		"," MNTOPT_DISCARD },
+		{ XFS_MOUNT_SMALL_INUMS,	"," MNTOPT_32BITINODE },
 		{ 0, NULL }
 	};
 	static struct proc_xfs_info xfs_info_unset[] = {
@@ -591,6 +604,80 @@
 	return (((__uint64_t)pagefactor) << bitshift) - 1;
 }
  
+xfs_agnumber_t
+xfs_set_inode32(struct xfs_mount *mp)
+{
+	xfs_agnumber_t	index = 0;
+	xfs_agnumber_t	maxagi = 0;
+	xfs_sb_t	*sbp = &mp->m_sb;
+	xfs_agnumber_t	max_metadata;
+	xfs_agino_t	agino =	XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks -1, 0);
+	xfs_ino_t	ino = XFS_AGINO_TO_INO(mp, sbp->sb_agcount -1, agino);
+	xfs_perag_t	*pag;
+
+	/*
+	 * Calculate how many allocation groups should be reserved for
+	 * inodes to meet the max inode percentage: sb_imax_pct percent
+	 * of the data blocks, rounded up to whole AGs.  AGs with an
+	 * index below this count that can hold 32-bit inode numbers are
+	 * flagged pagf_metadata in the loop below.  When m_maxicount is
+	 * zero, the count is simply the number of AGs.
+	 */
+	if (mp->m_maxicount) {
+		__uint64_t	icount;
+
+		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
+		do_div(icount, 100);
+		icount += sbp->sb_agblocks - 1;
+		do_div(icount, sbp->sb_agblocks);
+		max_metadata = icount;
+	} else {
+		max_metadata = sbp->sb_agcount;
+	}
+
+	for (index = 0; index < sbp->sb_agcount; index++) {
+		ino = XFS_AGINO_TO_INO(mp, index, agino);
+
+		if (ino > XFS_MAXINUMBER_32) {
+			pag = xfs_perag_get(mp, index);
+			pag->pagi_inodeok = 0;
+			pag->pagf_metadata = 0;
+			xfs_perag_put(pag);
+			continue;
+		}
+
+		pag = xfs_perag_get(mp, index);
+		pag->pagi_inodeok = 1;
+		maxagi++;
+		if (index < max_metadata)
+			pag->pagf_metadata = 1;
+		xfs_perag_put(pag);
+	}
+	mp->m_flags |= (XFS_MOUNT_32BITINODES |
+			XFS_MOUNT_SMALL_INUMS);
+
+	return maxagi;
+}
+
+xfs_agnumber_t
+xfs_set_inode64(struct xfs_mount *mp)
+{
+	xfs_agnumber_t index = 0;
+
+	for (index = 0; index < mp->m_sb.sb_agcount; index++) {
+		struct xfs_perag	*pag;
+
+		pag = xfs_perag_get(mp, index);
+		pag->pagi_inodeok = 1;
+		pag->pagf_metadata = 0;
+		xfs_perag_put(pag);
+	}
+
+	/*
+	 * There is no need for lock protection on m_flags: the
+	 * rw_semaphore of the VFS superblock is held during
+	 * mount/umount/remount operations, which is enough to
+	 * avoid concurrency on the m_flags field.
+	 */
+	mp->m_flags &= ~(XFS_MOUNT_32BITINODES |
+			 XFS_MOUNT_SMALL_INUMS);
+	return index;
+}
+
 STATIC int
 xfs_blkdev_get(
 	xfs_mount_t		*mp,
@@ -1055,6 +1142,12 @@
 			break;
 		case Opt_nobarrier:
 			mp->m_flags &= ~XFS_MOUNT_BARRIER;
+			break;
+		case Opt_inode64:
+			mp->m_maxagi = xfs_set_inode64(mp);
+			break;
+		case Opt_inode32:
+			mp->m_maxagi = xfs_set_inode32(mp);
 			break;
 		default:
 			/*
@@ -75,6 +75,8 @@
 extern __uint64_t xfs_max_file_offset(unsigned int);
  
 extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
+extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *);
+extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *);
  
 extern const struct export_operations xfs_export_operations;
 extern const struct xattr_handler *xfs_xattr_handlers[];
@@ -37,6 +37,7 @@
 struct xlog_recover_item;
 struct xfs_buf_log_format;
 struct xfs_inode_log_format;
+struct xfs_bmbt_irec;
  
 DECLARE_EVENT_CLASS(xfs_attr_list_class,
 	TP_PROTO(struct xfs_attr_list_context *ctx),
...	...	@@ -36,6 +36,7 @@
36	36
37	37	#include <linux/dcache.h>
38	38	#include <linux/falloc.h>
	39	+#include <linux/pagevec.h>
39	40
40	41	static const struct vm_operations_struct xfs_file_vm_ops;
41	42
42	43
43	44
...	...	@@ -959,17 +960,232 @@
959	960	return block_page_mkwrite(vma, vmf, xfs_get_blocks);
960	961	}
961	962
	963	+/*
	964	+ * This type is designed to indicate the type of offset we would like
	965	+ * to search from page cache for either xfs_seek_data() or xfs_seek_hole().
	966	+ */
	967	+enum {
	968	+ HOLE_OFF = 0,
	969	+ DATA_OFF,
	970	+};
	971	+
	972	+/*
	973	+ * Lookup the desired type of offset from the given page.
	974	+ *
	975	+ * On success, return true and the offset argument will point to the
	976	+ * start of the region that was found. Otherwise this function will
	977	+ * return false and keep the offset argument unchanged.
	978	+ */
	979	+STATIC bool
	980	+xfs_lookup_buffer_offset(
	981	+ struct page *page,
	982	+ loff_t *offset,
	983	+ unsigned int type)
	984	+{
	985	+ loff_t lastoff = page_offset(page);
	986	+ bool found = false;
	987	+ struct buffer_head *bh, *head;
	988	+
	989	+ bh = head = page_buffers(page);
	990	+ do {
	991	+ /*
	992	+ * Unwritten extents that have data in the page
	993	+ * cache covering them can be identified by the
	994	+ * BH_Unwritten state flag. Pages with multiple
	995	+ * buffers might have a mix of holes, data and
	996	+ * unwritten extents - any buffer with valid
	997	+ * data in it should have BH_Uptodate flag set
	998	+ * on it.
	999	+ */
	1000	+ if (buffer_unwritten(bh) ||
	1001	+ buffer_uptodate(bh)) {
	1002	+ if (type == DATA_OFF)
	1003	+ found = true;
	1004	+ } else {
	1005	+ if (type == HOLE_OFF)
	1006	+ found = true;
	1007	+ }
	1008	+
	1009	+ if (found) {
	1010	+ *offset = lastoff;
	1011	+ break;
	1012	+ }
	1013	+ lastoff += bh->b_size;
	1014	+ } while ((bh = bh->b_this_page) != head);
	1015	+
	1016	+ return found;
	1017	+}
	1018	+
	1019	+/*
	1020	+ * This routine is called to find out and return a data or hole offset
	1021	+ * from the page cache for unwritten extents according to the desired
	1022	+ * type for xfs_seek_data() or xfs_seek_hole().
	1023	+ *
	1024	+ * The argument offset is used to tell where we start to search from the
	1025	+ * page cache. Map is used to figure out the end points of the range to
	1026	+ * lookup pages.
	1027	+ *
	1028	+ * Return true if the desired type of offset was found, and the argument
	1029	+ * offset is filled with that address. Otherwise, return false and keep
	1030	+ * offset unchanged.
	1031	+ */
	1032	+STATIC bool
	1033	+xfs_find_get_desired_pgoff(
	1034	+ struct inode *inode,
	1035	+ struct xfs_bmbt_irec *map,
	1036	+ unsigned int type,
	1037	+ loff_t *offset)
	1038	+{
	1039	+ struct xfs_inode *ip = XFS_I(inode);
	1040	+ struct xfs_mount *mp = ip->i_mount;
	1041	+ struct pagevec pvec;
	1042	+ pgoff_t index;
	1043	+ pgoff_t end;
	1044	+ loff_t endoff;
	1045	+ loff_t startoff = *offset;
	1046	+ loff_t lastoff = startoff;
	1047	+ bool found = false;
	1048	+
	1049	+ pagevec_init(&pvec, 0);
	1050	+
	1051	+ index = startoff >> PAGE_CACHE_SHIFT;
	1052	+ endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
	1053	+ end = endoff >> PAGE_CACHE_SHIFT;
	1054	+ do {
	1055	+ int want;
	1056	+ unsigned nr_pages;
	1057	+ unsigned int i;
	1058	+
	1059	+ want = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
	1060	+ nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
	1061	+ want);
	1062	+ /*
	1063	+ * No page mapped into given range. If we are searching holes
	1064	+ * and if this is the first time we got into the loop, it means
	1065	+ * that the given offset is landed in a hole, return it.
	1066	+ *
	1067	+ * If we have already stepped through some block buffers to find
	1068	+ * holes but they all contains data. In this case, the last
	1069	+ * offset is already updated and pointed to the end of the last
	1070	+ * mapped page, if it does not reach the endpoint to search,
	1071	+ * that means there should be a hole between them.
	1072	+ */
	1073	+ if (nr_pages == 0) {
	1074	+ /* Data search found nothing */
	1075	+ if (type == DATA_OFF)
	1076	+ break;
	1077	+
	1078	+ ASSERT(type == HOLE_OFF);
	1079	+ if (lastoff == startoff || lastoff < endoff) {
	1080	+ found = true;
	1081	+ *offset = lastoff;
	1082	+ }
	1083	+ break;
	1084	+ }
	1085	+
	1086	+ /*
	1087	+ * At lease we found one page. If this is the first time we
	1088	+ * step into the loop, and if the first page index offset is
	1089	+ * greater than the given search offset, a hole was found.
	1090	+ */
	1091	+ if (type == HOLE_OFF && lastoff == startoff &&
	1092	+ lastoff < page_offset(pvec.pages[0])) {
	1093	+ found = true;
	1094	+ break;
	1095	+ }
	1096	+
	1097	+ for (i = 0; i < nr_pages; i++) {
	1098	+ struct page *page = pvec.pages[i];
	1099	+ loff_t b_offset;
	1100	+
	1101	+ /*
	1102	+ * At this point, the page may be truncated or
	1103	+ * invalidated (changing page->mapping to NULL),
	1104	+ * or even swizzled back from swapper_space to tmpfs
	1105	+ * file mapping. However, page->index will not change
	1106	+ * because we have a reference on the page.
	1107	+ *
	1108	+ * Searching done if the page index is out of range.
	1109	+ * If the current offset is not reaches the end of
	1110	+ * the specified search range, there should be a hole
	1111	+ * between them.
	1112	+ */
	1113	+ if (page->index > end) {
	1114	+ if (type == HOLE_OFF && lastoff < endoff) {
	1115	+ *offset = lastoff;
	1116	+ found = true;
	1117	+ }
	1118	+ goto out;
	1119	+ }
	1120	+
	1121	+ lock_page(page);
	1122	+ /*
	1123	+ * Page truncated or invalidated(page->mapping == NULL).
	1124	+ * We can freely skip it and proceed to check the next
	1125	+ * page.
	1126	+ */
	1127	+ if (unlikely(page->mapping != inode->i_mapping)) {
	1128	+ unlock_page(page);
	1129	+ continue;
	1130	+ }
	1131	+
	1132	+ if (!page_has_buffers(page)) {
	1133	+ unlock_page(page);
	1134	+ continue;
	1135	+ }
	1136	+
	1137	+ found = xfs_lookup_buffer_offset(page, &b_offset, type);
	1138	+ if (found) {
	1139	+ /*
	1140	+ * The found offset may be less than the start
	1141	+ * point to search if this is the first time to
	1142	+ * come here.
	1143	+ */
	1144	+ *offset = max_t(loff_t, startoff, b_offset);
	1145	+ unlock_page(page);
	1146	+ goto out;
	1147	+ }
	1148	+
	1149	+ /*
	1150	+ * We either searching data but nothing was found, or
	1151	+ * searching hole but found a data buffer. In either
	1152	+ * case, probably the next page contains the desired
	1153	+ * things, update the last offset to it so.
	1154	+ */
	1155	+ lastoff = page_offset(page) + PAGE_SIZE;
	1156	+ unlock_page(page);
	1157	+ }
	1158	+
	1159	+ /*
	1160	+ * The number of returned pages less than our desired, search
	1161	+ * done. In this case, nothing was found for searching data,
	1162	+ * but we found a hole behind the last offset.
	1163	+ */
	1164	+ if (nr_pages < want) {
	1165	+ if (type == HOLE_OFF) {
	1166	+ *offset = lastoff;
	1167	+ found = true;
	1168	+ }
	1169	+ break;
	1170	+ }
	1171	+
	1172	+ index = pvec.pages[i - 1]->index + 1;
	1173	+ pagevec_release(&pvec);
	1174	+ } while (index <= end);
	1175	+
	1176	+out:
	1177	+ pagevec_release(&pvec);
	1178	+ return found;
	1179	+}
	1180	+
962	1181	STATIC loff_t
963	1182	xfs_seek_data(
964	1183	struct file *file,
965		- loff_t start,
966		- u32 type)
	1184	+ loff_t start)
967	1185	{
968	1186	struct inode *inode = file->f_mapping->host;
969	1187	struct xfs_inode *ip = XFS_I(inode);
970	1188	struct xfs_mount *mp = ip->i_mount;
971		- struct xfs_bmbt_irec map[2];
972		- int nmap = 2;
973	1189	loff_t uninitialized_var(offset);
974	1190	xfs_fsize_t isize;
975	1191	xfs_fileoff_t fsbno;
976	1192
977	1193
978	1194
979	1195
980	1196
981	1197
...	...	@@ -985,36 +1201,74 @@
985	1201	goto out_unlock;
986	1202	}
987	1203
988		- fsbno = XFS_B_TO_FSBT(mp, start);
989		-
990	1204	/*
991	1205	* Try to read extents from the first block indicated
992	1206	* by fsbno to the end block of the file.
993	1207	*/
	1208	+ fsbno = XFS_B_TO_FSBT(mp, start);
994	1209	end = XFS_B_TO_FSB(mp, isize);
	1210	+ for (;;) {
	1211	+ struct xfs_bmbt_irec map[2];
	1212	+ int nmap = 2;
	1213	+ unsigned int i;
995	1214
996		- error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
997		- XFS_BMAPI_ENTIRE);
998		- if (error)
999		- goto out_unlock;
	1215	+ error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
	1216	+ XFS_BMAPI_ENTIRE);
	1217	+ if (error)
	1218	+ goto out_unlock;
1000	1219
1001		- /*
1002		- * Treat unwritten extent as data extent since it might
1003		- * contains dirty data in page cache.
1004		- */
1005		- if (map[0].br_startblock != HOLESTARTBLOCK) {
1006		- offset = max_t(loff_t, start,
1007		- XFS_FSB_TO_B(mp, map[0].br_startoff));
1008		- } else {
	1220	+ /* No extents at given offset, must be beyond EOF */
	1221	+ if (nmap == 0) {
	1222	+ error = ENXIO;
	1223	+ goto out_unlock;
	1224	+ }
	1225	+
	1226	+ for (i = 0; i < nmap; i++) {
	1227	+ offset = max_t(loff_t, start,
	1228	+ XFS_FSB_TO_B(mp, map[i].br_startoff));
	1229	+
	1230	+ /* Landed in a data extent */
	1231	+ if (map[i].br_startblock == DELAYSTARTBLOCK ||
	1232	+ (map[i].br_state == XFS_EXT_NORM &&
	1233	+ !isnullstartblock(map[i].br_startblock)))
	1234	+ goto out;
	1235	+
	1236	+ /*
	1237	+ * Landed in an unwritten extent, try to search data
	1238	+ * from page cache.
	1239	+ */
	1240	+ if (map[i].br_state == XFS_EXT_UNWRITTEN) {
	1241	+ if (xfs_find_get_desired_pgoff(inode, &map[i],
	1242	+ DATA_OFF, &offset))
	1243	+ goto out;
	1244	+ }
	1245	+ }
	1246	+
	1247	+ /*
	1248	+ * map[0] is hole or its an unwritten extent but
	1249	+ * without data in page cache. Probably means that
	1250	+ * we are reading after EOF if nothing in map[1].
	1251	+ */
1009	1252	if (nmap == 1) {
1010	1253	error = ENXIO;
1011	1254	goto out_unlock;
1012	1255	}
1013	1256
1014		- offset = max_t(loff_t, start,
1015		- XFS_FSB_TO_B(mp, map[1].br_startoff));
	1257	+ ASSERT(i > 1);
	1258	+
	1259	+ /*
	1260	+ * Nothing was found, proceed to the next round of search
	1261	+ * if reading offset not beyond or hit EOF.
	1262	+ */
	1263	+ fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
	1264	+ start = XFS_FSB_TO_B(mp, fsbno);
	1265	+ if (start >= isize) {
	1266	+ error = ENXIO;
	1267	+ goto out_unlock;
	1268	+ }
1016	1269	}
1017	1270
	1271	+out:
1018	1272	if (offset != file->f_pos)
1019	1273	file->f_pos = offset;
1020	1274
1021	1275
1022	1276
...	...	@@ -1029,16 +1283,15 @@
1029	1283	STATIC loff_t
1030	1284	xfs_seek_hole(
1031	1285	struct file *file,
1032		- loff_t start,
1033		- u32 type)
	1286	+ loff_t start)
1034	1287	{
1035	1288	struct inode *inode = file->f_mapping->host;
1036	1289	struct xfs_inode *ip = XFS_I(inode);
1037	1290	struct xfs_mount *mp = ip->i_mount;
1038	1291	loff_t uninitialized_var(offset);
1039		- loff_t holeoff;
1040	1292	xfs_fsize_t isize;
1041	1293	xfs_fileoff_t fsbno;
	1294	+ xfs_filblks_t end;
1042	1295	uint lock;
1043	1296	int error;
1044	1297
1045	1298
1046	1299
1047	1300
1048	1301
...	...	@@ -1054,21 +1307,77 @@
1054	1307	}
1055	1308
1056	1309	fsbno = XFS_B_TO_FSBT(mp, start);
1057		- error = xfs_bmap_first_unused(NULL, ip, 1, &fsbno, XFS_DATA_FORK);
1058		- if (error)
1059		- goto out_unlock;
	1310	+ end = XFS_B_TO_FSB(mp, isize);
1060	1311
1061		- holeoff = XFS_FSB_TO_B(mp, fsbno);
1062		- if (holeoff <= start)
1063		- offset = start;
1064		- else {
	1312	+ for (;;) {
	1313	+ struct xfs_bmbt_irec map[2];
	1314	+ int nmap = 2;
	1315	+ unsigned int i;
	1316	+
	1317	+ error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
	1318	+ XFS_BMAPI_ENTIRE);
	1319	+ if (error)
	1320	+ goto out_unlock;
	1321	+
	1322	+ /* No extents at given offset, must be beyond EOF */
	1323	+ if (nmap == 0) {
	1324	+ error = ENXIO;
	1325	+ goto out_unlock;
	1326	+ }
	1327	+
	1328	+ for (i = 0; i < nmap; i++) {
	1329	+ offset = max_t(loff_t, start,
	1330	+ XFS_FSB_TO_B(mp, map[i].br_startoff));
	1331	+
	1332	+ /* Landed in a hole */
	1333	+ if (map[i].br_startblock == HOLESTARTBLOCK)
	1334	+ goto out;
	1335	+
	1336	+ /*
	1337	+ * Landed in an unwritten extent, try to search hole
	1338	+ * from page cache.
	1339	+ */
	1340	+ if (map[i].br_state == XFS_EXT_UNWRITTEN) {
	1341	+ if (xfs_find_get_desired_pgoff(inode, &map[i],
	1342	+ HOLE_OFF, &offset))
	1343	+ goto out;
	1344	+ }
	1345	+ }
	1346	+
1065	1347	/*
1066		- * xfs_bmap_first_unused() could return a value bigger than
1067		- * isize if there are no more holes past the supplied offset.
	1348	+ * map[0] contains data or it's unwritten but contains
	1349	+ * data in page cache, which probably means that we are
	1350	+ * reading after EOF. We should fix offset to point
	1351	+ * to the end of the file (i.e., there is an implicit
	1352	+ * hole at the end of any file).
1068	1353	*/
1069		- offset = min_t(loff_t, holeoff, isize);
	1354	+ if (nmap == 1) {
	1355	+ offset = isize;
	1356	+ break;
	1357	+ }
	1358	+
	1359	+ ASSERT(i > 1);
	1360	+
	1361	+ /*
	1362	+ * Both mappings contain data, proceed to the next round of
	1363	+ * search if the current reading offset has not reached EOF.
	1364	+ */
	1365	+ fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
	1366	+ start = XFS_FSB_TO_B(mp, fsbno);
	1367	+ if (start >= isize) {
	1368	+ offset = isize;
	1369	+ break;
	1370	+ }
1070	1371	}
1071	1372
	1373	+out:
	1374	+ /*
	1375	+ * At this point, we must have found a hole. However, the returned
	1376	+ * offset may be bigger than the file size as it may be aligned to
	1377	+ * page boundary for unwritten extents, we need to deal with this
	1378	+ * situation in particular.
	1379	+ */
	1380	+ offset = min_t(loff_t, offset, isize);
1072	1381	if (offset != file->f_pos)
1073	1382	file->f_pos = offset;
1074	1383
1075	1384
...	...	@@ -1092,9 +1401,9 @@
1092	1401	case SEEK_SET:
1093	1402	return generic_file_llseek(file, offset, origin);
1094	1403	case SEEK_DATA:
1095		- return xfs_seek_data(file, offset, origin);
	1404	+ return xfs_seek_data(file, offset);
1096	1405	case SEEK_HOLE:
1097		- return xfs_seek_hole(file, offset, origin);
	1406	+ return xfs_seek_hole(file, offset);
1098	1407	default:
1099	1408	return -EINVAL;
1100	1409	}
...	...	@@ -431,7 +431,7 @@
431	431
432	432	spin_lock(&mp->m_agirotor_lock);
433	433	agno = mp->m_agirotor;
434		- if (++mp->m_agirotor == mp->m_maxagi)
	434	+ if (++mp->m_agirotor >= mp->m_maxagi)
435	435	mp->m_agirotor = 0;
436	436	spin_unlock(&mp->m_agirotor_lock);
437	437
...	...	@@ -440,7 +440,7 @@
440	440	xfs_agnumber_t agcount,
441	441	xfs_agnumber_t *maxagi)
442	442	{
443		- xfs_agnumber_t index, max_metadata;
	443	+ xfs_agnumber_t index;
444	444	xfs_agnumber_t first_initialised = 0;
445	445	xfs_perag_t *pag;
446	446	xfs_agino_t agino;
...	...	@@ -500,43 +500,10 @@
500	500	else
501	501	mp->m_flags &= ~XFS_MOUNT_32BITINODES;
502	502
503		- if (mp->m_flags & XFS_MOUNT_32BITINODES) {
504		- /*
505		- * Calculate how much should be reserved for inodes to meet
506		- * the max inode percentage.
507		- */
508		- if (mp->m_maxicount) {
509		- __uint64_t icount;
510		-
511		- icount = sbp->sb_dblocks * sbp->sb_imax_pct;
512		- do_div(icount, 100);
513		- icount += sbp->sb_agblocks - 1;
514		- do_div(icount, sbp->sb_agblocks);
515		- max_metadata = icount;
516		- } else {
517		- max_metadata = agcount;
518		- }
519		-
520		- for (index = 0; index < agcount; index++) {
521		- ino = XFS_AGINO_TO_INO(mp, index, agino);
522		- if (ino > XFS_MAXINUMBER_32) {
523		- index++;
524		- break;
525		- }
526		-
527		- pag = xfs_perag_get(mp, index);
528		- pag->pagi_inodeok = 1;
529		- if (index < max_metadata)
530		- pag->pagf_metadata = 1;
531		- xfs_perag_put(pag);
532		- }
533		- } else {
534		- for (index = 0; index < agcount; index++) {
535		- pag = xfs_perag_get(mp, index);
536		- pag->pagi_inodeok = 1;
537		- xfs_perag_put(pag);
538		- }
539		- }
	503	+ if (mp->m_flags & XFS_MOUNT_32BITINODES)
	504	+ index = xfs_set_inode32(mp);
	505	+ else
	506	+ index = xfs_set_inode64(mp);
540	507
541	508	if (maxagi)
542	509	*maxagi = index;
...	...	@@ -54,12 +54,7 @@
54	54	#include "xfs_sync.h"
55	55
56	56	struct xlog;
57		-struct xfs_mount_args;
58	57	struct xfs_inode;
59		-struct xfs_bmbt_irec;
60		-struct xfs_bmap_free;
61		-struct xfs_extdelta;
62		-struct xfs_swapext;
63	58	struct xfs_mru_cache;
64	59	struct xfs_nameops;
65	60	struct xfs_ail;
...	...	@@ -88,6 +88,8 @@
88	88	* unwritten extent conversion */
89	89	#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */
90	90	#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
	91	+#define MNTOPT_32BITINODE "inode32" /* inode allocation limited to
	92	+ * XFS_MAXINUMBER_32 */
91	93	#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
92	94	#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */
93	95	#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */
94	96
...	...	@@ -120,12 +122,18 @@
120	122	* in the future, too.
121	123	*/
122	124	enum {
123		- Opt_barrier, Opt_nobarrier, Opt_err
	125	+ Opt_barrier,
	126	+ Opt_nobarrier,
	127	+ Opt_inode64,
	128	+ Opt_inode32,
	129	+ Opt_err
124	130	};
125	131
126	132	static const match_table_t tokens = {
127	133	{Opt_barrier, "barrier"},
128	134	{Opt_nobarrier, "nobarrier"},
	135	+ {Opt_inode64, "inode64"},
	136	+ {Opt_inode32, "inode32"},
129	137	{Opt_err, NULL}
130	138	};
131	139
132	140
...	...	@@ -197,7 +205,9 @@
197	205	*/
198	206	mp->m_flags |= XFS_MOUNT_BARRIER;
199	207	mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
	208	+#if !XFS_BIG_INUMS
200	209	mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
	210	+#endif
201	211
202	212	/*
203	213	* These can be overridden by the mount option parsing.
...	...	@@ -294,6 +304,8 @@
294	304	return EINVAL;
295	305	}
296	306	dswidth = simple_strtoul(value, &eov, 10);
	307	+ } else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
	308	+ mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
297	309	} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
298	310	mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
299	311	#if !XFS_BIG_INUMS
...	...	@@ -492,6 +504,7 @@
492	504	{ XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
493	505	{ XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
494	506	{ XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD },
	507	+ { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_32BITINODE },
495	508	{ 0, NULL }
496	509	};
497	510	static struct proc_xfs_info xfs_info_unset[] = {
...	...	@@ -591,6 +604,80 @@
591	604	return (((__uint64_t)pagefactor) << bitshift) - 1;
592	605	}
593	606
	607	+xfs_agnumber_t
	608	+xfs_set_inode32(struct xfs_mount *mp)
	609	+{
	610	+ xfs_agnumber_t index = 0;
	611	+ xfs_agnumber_t maxagi = 0;
	612	+ xfs_sb_t *sbp = &mp->m_sb;
	613	+ xfs_agnumber_t max_metadata;
	614	+ xfs_agino_t agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks -1, 0);
	615	+ xfs_ino_t ino = XFS_AGINO_TO_INO(mp, sbp->sb_agcount -1, agino);
	616	+ xfs_perag_t *pag;
	617	+
	618	+ /* Calculate how much should be reserved for inodes to meet
	619	+ * the max inode percentage.
	620	+ */
	621	+ if (mp->m_maxicount) {
	622	+ __uint64_t icount;
	623	+
	624	+ icount = sbp->sb_dblocks * sbp->sb_imax_pct;
	625	+ do_div(icount, 100);
	626	+ icount += sbp->sb_agblocks - 1;
	627	+ do_div(icount, sbp->sb_agblocks);
	628	+ max_metadata = icount;
	629	+ } else {
	630	+ max_metadata = sbp->sb_agcount;
	631	+ }
	632	+
	633	+ for (index = 0; index < sbp->sb_agcount; index++) {
	634	+ ino = XFS_AGINO_TO_INO(mp, index, agino);
	635	+
	636	+ if (ino > XFS_MAXINUMBER_32) {
	637	+ pag = xfs_perag_get(mp, index);
	638	+ pag->pagi_inodeok = 0;
	639	+ pag->pagf_metadata = 0;
	640	+ xfs_perag_put(pag);
	641	+ continue;
	642	+ }
	643	+
	644	+ pag = xfs_perag_get(mp, index);
	645	+ pag->pagi_inodeok = 1;
	646	+ maxagi++;
	647	+ if (index < max_metadata)
	648	+ pag->pagf_metadata = 1;
	649	+ xfs_perag_put(pag);
	650	+ }
	651	+ mp->m_flags |= (XFS_MOUNT_32BITINODES |
	652	+ XFS_MOUNT_SMALL_INUMS);
	653	+
	654	+ return maxagi;
	655	+}
	656	+
	657	+xfs_agnumber_t
	658	+xfs_set_inode64(struct xfs_mount *mp)
	659	+{
	660	+ xfs_agnumber_t index = 0;
	661	+
	662	+ for (index = 0; index < mp->m_sb.sb_agcount; index++) {
	663	+ struct xfs_perag *pag;
	664	+
	665	+ pag = xfs_perag_get(mp, index);
	666	+ pag->pagi_inodeok = 1;
	667	+ pag->pagf_metadata = 0;
	668	+ xfs_perag_put(pag);
	669	+ }
	670	+
	671	+ /* There is no need for lock protection on m_flags,
	672	+ * the rw_semaphore of the VFS superblock is locked
	673	+ * during mount/umount/remount operations, so this is
	674	+ * enough to avoid concurrency on the m_flags field
	675	+ */
	676	+ mp->m_flags &= ~(XFS_MOUNT_32BITINODES |
	677	+ XFS_MOUNT_SMALL_INUMS);
	678	+ return index;
	679	+}
	680	+
594	681	STATIC int
595	682	xfs_blkdev_get(
596	683	xfs_mount_t *mp,
...	...	@@ -1055,6 +1142,12 @@
1055	1142	break;
1056	1143	case Opt_nobarrier:
1057	1144	mp->m_flags &= ~XFS_MOUNT_BARRIER;
	1145	+ break;
	1146	+ case Opt_inode64:
	1147	+ mp->m_maxagi = xfs_set_inode64(mp);
	1148	+ break;
	1149	+ case Opt_inode32:
	1150	+ mp->m_maxagi = xfs_set_inode32(mp);
1058	1151	break;
1059	1152	default:
1060	1153	/*
...	...	@@ -75,6 +75,8 @@
75	75	extern __uint64_t xfs_max_file_offset(unsigned int);
76	76
77	77	extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
	78	+extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *);
	79	+extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *);
78	80
79	81	extern const struct export_operations xfs_export_operations;
80	82	extern const struct xattr_handler *xfs_xattr_handlers[];
...	...	@@ -37,6 +37,7 @@
37	37	struct xlog_recover_item;
38	38	struct xfs_buf_log_format;
39	39	struct xfs_inode_log_format;
	40	+struct xfs_bmbt_irec;
40	41
41	42	DECLARE_EVENT_CLASS(xfs_attr_list_class,
42	43	TP_PROTO(struct xfs_attr_list_context *ctx),