Commit 9dbe9610b9df4efe0946299804ed46bb8f91dec2

Authored by Steven Whitehouse
1 parent c9aecf7371

GFS2: Add Orlov allocator

As in ext3, the Orlov allocator applies to the root directory and to
any directory with the +T (top of directory hierarchy) flag set. Also
as in ext3, any subdirectory created directly in such a directory is
allocated in a randomly chosen resource group (the GFS2 equivalent of
a block group).

If you are creating a set of directories, each of which will contain a
job running on a different node, then set +T on the parent directory
before creating the subdirectories. Each subdirectory will then most
likely end up in a different resource group (the choice is random), so
resource group contention between nodes is kept to a minimum.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

Showing 8 changed files with 38 additions and 15 deletions Side-by-side Diff

... ... @@ -643,7 +643,7 @@
643 643 goto out_unlock;
644 644  
645 645 requested = data_blocks + ind_blocks;
646   - error = gfs2_inplace_reserve(ip, requested);
  646 + error = gfs2_inplace_reserve(ip, requested, 0);
647 647 if (error)
648 648 goto out_qunlock;
649 649 }
... ... @@ -1178,7 +1178,7 @@
1178 1178 if (error)
1179 1179 return error;
1180 1180  
1181   - error = gfs2_inplace_reserve(ip, 1);
  1181 + error = gfs2_inplace_reserve(ip, 1, 0);
1182 1182 if (error)
1183 1183 goto do_grow_qunlock;
1184 1184 unstuff = 1;
... ... @@ -432,7 +432,7 @@
432 432 if (ret)
433 433 goto out_unlock;
434 434 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
435   - ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
  435 + ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0);
436 436 if (ret)
437 437 goto out_quota_unlock;
438 438  
... ... @@ -825,7 +825,7 @@
825 825 retry:
826 826 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
827 827  
828   - error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
  828 + error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0);
829 829 if (error) {
830 830 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
831 831 bytes >>= 1;
... ... @@ -385,13 +385,13 @@
385 385 inode->i_gid = current_fsgid();
386 386 }
387 387  
388   -static int alloc_dinode(struct gfs2_inode *ip)
  388 +static int alloc_dinode(struct gfs2_inode *ip, u32 flags)
389 389 {
390 390 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
391 391 int error;
392 392 int dblocks = 1;
393 393  
394   - error = gfs2_inplace_reserve(ip, RES_DINODE);
  394 + error = gfs2_inplace_reserve(ip, RES_DINODE, flags);
395 395 if (error)
396 396 goto out;
397 397  
... ... @@ -560,7 +560,7 @@
560 560 if (error)
561 561 goto fail_quota_locks;
562 562  
563   - error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres);
  563 + error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0);
564 564 if (error)
565 565 goto fail_quota_locks;
566 566  
... ... @@ -650,6 +650,7 @@
650 650 struct gfs2_glock *io_gl;
651 651 int error;
652 652 struct buffer_head *bh = NULL;
  653 + u32 aflags = 0;
653 654  
654 655 if (!name->len || name->len > GFS2_FNAMESIZE)
655 656 return -ENAMETOOLONG;
... ... @@ -685,7 +686,11 @@
685 686 munge_mode_uid_gid(dip, inode);
686 687 ip->i_goal = dip->i_goal;
687 688  
688   - error = alloc_dinode(ip);
  689 + if ((GFS2_I(sdp->sd_root_dir->d_inode) == dip) ||
  690 + (dip->i_diskflags & GFS2_DIF_TOPDIR))
  691 + aflags |= GFS2_AF_ORLOV;
  692 +
  693 + error = alloc_dinode(ip, aflags);
689 694 if (error)
690 695 goto fail_free_inode;
691 696  
... ... @@ -897,7 +902,7 @@
897 902 if (error)
898 903 goto out_gunlock;
899 904  
900   - error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres);
  905 + error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0);
901 906 if (error)
902 907 goto out_gunlock_q;
903 908  
... ... @@ -1378,7 +1383,7 @@
1378 1383 if (error)
1379 1384 goto out_gunlock;
1380 1385  
1381   - error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres);
  1386 + error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres, 0);
1382 1387 if (error)
1383 1388 goto out_gunlock_q;
1384 1389  
... ... @@ -816,7 +816,7 @@
816 816 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3;
817 817  
818 818 reserved = 1 + (nalloc * (data_blocks + ind_blocks));
819   - error = gfs2_inplace_reserve(ip, reserved);
  819 + error = gfs2_inplace_reserve(ip, reserved, 0);
820 820 if (error)
821 821 goto out_alloc;
822 822  
... ... @@ -1605,7 +1605,7 @@
1605 1605 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
1606 1606 &data_blocks, &ind_blocks);
1607 1607 blocks = 1 + data_blocks + ind_blocks;
1608   - error = gfs2_inplace_reserve(ip, blocks);
  1608 + error = gfs2_inplace_reserve(ip, blocks, 0);
1609 1609 if (error)
1610 1610 goto out_i;
1611 1611 blocks += gfs2_rg_blocks(ip, blocks);
... ... @@ -16,6 +16,7 @@
16 16 #include <linux/prefetch.h>
17 17 #include <linux/blkdev.h>
18 18 #include <linux/rbtree.h>
  19 +#include <linux/random.h>
19 20  
20 21 #include "gfs2.h"
21 22 #include "incore.h"
... ... @@ -1763,6 +1764,15 @@
1763 1764 return tdiff > (msecs * 1000 * 1000);
1764 1765 }
1765 1766  
  1767 +static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
  1768 +{
  1769 + const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
  1770 + u32 skip;
  1771 +
  1772 + get_random_bytes(&skip, sizeof(skip));
  1773 + return skip % sdp->sd_rgrps;
  1774 +}
  1775 +
1766 1776 static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1767 1777 {
1768 1778 struct gfs2_rgrpd *rgd = *pos;
... ... @@ -1784,7 +1794,7 @@
1784 1794 * Returns: errno
1785 1795 */
1786 1796  
1787   -int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
  1797 +int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags)
1788 1798 {
1789 1799 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1790 1800 struct gfs2_rgrpd *begin = NULL;
... ... @@ -1792,6 +1802,7 @@
1792 1802 int error = 0, rg_locked, flags = 0;
1793 1803 u64 last_unlinked = NO_BLOCK;
1794 1804 int loops = 0;
  1805 + u32 skip = 0;
1795 1806  
1796 1807 if (sdp->sd_args.ar_rgrplvb)
1797 1808 flags |= GL_SKIP;
... ... @@ -1805,6 +1816,8 @@
1805 1816 } else {
1806 1817 rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1807 1818 }
  1819 + if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV))
  1820 + skip = gfs2_orlov_skip(ip);
1808 1821 if (rs->rs_rbm.rgd == NULL)
1809 1822 return -EBADSLT;
1810 1823  
... ... @@ -1813,6 +1826,8 @@
1813 1826  
1814 1827 if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
1815 1828 rg_locked = 0;
  1829 + if (skip && skip--)
  1830 + goto next_rgrp;
1816 1831 if (!gfs2_rs_active(rs) && (loops < 2) &&
1817 1832 gfs2_rgrp_used_recently(rs, 1000) &&
1818 1833 gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
... ... @@ -1870,6 +1885,8 @@
1870 1885 next_rgrp:
1871 1886 /* Find the next rgrp, and continue looking */
1872 1887 if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
  1888 + continue;
  1889 + if (skip)
1873 1890 continue;
1874 1891  
1875 1892 /* If we've scanned all the rgrps, but found no free blocks
... ... @@ -39,7 +39,8 @@
39 39  
40 40 extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
41 41  
42   -extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested);
  42 +#define GFS2_AF_ORLOV 1
  43 +extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 flags);
43 44 extern void gfs2_inplace_release(struct gfs2_inode *ip);
44 45  
45 46 extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
... ... @@ -734,7 +734,7 @@
734 734 if (error)
735 735 return error;
736 736  
737   - error = gfs2_inplace_reserve(ip, blks);
  737 + error = gfs2_inplace_reserve(ip, blks, 0);
738 738 if (error)
739 739 goto out_gunlock_q;
740 740