Commit 9dbe9610b9df4efe0946299804ed46bb8f91dec2
1 parent
c9aecf7371
Exists in
smarc-l5.0.0_1.0.0-ga
and in
5 other branches
GFS2: Add Orlov allocator
Just like ext3, this works on the root directory and any directory with the +T flag set. Also, just like ext3, any subdirectory created in either of those cases will be allocated to a random resource group (the GFS2 equivalent of a block group). If you are creating a set of directories, each of which will contain a job running on a different node, then by setting +T on the parent directory before creating the subdirectories, each will end up in a different resource group, and thus resource group contention between nodes will be kept to a minimum. Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Showing 8 changed files with 38 additions and 15 deletions Side-by-side Diff
fs/gfs2/aops.c
fs/gfs2/bmap.c
fs/gfs2/file.c
... | ... | @@ -432,7 +432,7 @@ |
432 | 432 | if (ret) |
433 | 433 | goto out_unlock; |
434 | 434 | gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); |
435 | - ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); | |
435 | + ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); | |
436 | 436 | if (ret) |
437 | 437 | goto out_quota_unlock; |
438 | 438 | |
... | ... | @@ -825,7 +825,7 @@ |
825 | 825 | retry: |
826 | 826 | gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); |
827 | 827 | |
828 | - error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); | |
828 | + error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); | |
829 | 829 | if (error) { |
830 | 830 | if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { |
831 | 831 | bytes >>= 1; |
fs/gfs2/inode.c
... | ... | @@ -385,13 +385,13 @@ |
385 | 385 | inode->i_gid = current_fsgid(); |
386 | 386 | } |
387 | 387 | |
388 | -static int alloc_dinode(struct gfs2_inode *ip) | |
388 | +static int alloc_dinode(struct gfs2_inode *ip, u32 flags) | |
389 | 389 | { |
390 | 390 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
391 | 391 | int error; |
392 | 392 | int dblocks = 1; |
393 | 393 | |
394 | - error = gfs2_inplace_reserve(ip, RES_DINODE); | |
394 | + error = gfs2_inplace_reserve(ip, RES_DINODE, flags); | |
395 | 395 | if (error) |
396 | 396 | goto out; |
397 | 397 | |
... | ... | @@ -560,7 +560,7 @@ |
560 | 560 | if (error) |
561 | 561 | goto fail_quota_locks; |
562 | 562 | |
563 | - error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); | |
563 | + error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); | |
564 | 564 | if (error) |
565 | 565 | goto fail_quota_locks; |
566 | 566 | |
... | ... | @@ -650,6 +650,7 @@ |
650 | 650 | struct gfs2_glock *io_gl; |
651 | 651 | int error; |
652 | 652 | struct buffer_head *bh = NULL; |
653 | + u32 aflags = 0; | |
653 | 654 | |
654 | 655 | if (!name->len || name->len > GFS2_FNAMESIZE) |
655 | 656 | return -ENAMETOOLONG; |
... | ... | @@ -685,7 +686,11 @@ |
685 | 686 | munge_mode_uid_gid(dip, inode); |
686 | 687 | ip->i_goal = dip->i_goal; |
687 | 688 | |
688 | - error = alloc_dinode(ip); | |
689 | + if ((GFS2_I(sdp->sd_root_dir->d_inode) == dip) || | |
690 | + (dip->i_diskflags & GFS2_DIF_TOPDIR)) | |
691 | + aflags |= GFS2_AF_ORLOV; | |
692 | + | |
693 | + error = alloc_dinode(ip, aflags); | |
689 | 694 | if (error) |
690 | 695 | goto fail_free_inode; |
691 | 696 | |
... | ... | @@ -897,7 +902,7 @@ |
897 | 902 | if (error) |
898 | 903 | goto out_gunlock; |
899 | 904 | |
900 | - error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); | |
905 | + error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); | |
901 | 906 | if (error) |
902 | 907 | goto out_gunlock_q; |
903 | 908 | |
... | ... | @@ -1378,7 +1383,7 @@ |
1378 | 1383 | if (error) |
1379 | 1384 | goto out_gunlock; |
1380 | 1385 | |
1381 | - error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres); | |
1386 | + error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres, 0); | |
1382 | 1387 | if (error) |
1383 | 1388 | goto out_gunlock_q; |
1384 | 1389 |
fs/gfs2/quota.c
... | ... | @@ -816,7 +816,7 @@ |
816 | 816 | blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; |
817 | 817 | |
818 | 818 | reserved = 1 + (nalloc * (data_blocks + ind_blocks)); |
819 | - error = gfs2_inplace_reserve(ip, reserved); | |
819 | + error = gfs2_inplace_reserve(ip, reserved, 0); | |
820 | 820 | if (error) |
821 | 821 | goto out_alloc; |
822 | 822 | |
... | ... | @@ -1605,7 +1605,7 @@ |
1605 | 1605 | gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), |
1606 | 1606 | &data_blocks, &ind_blocks); |
1607 | 1607 | blocks = 1 + data_blocks + ind_blocks; |
1608 | - error = gfs2_inplace_reserve(ip, blocks); | |
1608 | + error = gfs2_inplace_reserve(ip, blocks, 0); | |
1609 | 1609 | if (error) |
1610 | 1610 | goto out_i; |
1611 | 1611 | blocks += gfs2_rg_blocks(ip, blocks); |
fs/gfs2/rgrp.c
... | ... | @@ -16,6 +16,7 @@ |
16 | 16 | #include <linux/prefetch.h> |
17 | 17 | #include <linux/blkdev.h> |
18 | 18 | #include <linux/rbtree.h> |
19 | +#include <linux/random.h> | |
19 | 20 | |
20 | 21 | #include "gfs2.h" |
21 | 22 | #include "incore.h" |
... | ... | @@ -1763,6 +1764,15 @@ |
1763 | 1764 | return tdiff > (msecs * 1000 * 1000); |
1764 | 1765 | } |
1765 | 1766 | |
1767 | +static u32 gfs2_orlov_skip(const struct gfs2_inode *ip) | |
1768 | +{ | |
1769 | + const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | |
1770 | + u32 skip; | |
1771 | + | |
1772 | + get_random_bytes(&skip, sizeof(skip)); | |
1773 | + return skip % sdp->sd_rgrps; | |
1774 | +} | |
1775 | + | |
1766 | 1776 | static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) |
1767 | 1777 | { |
1768 | 1778 | struct gfs2_rgrpd *rgd = *pos; |
... | ... | @@ -1784,7 +1794,7 @@ |
1784 | 1794 | * Returns: errno |
1785 | 1795 | */ |
1786 | 1796 | |
1787 | -int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) | |
1797 | +int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) | |
1788 | 1798 | { |
1789 | 1799 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1790 | 1800 | struct gfs2_rgrpd *begin = NULL; |
... | ... | @@ -1792,6 +1802,7 @@ |
1792 | 1802 | int error = 0, rg_locked, flags = 0; |
1793 | 1803 | u64 last_unlinked = NO_BLOCK; |
1794 | 1804 | int loops = 0; |
1805 | + u32 skip = 0; | |
1795 | 1806 | |
1796 | 1807 | if (sdp->sd_args.ar_rgrplvb) |
1797 | 1808 | flags |= GL_SKIP; |
... | ... | @@ -1805,6 +1816,8 @@ |
1805 | 1816 | } else { |
1806 | 1817 | rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); |
1807 | 1818 | } |
1819 | + if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV)) | |
1820 | + skip = gfs2_orlov_skip(ip); | |
1808 | 1821 | if (rs->rs_rbm.rgd == NULL) |
1809 | 1822 | return -EBADSLT; |
1810 | 1823 | |
... | ... | @@ -1813,6 +1826,8 @@ |
1813 | 1826 | |
1814 | 1827 | if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { |
1815 | 1828 | rg_locked = 0; |
1829 | + if (skip && skip--) | |
1830 | + goto next_rgrp; | |
1816 | 1831 | if (!gfs2_rs_active(rs) && (loops < 2) && |
1817 | 1832 | gfs2_rgrp_used_recently(rs, 1000) && |
1818 | 1833 | gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) |
... | ... | @@ -1870,6 +1885,8 @@ |
1870 | 1885 | next_rgrp: |
1871 | 1886 | /* Find the next rgrp, and continue looking */ |
1872 | 1887 | if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) |
1888 | + continue; | |
1889 | + if (skip) | |
1873 | 1890 | continue; |
1874 | 1891 | |
1875 | 1892 | /* If we've scanned all the rgrps, but found no free blocks |
fs/gfs2/rgrp.h
... | ... | @@ -39,7 +39,8 @@ |
39 | 39 | |
40 | 40 | extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); |
41 | 41 | |
42 | -extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested); | |
42 | +#define GFS2_AF_ORLOV 1 | |
43 | +extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 flags); | |
43 | 44 | extern void gfs2_inplace_release(struct gfs2_inode *ip); |
44 | 45 | |
45 | 46 | extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, |