Commit f21ce8f8447c8be8847dadcfdbcc76b0d7365fa5

Authored by Linus Torvalds

Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

Pull XFS update (part 2) from Ben Myers:
 "Fixes for tracing of xfs_name strings, flag handling in
  open_by_handle, a log space hang with freeze/unfreeze, fstrim offset
  calculations, a section mismatch with xfs_qm_exit, an oops in
  xlog_recover_process_iunlinks, and a deadlock in xfs_rtfree_extent.

  There are also additional trace points for attributes, and the
  addition of a workqueue for allocation to work around kernel stack
  size limitations."

* 'for-linus' of git://oss.sgi.com/xfs/xfs:
  xfs: add lots of attribute trace points
  xfs: Fix oops on IO error during xlog_recover_process_iunlinks()
  xfs: fix fstrim offset calculations
  xfs: Account log unmount transaction correctly
  xfs: don't cache inodes read through bulkstat
  xfs: trace xfs_name strings correctly
  xfs: introduce an allocation workqueue
  xfs: Fix open flag handling in open_by_handle code
  xfs: fix deadlock in xfs_rtfree_extent
  fs: xfs: fix section mismatch in linux-next

Showing 17 changed files (side-by-side diff):

... ... @@ -35,6 +35,7 @@
35 35 #include "xfs_error.h"
36 36 #include "xfs_trace.h"
37 37  
  38 +struct workqueue_struct *xfs_alloc_wq;
38 39  
39 40 #define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
40 41  
... ... @@ -68,7 +69,7 @@
68 69 * Lookup the first record greater than or equal to [bno, len]
69 70 * in the btree given by cur.
70 71 */
71   -STATIC int /* error */
  72 +int /* error */
72 73 xfs_alloc_lookup_ge(
73 74 struct xfs_btree_cur *cur, /* btree cursor */
74 75 xfs_agblock_t bno, /* starting block of extent */
... ... @@ -2207,7 +2208,7 @@
2207 2208 * group or loop over the allocation groups to find the result.
2208 2209 */
2209 2210 int /* error */
2210   -xfs_alloc_vextent(
  2211 +__xfs_alloc_vextent(
2211 2212 xfs_alloc_arg_t *args) /* allocation argument structure */
2212 2213 {
2213 2214 xfs_agblock_t agsize; /* allocation group size */
... ... @@ -2415,6 +2416,37 @@
2415 2416 error0:
2416 2417 xfs_perag_put(args->pag);
2417 2418 return error;
  2419 +}
  2420 +
  2421 +static void
  2422 +xfs_alloc_vextent_worker(
  2423 + struct work_struct *work)
  2424 +{
  2425 + struct xfs_alloc_arg *args = container_of(work,
  2426 + struct xfs_alloc_arg, work);
  2427 + unsigned long pflags;
  2428 +
  2429 + /* we are in a transaction context here */
  2430 + current_set_flags_nested(&pflags, PF_FSTRANS);
  2431 +
  2432 + args->result = __xfs_alloc_vextent(args);
  2433 + complete(args->done);
  2434 +
  2435 + current_restore_flags_nested(&pflags, PF_FSTRANS);
  2436 +}
  2437 +
  2438 +
  2439 +int /* error */
  2440 +xfs_alloc_vextent(
  2441 + xfs_alloc_arg_t *args) /* allocation argument structure */
  2442 +{
  2443 + DECLARE_COMPLETION_ONSTACK(done);
  2444 +
  2445 + args->done = &done;
  2446 + INIT_WORK(&args->work, xfs_alloc_vextent_worker);
  2447 + queue_work(xfs_alloc_wq, &args->work);
  2448 + wait_for_completion(&done);
  2449 + return args->result;
2418 2450 }
2419 2451  
2420 2452 /*
... ... @@ -25,6 +25,8 @@
25 25 struct xfs_trans;
26 26 struct xfs_busy_extent;
27 27  
  28 +extern struct workqueue_struct *xfs_alloc_wq;
  29 +
28 30 /*
29 31 * Freespace allocation types. Argument to xfs_alloc_[v]extent.
30 32 */
... ... @@ -119,6 +121,9 @@
119 121 char isfl; /* set if is freelist blocks - !acctg */
120 122 char userdata; /* set if this is user data */
121 123 xfs_fsblock_t firstblock; /* io first block allocated */
  124 + struct completion *done;
  125 + struct work_struct work;
  126 + int result;
122 127 } xfs_alloc_arg_t;
123 128  
124 129 /*
... ... @@ -238,6 +243,13 @@
238 243  
239 244 int /* error */
240 245 xfs_alloc_lookup_le(
  246 + struct xfs_btree_cur *cur, /* btree cursor */
  247 + xfs_agblock_t bno, /* starting block of extent */
  248 + xfs_extlen_t len, /* length of extent */
  249 + int *stat); /* success/failure */
  250 +
  251 +int /* error */
  252 +xfs_alloc_lookup_ge(
241 253 struct xfs_btree_cur *cur, /* btree cursor */
242 254 xfs_agblock_t bno, /* starting block of extent */
243 255 xfs_extlen_t len, /* length of extent */
... ... @@ -853,6 +853,8 @@
853 853 {
854 854 int newsize, forkoff, retval;
855 855  
  856 + trace_xfs_attr_sf_addname(args);
  857 +
856 858 retval = xfs_attr_shortform_lookup(args);
857 859 if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
858 860 return(retval);
... ... @@ -896,6 +898,8 @@
896 898 xfs_dabuf_t *bp;
897 899 int retval, error, committed, forkoff;
898 900  
  901 + trace_xfs_attr_leaf_addname(args);
  902 +
899 903 /*
900 904 * Read the (only) block in the attribute list in.
901 905 */
... ... @@ -920,6 +924,9 @@
920 924 xfs_da_brelse(args->trans, bp);
921 925 return(retval);
922 926 }
  927 +
  928 + trace_xfs_attr_leaf_replace(args);
  929 +
923 930 args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */
924 931 args->blkno2 = args->blkno; /* set 2nd entry info*/
925 932 args->index2 = args->index;
... ... @@ -1090,6 +1097,8 @@
1090 1097 xfs_dabuf_t *bp;
1091 1098 int error, committed, forkoff;
1092 1099  
  1100 + trace_xfs_attr_leaf_removename(args);
  1101 +
1093 1102 /*
1094 1103 * Remove the attribute.
1095 1104 */
... ... @@ -1223,6 +1232,8 @@
1223 1232 xfs_mount_t *mp;
1224 1233 int committed, retval, error;
1225 1234  
  1235 + trace_xfs_attr_node_addname(args);
  1236 +
1226 1237 /*
1227 1238 * Fill in bucket of arguments/results/context to carry around.
1228 1239 */
... ... @@ -1249,6 +1260,9 @@
1249 1260 } else if (retval == EEXIST) {
1250 1261 if (args->flags & ATTR_CREATE)
1251 1262 goto out;
  1263 +
  1264 + trace_xfs_attr_node_replace(args);
  1265 +
1252 1266 args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */
1253 1267 args->blkno2 = args->blkno; /* set 2nd entry info*/
1254 1268 args->index2 = args->index;
... ... @@ -1479,6 +1493,8 @@
1479 1493 xfs_inode_t *dp;
1480 1494 xfs_dabuf_t *bp;
1481 1495 int retval, error, committed, forkoff;
  1496 +
  1497 + trace_xfs_attr_node_removename(args);
1482 1498  
1483 1499 /*
1484 1500 * Tie a string around our finger to remind us where we are.
fs/xfs/xfs_attr_leaf.c
... ... @@ -235,6 +235,8 @@
235 235 xfs_inode_t *dp;
236 236 xfs_ifork_t *ifp;
237 237  
  238 + trace_xfs_attr_sf_create(args);
  239 +
238 240 dp = args->dp;
239 241 ASSERT(dp != NULL);
240 242 ifp = dp->i_afp;
... ... @@ -268,6 +270,8 @@
268 270 xfs_inode_t *dp;
269 271 xfs_ifork_t *ifp;
270 272  
  273 + trace_xfs_attr_sf_add(args);
  274 +
271 275 dp = args->dp;
272 276 mp = dp->i_mount;
273 277 dp->i_d.di_forkoff = forkoff;
... ... @@ -337,6 +341,8 @@
337 341 xfs_mount_t *mp;
338 342 xfs_inode_t *dp;
339 343  
  344 + trace_xfs_attr_sf_remove(args);
  345 +
340 346 dp = args->dp;
341 347 mp = dp->i_mount;
342 348 base = sizeof(xfs_attr_sf_hdr_t);
... ... @@ -405,6 +411,8 @@
405 411 int i;
406 412 xfs_ifork_t *ifp;
407 413  
  414 + trace_xfs_attr_sf_lookup(args);
  415 +
408 416 ifp = args->dp->i_afp;
409 417 ASSERT(ifp->if_flags & XFS_IFINLINE);
410 418 sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
... ... @@ -476,6 +484,8 @@
476 484 xfs_dabuf_t *bp;
477 485 xfs_ifork_t *ifp;
478 486  
  487 + trace_xfs_attr_sf_to_leaf(args);
  488 +
479 489 dp = args->dp;
480 490 ifp = dp->i_afp;
481 491 sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
... ... @@ -775,6 +785,8 @@
775 785 char *tmpbuffer;
776 786 int error, i;
777 787  
  788 + trace_xfs_attr_leaf_to_sf(args);
  789 +
778 790 dp = args->dp;
779 791 tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP);
780 792 ASSERT(tmpbuffer != NULL);
... ... @@ -848,6 +860,8 @@
848 860 xfs_dablk_t blkno;
849 861 int error;
850 862  
  863 + trace_xfs_attr_leaf_to_node(args);
  864 +
851 865 dp = args->dp;
852 866 bp1 = bp2 = NULL;
853 867 error = xfs_da_grow_inode(args, &blkno);
... ... @@ -911,6 +925,8 @@
911 925 xfs_dabuf_t *bp;
912 926 int error;
913 927  
  928 + trace_xfs_attr_leaf_create(args);
  929 +
914 930 dp = args->dp;
915 931 ASSERT(dp != NULL);
916 932 error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp,
... ... @@ -948,6 +964,8 @@
948 964 xfs_dablk_t blkno;
949 965 int error;
950 966  
  967 + trace_xfs_attr_leaf_split(state->args);
  968 +
951 969 /*
952 970 * Allocate space for a new leaf node.
953 971 */
954 972  
955 973  
... ... @@ -977,10 +995,13 @@
977 995 *
978 996 * Insert the "new" entry in the correct block.
979 997 */
980   - if (state->inleaf)
  998 + if (state->inleaf) {
  999 + trace_xfs_attr_leaf_add_old(state->args);
981 1000 error = xfs_attr_leaf_add(oldblk->bp, state->args);
982   - else
  1001 + } else {
  1002 + trace_xfs_attr_leaf_add_new(state->args);
983 1003 error = xfs_attr_leaf_add(newblk->bp, state->args);
  1004 + }
984 1005  
985 1006 /*
986 1007 * Update last hashval in each block since we added the name.
... ... @@ -1001,6 +1022,8 @@
1001 1022 xfs_attr_leaf_map_t *map;
1002 1023 int tablesize, entsize, sum, tmp, i;
1003 1024  
  1025 + trace_xfs_attr_leaf_add(args);
  1026 +
1004 1027 leaf = bp->data;
1005 1028 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1006 1029 ASSERT((args->index >= 0)
... ... @@ -1128,8 +1151,6 @@
1128 1151 (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval)));
1129 1152  
1130 1153 /*
1131   - * Copy the attribute name and value into the new space.
1132   - *
1133 1154 * For "remote" attribute values, simply note that we need to
1134 1155 * allocate space for the "remote" value. We can't actually
1135 1156 * allocate the extents in this transaction, and we can't decide
... ... @@ -1265,6 +1286,8 @@
1265 1286 ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1266 1287 args = state->args;
1267 1288  
  1289 + trace_xfs_attr_leaf_rebalance(args);
  1290 +
1268 1291 /*
1269 1292 * Check ordering of blocks, reverse if it makes things simpler.
1270 1293 *
... ... @@ -1810,6 +1833,8 @@
1810 1833 xfs_mount_t *mp;
1811 1834 char *tmpbuffer;
1812 1835  
  1836 + trace_xfs_attr_leaf_unbalance(state->args);
  1837 +
1813 1838 /*
1814 1839 * Set up environment.
1815 1840 */
... ... @@ -1919,6 +1944,8 @@
1919 1944 int probe, span;
1920 1945 xfs_dahash_t hashval;
1921 1946  
  1947 + trace_xfs_attr_leaf_lookup(args);
  1948 +
1922 1949 leaf = bp->data;
1923 1950 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1924 1951 ASSERT(be16_to_cpu(leaf->hdr.count)
... ... @@ -2445,6 +2472,7 @@
2445 2472 char *name;
2446 2473 #endif /* DEBUG */
2447 2474  
  2475 + trace_xfs_attr_leaf_clearflag(args);
2448 2476 /*
2449 2477 * Set up the operation.
2450 2478 */
... ... @@ -2509,6 +2537,8 @@
2509 2537 xfs_dabuf_t *bp;
2510 2538 int error;
2511 2539  
  2540 + trace_xfs_attr_leaf_setflag(args);
  2541 +
2512 2542 /*
2513 2543 * Set up the operation.
2514 2544 */
... ... @@ -2564,6 +2594,8 @@
2564 2594 int namelen1, namelen2;
2565 2595 char *name1, *name2;
2566 2596 #endif /* DEBUG */
  2597 +
  2598 + trace_xfs_attr_leaf_flipflags(args);
2567 2599  
2568 2600 /*
2569 2601 * Read the block containing the "old" attr
... ... @@ -5124,6 +5124,15 @@
5124 5124 cur->bc_private.b.flags = 0;
5125 5125 } else
5126 5126 cur = NULL;
  5127 +
  5128 + if (isrt) {
  5129 + /*
  5130 + * Synchronize by locking the bitmap inode.
  5131 + */
  5132 + xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
  5133 + xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
  5134 + }
  5135 +
5127 5136 extno = 0;
5128 5137 while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
5129 5138 (nexts == 0 || extno < nexts)) {
fs/xfs/xfs_da_btree.c
... ... @@ -108,6 +108,8 @@
108 108 int error;
109 109 xfs_trans_t *tp;
110 110  
  111 + trace_xfs_da_node_create(args);
  112 +
111 113 tp = args->trans;
112 114 error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
113 115 if (error)
... ... @@ -140,6 +142,8 @@
140 142 xfs_dabuf_t *bp;
141 143 int max, action, error, i;
142 144  
  145 + trace_xfs_da_split(state->args);
  146 +
143 147 /*
144 148 * Walk back up the tree splitting/inserting/adjusting as necessary.
145 149 * If we need to insert and there isn't room, split the node, then
146 150  
... ... @@ -178,10 +182,12 @@
178 182 state->extravalid = 1;
179 183 if (state->inleaf) {
180 184 state->extraafter = 0; /* before newblk */
  185 + trace_xfs_attr_leaf_split_before(state->args);
181 186 error = xfs_attr_leaf_split(state, oldblk,
182 187 &state->extrablk);
183 188 } else {
184 189 state->extraafter = 1; /* after newblk */
  190 + trace_xfs_attr_leaf_split_after(state->args);
185 191 error = xfs_attr_leaf_split(state, newblk,
186 192 &state->extrablk);
187 193 }
... ... @@ -300,6 +306,8 @@
300 306 xfs_mount_t *mp;
301 307 xfs_dir2_leaf_t *leaf;
302 308  
  309 + trace_xfs_da_root_split(state->args);
  310 +
303 311 /*
304 312 * Copy the existing (incorrect) block from the root node position
305 313 * to a free space somewhere.
... ... @@ -380,6 +388,8 @@
380 388 int newcount, error;
381 389 int useextra;
382 390  
  391 + trace_xfs_da_node_split(state->args);
  392 +
383 393 node = oldblk->bp->data;
384 394 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
385 395  
... ... @@ -466,6 +476,8 @@
466 476 int count, tmp;
467 477 xfs_trans_t *tp;
468 478  
  479 + trace_xfs_da_node_rebalance(state->args);
  480 +
469 481 node1 = blk1->bp->data;
470 482 node2 = blk2->bp->data;
471 483 /*
... ... @@ -574,6 +586,8 @@
574 586 xfs_da_node_entry_t *btree;
575 587 int tmp;
576 588  
  589 + trace_xfs_da_node_add(state->args);
  590 +
577 591 node = oldblk->bp->data;
578 592 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
579 593 ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
... ... @@ -619,6 +633,8 @@
619 633 xfs_da_state_blk_t *drop_blk, *save_blk;
620 634 int action, error;
621 635  
  636 + trace_xfs_da_join(state->args);
  637 +
622 638 action = 0;
623 639 drop_blk = &state->path.blk[ state->path.active-1 ];
624 640 save_blk = &state->altpath.blk[ state->path.active-1 ];
... ... @@ -723,6 +739,8 @@
723 739 xfs_dabuf_t *bp;
724 740 int error;
725 741  
  742 + trace_xfs_da_root_join(state->args);
  743 +
726 744 args = state->args;
727 745 ASSERT(args != NULL);
728 746 ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
... ... @@ -941,6 +959,8 @@
941 959 xfs_da_node_entry_t *btree;
942 960 int tmp;
943 961  
  962 + trace_xfs_da_node_remove(state->args);
  963 +
944 964 node = drop_blk->bp->data;
945 965 ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count));
946 966 ASSERT(drop_blk->index >= 0);
... ... @@ -984,6 +1004,8 @@
984 1004 int tmp;
985 1005 xfs_trans_t *tp;
986 1006  
  1007 + trace_xfs_da_node_unbalance(state->args);
  1008 +
987 1009 drop_node = drop_blk->bp->data;
988 1010 save_node = save_blk->bp->data;
989 1011 ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
... ... @@ -1230,6 +1252,7 @@
1230 1252 /*
1231 1253 * Link new block in before existing block.
1232 1254 */
  1255 + trace_xfs_da_link_before(args);
1233 1256 new_info->forw = cpu_to_be32(old_blk->blkno);
1234 1257 new_info->back = old_info->back;
1235 1258 if (old_info->back) {
... ... @@ -1251,6 +1274,7 @@
1251 1274 /*
1252 1275 * Link new block in after existing block.
1253 1276 */
  1277 + trace_xfs_da_link_after(args);
1254 1278 new_info->forw = old_info->forw;
1255 1279 new_info->back = cpu_to_be32(old_blk->blkno);
1256 1280 if (old_info->forw) {
... ... @@ -1348,6 +1372,7 @@
1348 1372 * Unlink the leaf block from the doubly linked chain of leaves.
1349 1373 */
1350 1374 if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
  1375 + trace_xfs_da_unlink_back(args);
1351 1376 save_info->back = drop_info->back;
1352 1377 if (drop_info->back) {
1353 1378 error = xfs_da_read_buf(args->trans, args->dp,
... ... @@ -1365,6 +1390,7 @@
1365 1390 xfs_da_buf_done(bp);
1366 1391 }
1367 1392 } else {
  1393 + trace_xfs_da_unlink_forward(args);
1368 1394 save_info->forw = drop_info->forw;
1369 1395 if (drop_info->forw) {
1370 1396 error = xfs_da_read_buf(args->trans, args->dp,
... ... @@ -1652,6 +1678,8 @@
1652 1678 int count;
1653 1679 int error;
1654 1680  
  1681 + trace_xfs_da_grow_inode(args);
  1682 +
1655 1683 if (args->whichfork == XFS_DATA_FORK) {
1656 1684 bno = args->dp->i_mount->m_dirleafblk;
1657 1685 count = args->dp->i_mount->m_dirblkfsbs;
... ... @@ -1690,6 +1718,8 @@
1690 1718 xfs_dir2_leaf_t *dead_leaf2;
1691 1719 xfs_dahash_t dead_hash;
1692 1720  
  1721 + trace_xfs_da_swap_lastblock(args);
  1722 +
1693 1723 dead_buf = *dead_bufp;
1694 1724 dead_blkno = *dead_blknop;
1695 1725 tp = args->trans;
... ... @@ -1877,6 +1907,8 @@
1877 1907 int done, error, w, count;
1878 1908 xfs_trans_t *tp;
1879 1909 xfs_mount_t *mp;
  1910 +
  1911 + trace_xfs_da_shrink_inode(args);
1880 1912  
1881 1913 dp = args->dp;
1882 1914 w = args->whichfork;
fs/xfs/xfs_discard.c
... ... @@ -37,9 +37,9 @@
37 37 xfs_trim_extents(
38 38 struct xfs_mount *mp,
39 39 xfs_agnumber_t agno,
40   - xfs_fsblock_t start,
41   - xfs_fsblock_t end,
42   - xfs_fsblock_t minlen,
  40 + xfs_daddr_t start,
  41 + xfs_daddr_t end,
  42 + xfs_daddr_t minlen,
43 43 __uint64_t *blocks_trimmed)
44 44 {
45 45 struct block_device *bdev = mp->m_ddev_targp->bt_bdev;
... ... @@ -67,7 +67,7 @@
67 67 /*
68 68 * Look up the longest btree in the AGF and start with it.
69 69 */
70   - error = xfs_alloc_lookup_le(cur, 0,
  70 + error = xfs_alloc_lookup_ge(cur, 0,
71 71 be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i);
72 72 if (error)
73 73 goto out_del_cursor;
... ... @@ -77,8 +77,10 @@
77 77 * enough to be worth discarding.
78 78 */
79 79 while (i) {
80   - xfs_agblock_t fbno;
81   - xfs_extlen_t flen;
  80 + xfs_agblock_t fbno;
  81 + xfs_extlen_t flen;
  82 + xfs_daddr_t dbno;
  83 + xfs_extlen_t dlen;
82 84  
83 85 error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
84 86 if (error)
85 87  
... ... @@ -87,9 +89,17 @@
87 89 ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest));
88 90  
89 91 /*
  92 + * use daddr format for all range/len calculations as that is
  93 + * the format the range/len variables are supplied in by
  94 + * userspace.
  95 + */
  96 + dbno = XFS_AGB_TO_DADDR(mp, agno, fbno);
  97 + dlen = XFS_FSB_TO_BB(mp, flen);
  98 +
  99 + /*
90 100 * Too small? Give up.
91 101 */
92   - if (flen < minlen) {
  102 + if (dlen < minlen) {
93 103 trace_xfs_discard_toosmall(mp, agno, fbno, flen);
94 104 goto out_del_cursor;
95 105 }
... ... @@ -99,8 +109,7 @@
99 109 * supposed to discard skip it. Do not bother to trim
100 110 * down partially overlapping ranges for now.
101 111 */
102   - if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start ||
103   - XFS_AGB_TO_FSB(mp, agno, fbno) > end) {
  112 + if (dbno + dlen < start || dbno > end) {
104 113 trace_xfs_discard_exclude(mp, agno, fbno, flen);
105 114 goto next_extent;
106 115 }
... ... @@ -115,10 +124,7 @@
115 124 }
116 125  
117 126 trace_xfs_discard_extent(mp, agno, fbno, flen);
118   - error = -blkdev_issue_discard(bdev,
119   - XFS_AGB_TO_DADDR(mp, agno, fbno),
120   - XFS_FSB_TO_BB(mp, flen),
121   - GFP_NOFS, 0);
  127 + error = -blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0);
122 128 if (error)
123 129 goto out_del_cursor;
124 130 *blocks_trimmed += flen;
... ... @@ -137,6 +143,15 @@
137 143 return error;
138 144 }
139 145  
  146 +/*
  147 + * trim a range of the filesystem.
  148 + *
  149 + * Note: the parameters passed from userspace are byte ranges into the
  150 + * filesystem which does not match to the format we use for filesystem block
  151 + * addressing. FSB addressing is sparse (AGNO|AGBNO), while the incoming format
  152 + * is a linear address range. Hence we need to use DADDR based conversions and
  153 + * comparisons for determining the correct offset and regions to trim.
  154 + */
140 155 int
141 156 xfs_ioc_trim(
142 157 struct xfs_mount *mp,
... ... @@ -145,7 +160,7 @@
145 160 struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
146 161 unsigned int granularity = q->limits.discard_granularity;
147 162 struct fstrim_range range;
148   - xfs_fsblock_t start, end, minlen;
  163 + xfs_daddr_t start, end, minlen;
149 164 xfs_agnumber_t start_agno, end_agno, agno;
150 165 __uint64_t blocks_trimmed = 0;
151 166 int error, last_error = 0;
152 167  
153 168  
154 169  
155 170  
... ... @@ -159,22 +174,22 @@
159 174  
160 175 /*
161 176 * Truncating down the len isn't actually quite correct, but using
162   - * XFS_B_TO_FSB would mean we trivially get overflows for values
  177 + * BBTOB would mean we trivially get overflows for values
163 178 * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default
164 179 * used by the fstrim application. In the end it really doesn't
165 180 * matter as trimming blocks is an advisory interface.
166 181 */
167   - start = XFS_B_TO_FSBT(mp, range.start);
168   - end = start + XFS_B_TO_FSBT(mp, range.len) - 1;
169   - minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen));
  182 + start = BTOBB(range.start);
  183 + end = start + BTOBBT(range.len) - 1;
  184 + minlen = BTOBB(max_t(u64, granularity, range.minlen));
170 185  
171   - if (start >= mp->m_sb.sb_dblocks)
  186 + if (XFS_BB_TO_FSB(mp, start) >= mp->m_sb.sb_dblocks)
172 187 return -XFS_ERROR(EINVAL);
173   - if (end > mp->m_sb.sb_dblocks - 1)
174   - end = mp->m_sb.sb_dblocks - 1;
  188 + if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
  189 + end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1;
175 190  
176   - start_agno = XFS_FSB_TO_AGNO(mp, start);
177   - end_agno = XFS_FSB_TO_AGNO(mp, end);
  191 + start_agno = xfs_daddr_to_agno(mp, start);
  192 + end_agno = xfs_daddr_to_agno(mp, end);
178 193  
179 194 for (agno = start_agno; agno <= end_agno; agno++) {
180 195 error = -xfs_trim_extents(mp, agno, start, end, minlen,
... ... @@ -1065,7 +1065,7 @@
1065 1065 return -ENOMEM;
1066 1066 }
1067 1067  
1068   -void __exit
  1068 +void
1069 1069 xfs_qm_exit(void)
1070 1070 {
1071 1071 kmem_zone_destroy(xfs_qm_dqtrxzone);
... ... @@ -289,7 +289,7 @@
289 289 if (lock_flags != 0)
290 290 xfs_ilock(ip, lock_flags);
291 291  
292   - xfs_iflags_clear(ip, XFS_ISTALE);
  292 + xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
293 293 XFS_STATS_INC(xs_ig_found);
294 294  
295 295 return 0;
... ... @@ -314,6 +314,7 @@
314 314 struct xfs_inode *ip;
315 315 int error;
316 316 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino);
  317 + int iflags;
317 318  
318 319 ip = xfs_inode_alloc(mp, ino);
319 320 if (!ip)
320 321  
... ... @@ -358,8 +359,11 @@
358 359 * memory barrier that ensures this detection works correctly at lookup
359 360 * time.
360 361 */
  362 + iflags = XFS_INEW;
  363 + if (flags & XFS_IGET_DONTCACHE)
  364 + iflags |= XFS_IDONTCACHE;
361 365 ip->i_udquot = ip->i_gdquot = NULL;
362   - xfs_iflags_set(ip, XFS_INEW);
  366 + xfs_iflags_set(ip, iflags);
363 367  
364 368 /* insert the new inode */
365 369 spin_lock(&pag->pag_ici_lock);
... ... @@ -387,10 +387,11 @@
387 387 #define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT)
388 388 #define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */
389 389 #define XFS_IPINNED (1 << __XFS_IPINNED_BIT)
  390 +#define XFS_IDONTCACHE (1 << 9) /* don't cache the inode long term */
390 391  
391 392 /*
392 393 * Per-lifetime flags need to be reset when re-using a reclaimable inode during
393   - * inode lookup. Thi prevents unintended behaviour on the new inode from
  394 + * inode lookup. This prevents unintended behaviour on the new inode from
394 395 * ocurring.
395 396 */
396 397 #define XFS_IRECLAIM_RESET_FLAGS \
... ... @@ -553,6 +554,7 @@
553 554 */
554 555 #define XFS_IGET_CREATE 0x1
555 556 #define XFS_IGET_UNTRUSTED 0x2
  557 +#define XFS_IGET_DONTCACHE 0x4
556 558  
557 559 int xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
558 560 xfs_ino_t, struct xfs_dinode **,
... ... @@ -209,6 +209,7 @@
209 209 struct file *filp;
210 210 struct inode *inode;
211 211 struct dentry *dentry;
  212 + fmode_t fmode;
212 213  
213 214 if (!capable(CAP_SYS_ADMIN))
214 215 return -XFS_ERROR(EPERM);
215 216  
216 217  
217 218  
218 219  
... ... @@ -228,26 +229,21 @@
228 229 hreq->oflags |= O_LARGEFILE;
229 230 #endif
230 231  
231   - /* Put open permission in namei format. */
232 232 permflag = hreq->oflags;
233   - if ((permflag+1) & O_ACCMODE)
234   - permflag++;
235   - if (permflag & O_TRUNC)
236   - permflag |= 2;
237   -
  233 + fmode = OPEN_FMODE(permflag);
238 234 if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
239   - (permflag & FMODE_WRITE) && IS_APPEND(inode)) {
  235 + (fmode & FMODE_WRITE) && IS_APPEND(inode)) {
240 236 error = -XFS_ERROR(EPERM);
241 237 goto out_dput;
242 238 }
243 239  
244   - if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
  240 + if ((fmode & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
245 241 error = -XFS_ERROR(EACCES);
246 242 goto out_dput;
247 243 }
248 244  
249 245 /* Can't write directories. */
250   - if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) {
  246 + if (S_ISDIR(inode->i_mode) && (fmode & FMODE_WRITE)) {
251 247 error = -XFS_ERROR(EISDIR);
252 248 goto out_dput;
253 249 }
... ... @@ -75,7 +75,8 @@
75 75 return XFS_ERROR(ENOMEM);
76 76  
77 77 error = xfs_iget(mp, NULL, ino,
78   - XFS_IGET_UNTRUSTED, XFS_ILOCK_SHARED, &ip);
  78 + (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED),
  79 + XFS_ILOCK_SHARED, &ip);
79 80 if (error) {
80 81 *stat = BULKSTAT_RV_NOTHING;
81 82 goto out_free;
... ... @@ -726,8 +726,9 @@
726 726 .lv_iovecp = &reg,
727 727 };
728 728  
729   - /* remove inited flag */
  729 + /* remove inited flag, and account for space used */
730 730 tic->t_flags = 0;
  731 + tic->t_curr_res -= sizeof(magic);
731 732 error = xlog_write(log, &vec, tic, &lsn,
732 733 NULL, XLOG_UNMOUNT_TRANS);
733 734 /*
fs/xfs/xfs_log_recover.c
... ... @@ -3161,37 +3161,26 @@
3161 3161 */
3162 3162 continue;
3163 3163 }
  3164 + /*
  3165 + * Unlock the buffer so that it can be acquired in the normal
  3166 + * course of the transaction to truncate and free each inode.
  3167 + * Because we are not racing with anyone else here for the AGI
  3168 + * buffer, we don't even need to hold it locked to read the
  3169 + * initial unlinked bucket entries out of the buffer. We keep
  3170 + * buffer reference though, so that it stays pinned in memory
  3171 + * while we need the buffer.
  3172 + */
3164 3173 agi = XFS_BUF_TO_AGI(agibp);
  3174 + xfs_buf_unlock(agibp);
3165 3175  
3166 3176 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
3167 3177 agino = be32_to_cpu(agi->agi_unlinked[bucket]);
3168 3178 while (agino != NULLAGINO) {
3169   - /*
3170   - * Release the agi buffer so that it can
3171   - * be acquired in the normal course of the
3172   - * transaction to truncate and free the inode.
3173   - */
3174   - xfs_buf_relse(agibp);
3175   -
3176 3179 agino = xlog_recover_process_one_iunlink(mp,
3177 3180 agno, agino, bucket);
3178   -
3179   - /*
3180   - * Reacquire the agibuffer and continue around
3181   - * the loop. This should never fail as we know
3182   - * the buffer was good earlier on.
3183   - */
3184   - error = xfs_read_agi(mp, NULL, agno, &agibp);
3185   - ASSERT(error == 0);
3186   - agi = XFS_BUF_TO_AGI(agibp);
3187 3181 }
3188 3182 }
3189   -
3190   - /*
3191   - * Release the buffer for the current agi so we can
3192   - * go on to the next one.
3193   - */
3194   - xfs_buf_relse(agibp);
  3183 + xfs_buf_rele(agibp);
3195 3184 }
3196 3185  
3197 3186 mp->m_dmevmask = mp_dmevmask;
fs/xfs/xfs_rtalloc.c
... ... @@ -183,6 +183,7 @@
183 183 oblocks = map.br_startoff + map.br_blockcount;
184 184 }
185 185 return 0;
  186 +
186 187 error:
187 188 return error;
188 189 }
... ... @@ -2139,11 +2140,9 @@
2139 2140 xfs_buf_t *sumbp; /* summary file block buffer */
2140 2141  
2141 2142 mp = tp->t_mountp;
2142   - /*
2143   - * Synchronize by locking the bitmap inode.
2144   - */
2145   - xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
2146   - xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
  2143 +
  2144 + ASSERT(mp->m_rbmip->i_itemp != NULL);
  2145 + ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
2147 2146  
2148 2147 #if defined(__KERNEL__) && defined(DEBUG)
2149 2148 /*
... ... @@ -950,6 +950,22 @@
950 950 xfs_inactive(ip);
951 951 }
952 952  
  953 +/*
  954 + * We do an unlocked check for XFS_IDONTCACHE here because we are already
  955 + * serialised against cache hits here via the inode->i_lock and igrab() in
  956 + * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
  957 + * racing with us, and it avoids needing to grab a spinlock here for every inode
  958 + * we drop the final reference on.
  959 + */
  960 +STATIC int
  961 +xfs_fs_drop_inode(
  962 + struct inode *inode)
  963 +{
  964 + struct xfs_inode *ip = XFS_I(inode);
  965 +
  966 + return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE);
  967 +}
  968 +
953 969 STATIC void
954 970 xfs_free_fsname(
955 971 struct xfs_mount *mp)
... ... @@ -1433,6 +1449,7 @@
1433 1449 .destroy_inode = xfs_fs_destroy_inode,
1434 1450 .dirty_inode = xfs_fs_dirty_inode,
1435 1451 .evict_inode = xfs_fs_evict_inode,
  1452 + .drop_inode = xfs_fs_drop_inode,
1436 1453 .put_super = xfs_fs_put_super,
1437 1454 .sync_fs = xfs_fs_sync_fs,
1438 1455 .freeze_fs = xfs_fs_freeze,
1439 1456  
1440 1457  
... ... @@ -1606,12 +1623,28 @@
1606 1623 xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0);
1607 1624 if (!xfs_syncd_wq)
1608 1625 return -ENOMEM;
  1626 +
  1627 + /*
  1628 + * The allocation workqueue can be used in memory reclaim situations
  1629 + * (writepage path), and parallelism is only limited by the number of
  1630 + * AGs in all the filesystems mounted. Hence use the default large
  1631 + * max_active value for this workqueue.
  1632 + */
  1633 + xfs_alloc_wq = alloc_workqueue("xfsalloc", WQ_MEM_RECLAIM, 0);
  1634 + if (!xfs_alloc_wq)
  1635 + goto out_destroy_syncd;
  1636 +
1609 1637 return 0;
  1638 +
  1639 +out_destroy_syncd:
  1640 + destroy_workqueue(xfs_syncd_wq);
  1641 + return -ENOMEM;
1610 1642 }
1611 1643  
1612 1644 STATIC void
1613 1645 xfs_destroy_workqueues(void)
1614 1646 {
  1647 + destroy_workqueue(xfs_alloc_wq);
1615 1648 destroy_workqueue(xfs_syncd_wq);
1616 1649 }
1617 1650  
... ... @@ -627,16 +627,19 @@
627 627 TP_STRUCT__entry(
628 628 __field(dev_t, dev)
629 629 __field(xfs_ino_t, dp_ino)
  630 + __field(int, namelen)
630 631 __dynamic_array(char, name, name->len)
631 632 ),
632 633 TP_fast_assign(
633 634 __entry->dev = VFS_I(dp)->i_sb->s_dev;
634 635 __entry->dp_ino = dp->i_ino;
  636 + __entry->namelen = name->len;
635 637 memcpy(__get_str(name), name->name, name->len);
636 638 ),
637   - TP_printk("dev %d:%d dp ino 0x%llx name %s",
  639 + TP_printk("dev %d:%d dp ino 0x%llx name %.*s",
638 640 MAJOR(__entry->dev), MINOR(__entry->dev),
639 641 __entry->dp_ino,
  642 + __entry->namelen,
640 643 __get_str(name))
641 644 )
642 645  
... ... @@ -658,6 +661,8 @@
658 661 __field(dev_t, dev)
659 662 __field(xfs_ino_t, src_dp_ino)
660 663 __field(xfs_ino_t, target_dp_ino)
  664 + __field(int, src_namelen)
  665 + __field(int, target_namelen)
661 666 __dynamic_array(char, src_name, src_name->len)
662 667 __dynamic_array(char, target_name, target_name->len)
663 668 ),
664 669  
665 670  
666 671  
667 672  
... ... @@ -665,15 +670,20 @@
665 670 __entry->dev = VFS_I(src_dp)->i_sb->s_dev;
666 671 __entry->src_dp_ino = src_dp->i_ino;
667 672 __entry->target_dp_ino = target_dp->i_ino;
  673 + __entry->src_namelen = src_name->len;
  674 + __entry->target_namelen = target_name->len;
668 675 memcpy(__get_str(src_name), src_name->name, src_name->len);
669   - memcpy(__get_str(target_name), target_name->name, target_name->len);
  676 + memcpy(__get_str(target_name), target_name->name,
  677 + target_name->len);
670 678 ),
671 679 TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx"
672   - " src name %s target name %s",
  680 + " src name %.*s target name %.*s",
673 681 MAJOR(__entry->dev), MINOR(__entry->dev),
674 682 __entry->src_dp_ino,
675 683 __entry->target_dp_ino,
  684 + __entry->src_namelen,
676 685 __get_str(src_name),
  686 + __entry->target_namelen,
677 687 __get_str(target_name))
678 688 )
679 689  
... ... @@ -1408,7 +1418,7 @@
1408 1418 DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
1409 1419 DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
1410 1420  
1411   -DECLARE_EVENT_CLASS(xfs_dir2_class,
  1421 +DECLARE_EVENT_CLASS(xfs_da_class,
1412 1422 TP_PROTO(struct xfs_da_args *args),
1413 1423 TP_ARGS(args),
1414 1424 TP_STRUCT__entry(
... ... @@ -1443,7 +1453,7 @@
1443 1453 )
1444 1454  
1445 1455 #define DEFINE_DIR2_EVENT(name) \
1446   -DEFINE_EVENT(xfs_dir2_class, name, \
  1456 +DEFINE_EVENT(xfs_da_class, name, \
1447 1457 TP_PROTO(struct xfs_da_args *args), \
1448 1458 TP_ARGS(args))
1449 1459 DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
... ... @@ -1471,6 +1481,64 @@
1471 1481 DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
1472 1482 DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
1473 1483 DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
  1484 +
  1485 +#define DEFINE_ATTR_EVENT(name) \
  1486 +DEFINE_EVENT(xfs_da_class, name, \
  1487 + TP_PROTO(struct xfs_da_args *args), \
  1488 + TP_ARGS(args))
  1489 +DEFINE_ATTR_EVENT(xfs_attr_sf_add);
  1490 +DEFINE_ATTR_EVENT(xfs_attr_sf_addname);
  1491 +DEFINE_ATTR_EVENT(xfs_attr_sf_create);
  1492 +DEFINE_ATTR_EVENT(xfs_attr_sf_lookup);
  1493 +DEFINE_ATTR_EVENT(xfs_attr_sf_remove);
  1494 +DEFINE_ATTR_EVENT(xfs_attr_sf_removename);
  1495 +DEFINE_ATTR_EVENT(xfs_attr_sf_to_leaf);
  1496 +
  1497 +DEFINE_ATTR_EVENT(xfs_attr_leaf_add);
  1498 +DEFINE_ATTR_EVENT(xfs_attr_leaf_add_old);
  1499 +DEFINE_ATTR_EVENT(xfs_attr_leaf_add_new);
  1500 +DEFINE_ATTR_EVENT(xfs_attr_leaf_addname);
  1501 +DEFINE_ATTR_EVENT(xfs_attr_leaf_create);
  1502 +DEFINE_ATTR_EVENT(xfs_attr_leaf_lookup);
  1503 +DEFINE_ATTR_EVENT(xfs_attr_leaf_replace);
  1504 +DEFINE_ATTR_EVENT(xfs_attr_leaf_removename);
  1505 +DEFINE_ATTR_EVENT(xfs_attr_leaf_split);
  1506 +DEFINE_ATTR_EVENT(xfs_attr_leaf_split_before);
  1507 +DEFINE_ATTR_EVENT(xfs_attr_leaf_split_after);
  1508 +DEFINE_ATTR_EVENT(xfs_attr_leaf_clearflag);
  1509 +DEFINE_ATTR_EVENT(xfs_attr_leaf_setflag);
  1510 +DEFINE_ATTR_EVENT(xfs_attr_leaf_flipflags);
  1511 +DEFINE_ATTR_EVENT(xfs_attr_leaf_to_sf);
  1512 +DEFINE_ATTR_EVENT(xfs_attr_leaf_to_node);
  1513 +DEFINE_ATTR_EVENT(xfs_attr_leaf_rebalance);
  1514 +DEFINE_ATTR_EVENT(xfs_attr_leaf_unbalance);
  1515 +
  1516 +DEFINE_ATTR_EVENT(xfs_attr_node_addname);
  1517 +DEFINE_ATTR_EVENT(xfs_attr_node_lookup);
  1518 +DEFINE_ATTR_EVENT(xfs_attr_node_replace);
  1519 +DEFINE_ATTR_EVENT(xfs_attr_node_removename);
  1520 +
  1521 +#define DEFINE_DA_EVENT(name) \
  1522 +DEFINE_EVENT(xfs_da_class, name, \
  1523 + TP_PROTO(struct xfs_da_args *args), \
  1524 + TP_ARGS(args))
  1525 +DEFINE_DA_EVENT(xfs_da_split);
  1526 +DEFINE_DA_EVENT(xfs_da_join);
  1527 +DEFINE_DA_EVENT(xfs_da_link_before);
  1528 +DEFINE_DA_EVENT(xfs_da_link_after);
  1529 +DEFINE_DA_EVENT(xfs_da_unlink_back);
  1530 +DEFINE_DA_EVENT(xfs_da_unlink_forward);
  1531 +DEFINE_DA_EVENT(xfs_da_root_split);
  1532 +DEFINE_DA_EVENT(xfs_da_root_join);
  1533 +DEFINE_DA_EVENT(xfs_da_node_add);
  1534 +DEFINE_DA_EVENT(xfs_da_node_create);
  1535 +DEFINE_DA_EVENT(xfs_da_node_split);
  1536 +DEFINE_DA_EVENT(xfs_da_node_remove);
  1537 +DEFINE_DA_EVENT(xfs_da_node_rebalance);
  1538 +DEFINE_DA_EVENT(xfs_da_node_unbalance);
  1539 +DEFINE_DA_EVENT(xfs_da_swap_lastblock);
  1540 +DEFINE_DA_EVENT(xfs_da_grow_inode);
  1541 +DEFINE_DA_EVENT(xfs_da_shrink_inode);
1474 1542  
1475 1543 DECLARE_EVENT_CLASS(xfs_dir2_space_class,
1476 1544 TP_PROTO(struct xfs_da_args *args, int idx),