Commit a601e63717a269b9171a7164ab9e285788362d1b

Authored by Linus Torvalds

Merge tag 'for-linus-v3.7-rc5' of git://oss.sgi.com/xfs/xfs

Pull xfs bugfixes from Ben Myers:

 - fix for large transactions spanning multiple iclog buffers

 - zero the allocation_args structure on the stack before using it to
   determine whether to use a worker for allocation
 - move allocation stack switch to xfs_bmapi_allocate in order to
   prevent deadlock on AGF buffers

 - growfs no longer reads in garbage for new secondary superblocks

 - silence a build warning

 - ensure that invalid buffers never get written to disk while on free
   list

 - don't vmap inode cluster buffers during free

 - fix buffer shutdown reference count mismatch

 - fix reading of wrapped log data

* tag 'for-linus-v3.7-rc5' of git://oss.sgi.com/xfs/xfs:
  xfs: fix reading of wrapped log data
  xfs: fix buffer shutdown reference count mismatch
  xfs: don't vmap inode cluster buffers during free
  xfs: invalidate allocbt blocks moved to the free list
  xfs: silence uninitialised f.file warning.
  xfs: growfs: don't read garbage for new secondary superblocks
  xfs: move allocation stack switch up to xfs_bmapi_allocate
  xfs: introduce XFS_BMAPI_STACK_SWITCH
  xfs: zero allocation_args on the kernel stack
  xfs: only update the last_sync_lsn when a transaction completes

Showing 13 changed files Side-by-side Diff

... ... @@ -1866,6 +1866,7 @@
1866 1866 /*
1867 1867 * Initialize the args structure.
1868 1868 */
  1869 + memset(&targs, 0, sizeof(targs));
1869 1870 targs.tp = tp;
1870 1871 targs.mp = mp;
1871 1872 targs.agbp = agbp;
... ... @@ -2207,7 +2208,7 @@
2207 2208 * group or loop over the allocation groups to find the result.
2208 2209 */
2209 2210 int /* error */
2210   -__xfs_alloc_vextent(
  2211 +xfs_alloc_vextent(
2211 2212 xfs_alloc_arg_t *args) /* allocation argument structure */
2212 2213 {
2213 2214 xfs_agblock_t agsize; /* allocation group size */
... ... @@ -2415,46 +2416,6 @@
2415 2416 error0:
2416 2417 xfs_perag_put(args->pag);
2417 2418 return error;
2418   -}
2419   -
2420   -static void
2421   -xfs_alloc_vextent_worker(
2422   - struct work_struct *work)
2423   -{
2424   - struct xfs_alloc_arg *args = container_of(work,
2425   - struct xfs_alloc_arg, work);
2426   - unsigned long pflags;
2427   -
2428   - /* we are in a transaction context here */
2429   - current_set_flags_nested(&pflags, PF_FSTRANS);
2430   -
2431   - args->result = __xfs_alloc_vextent(args);
2432   - complete(args->done);
2433   -
2434   - current_restore_flags_nested(&pflags, PF_FSTRANS);
2435   -}
2436   -
2437   -/*
2438   - * Data allocation requests often come in with little stack to work on. Push
2439   - * them off to a worker thread so there is lots of stack to use. Metadata
2440   - * requests, OTOH, are generally from low stack usage paths, so avoid the
2441   - * context switch overhead here.
2442   - */
2443   -int
2444   -xfs_alloc_vextent(
2445   - struct xfs_alloc_arg *args)
2446   -{
2447   - DECLARE_COMPLETION_ONSTACK(done);
2448   -
2449   - if (!args->userdata)
2450   - return __xfs_alloc_vextent(args);
2451   -
2452   -
2453   - args->done = &done;
2454   - INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
2455   - queue_work(xfs_alloc_wq, &args->work);
2456   - wait_for_completion(&done);
2457   - return args->result;
2458 2419 }
2459 2420  
2460 2421 /*
... ... @@ -120,9 +120,6 @@
120 120 char isfl; /* set if is freelist blocks - !acctg */
121 121 char userdata; /* set if this is user data */
122 122 xfs_fsblock_t firstblock; /* io first block allocated */
123   - struct completion *done;
124   - struct work_struct work;
125   - int result;
126 123 } xfs_alloc_arg_t;
127 124  
128 125 /*
fs/xfs/xfs_alloc_btree.c
... ... @@ -121,6 +121,8 @@
121 121 xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
122 122 XFS_EXTENT_BUSY_SKIP_DISCARD);
123 123 xfs_trans_agbtree_delta(cur->bc_tp, -1);
  124 +
  125 + xfs_trans_binval(cur->bc_tp, bp);
124 126 return 0;
125 127 }
126 128  
... ... @@ -2437,6 +2437,7 @@
2437 2437 * Normal allocation, done through xfs_alloc_vextent.
2438 2438 */
2439 2439 tryagain = isaligned = 0;
  2440 + memset(&args, 0, sizeof(args));
2440 2441 args.tp = ap->tp;
2441 2442 args.mp = mp;
2442 2443 args.fsbno = ap->blkno;
... ... @@ -3082,6 +3083,7 @@
3082 3083 * Convert to a btree with two levels, one record in root.
3083 3084 */
3084 3085 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
  3086 + memset(&args, 0, sizeof(args));
3085 3087 args.tp = tp;
3086 3088 args.mp = mp;
3087 3089 args.firstblock = *firstblock;
... ... @@ -3237,6 +3239,7 @@
3237 3239 xfs_buf_t *bp; /* buffer for extent block */
3238 3240 xfs_bmbt_rec_host_t *ep;/* extent record pointer */
3239 3241  
  3242 + memset(&args, 0, sizeof(args));
3240 3243 args.tp = tp;
3241 3244 args.mp = ip->i_mount;
3242 3245 args.firstblock = *firstblock;
3243 3246  
... ... @@ -4616,12 +4619,11 @@
4616 4619  
4617 4620  
4618 4621 STATIC int
4619   -xfs_bmapi_allocate(
4620   - struct xfs_bmalloca *bma,
4621   - int flags)
  4622 +__xfs_bmapi_allocate(
  4623 + struct xfs_bmalloca *bma)
4622 4624 {
4623 4625 struct xfs_mount *mp = bma->ip->i_mount;
4624   - int whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
  4626 + int whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
4625 4627 XFS_ATTR_FORK : XFS_DATA_FORK;
4626 4628 struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4627 4629 int tmp_logflags = 0;
4628 4630  
4629 4631  
4630 4632  
... ... @@ -4654,24 +4656,27 @@
4654 4656 * Indicate if this is the first user data in the file, or just any
4655 4657 * user data.
4656 4658 */
4657   - if (!(flags & XFS_BMAPI_METADATA)) {
  4659 + if (!(bma->flags & XFS_BMAPI_METADATA)) {
4658 4660 bma->userdata = (bma->offset == 0) ?
4659 4661 XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
4660 4662 }
4661 4663  
4662   - bma->minlen = (flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
  4664 + bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
4663 4665  
4664 4666 /*
4665 4667 * Only want to do the alignment at the eof if it is userdata and
4666 4668 * allocation length is larger than a stripe unit.
4667 4669 */
4668 4670 if (mp->m_dalign && bma->length >= mp->m_dalign &&
4669   - !(flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
  4671 + !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
4670 4672 error = xfs_bmap_isaeof(bma, whichfork);
4671 4673 if (error)
4672 4674 return error;
4673 4675 }
4674 4676  
  4677 + if (bma->flags & XFS_BMAPI_STACK_SWITCH)
  4678 + bma->stack_switch = 1;
  4679 +
4675 4680 error = xfs_bmap_alloc(bma);
4676 4681 if (error)
4677 4682 return error;
... ... @@ -4706,7 +4711,7 @@
4706 4711 * A wasdelay extent has been initialized, so shouldn't be flagged
4707 4712 * as unwritten.
4708 4713 */
4709   - if (!bma->wasdel && (flags & XFS_BMAPI_PREALLOC) &&
  4714 + if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
4710 4715 xfs_sb_version_hasextflgbit(&mp->m_sb))
4711 4716 bma->got.br_state = XFS_EXT_UNWRITTEN;
4712 4717  
... ... @@ -4734,6 +4739,45 @@
4734 4739 return 0;
4735 4740 }
4736 4741  
  4742 +static void
  4743 +xfs_bmapi_allocate_worker(
  4744 + struct work_struct *work)
  4745 +{
  4746 + struct xfs_bmalloca *args = container_of(work,
  4747 + struct xfs_bmalloca, work);
  4748 + unsigned long pflags;
  4749 +
  4750 + /* we are in a transaction context here */
  4751 + current_set_flags_nested(&pflags, PF_FSTRANS);
  4752 +
  4753 + args->result = __xfs_bmapi_allocate(args);
  4754 + complete(args->done);
  4755 +
  4756 + current_restore_flags_nested(&pflags, PF_FSTRANS);
  4757 +}
  4758 +
  4759 +/*
  4760 + * Some allocation requests often come in with little stack to work on. Push
  4761 + * them off to a worker thread so there is lots of stack to use. Otherwise just
  4762 + * call directly to avoid the context switch overhead here.
  4763 + */
  4764 +int
  4765 +xfs_bmapi_allocate(
  4766 + struct xfs_bmalloca *args)
  4767 +{
  4768 + DECLARE_COMPLETION_ONSTACK(done);
  4769 +
  4770 + if (!args->stack_switch)
  4771 + return __xfs_bmapi_allocate(args);
  4772 +
  4773 +
  4774 + args->done = &done;
  4775 + INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
  4776 + queue_work(xfs_alloc_wq, &args->work);
  4777 + wait_for_completion(&done);
  4778 + return args->result;
  4779 +}
  4780 +
4737 4781 STATIC int
4738 4782 xfs_bmapi_convert_unwritten(
4739 4783 struct xfs_bmalloca *bma,
... ... @@ -4919,6 +4963,7 @@
4919 4963 bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4920 4964 bma.wasdel = wasdelay;
4921 4965 bma.offset = bno;
  4966 + bma.flags = flags;
4922 4967  
4923 4968 /*
4924 4969 * There's a 32/64 bit type mismatch between the
... ... @@ -4934,7 +4979,7 @@
4934 4979  
4935 4980 ASSERT(len > 0);
4936 4981 ASSERT(bma.length > 0);
4937   - error = xfs_bmapi_allocate(&bma, flags);
  4982 + error = xfs_bmapi_allocate(&bma);
4938 4983 if (error)
4939 4984 goto error0;
4940 4985 if (bma.blkno == NULLFSBLOCK)
... ... @@ -77,6 +77,7 @@
77 77 * from written to unwritten, otherwise convert from unwritten to written.
78 78 */
79 79 #define XFS_BMAPI_CONVERT 0x040
  80 +#define XFS_BMAPI_STACK_SWITCH 0x080
80 81  
81 82 #define XFS_BMAPI_FLAGS \
82 83 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
... ... @@ -85,7 +86,8 @@
85 86 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \
86 87 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \
87 88 { XFS_BMAPI_CONTIG, "CONTIG" }, \
88   - { XFS_BMAPI_CONVERT, "CONVERT" }
  89 + { XFS_BMAPI_CONVERT, "CONVERT" }, \
  90 + { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" }
89 91  
90 92  
91 93 static inline int xfs_bmapi_aflag(int w)
... ... @@ -133,6 +135,11 @@
133 135 char userdata;/* set if is user data */
134 136 char aeof; /* allocated space at eof */
135 137 char conv; /* overwriting unwritten extents */
  138 + char stack_switch;
  139 + int flags;
  140 + struct completion *done;
  141 + struct work_struct work;
  142 + int result;
136 143 } xfs_bmalloca_t;
137 144  
138 145 /*
fs/xfs/xfs_buf_item.c
... ... @@ -526,7 +526,25 @@
526 526 }
527 527 xfs_buf_relse(bp);
528 528 } else if (freed && remove) {
  529 + /*
  530 + * There are currently two references to the buffer - the active
  531 + * LRU reference and the buf log item. What we are about to do
  532 + * here - simulate a failed IO completion - requires 3
  533 + * references.
  534 + *
  535 + * The LRU reference is removed by the xfs_buf_stale() call. The
  536 + * buf item reference is removed by the xfs_buf_iodone()
  537 + * callback that is run by xfs_buf_do_callbacks() during ioend
  538 + * processing (via the bp->b_iodone callback), and then finally
  539 + * the ioend processing will drop the IO reference if the buffer
  540 + * is marked XBF_ASYNC.
  541 + *
  542 + * Hence we need to take an additional reference here so that IO
  543 + * completion processing doesn't free the buffer prematurely.
  544 + */
529 545 xfs_buf_lock(bp);
  546 + xfs_buf_hold(bp);
  547 + bp->b_flags |= XBF_ASYNC;
530 548 xfs_buf_ioerror(bp, EIO);
531 549 XFS_BUF_UNDONE(bp);
532 550 xfs_buf_stale(bp);
... ... @@ -399,9 +399,26 @@
399 399  
400 400 /* update secondary superblocks. */
401 401 for (agno = 1; agno < nagcount; agno++) {
402   - error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
  402 + error = 0;
  403 + /*
  404 + * new secondary superblocks need to be zeroed, not read from
  405 + * disk as the contents of the new area we are growing into is
  406 + * completely unknown.
  407 + */
  408 + if (agno < oagcount) {
  409 + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
403 410 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
404 411 XFS_FSS_TO_BB(mp, 1), 0, &bp);
  412 + } else {
  413 + bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp,
  414 + XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
  415 + XFS_FSS_TO_BB(mp, 1), 0);
  416 + if (bp)
  417 + xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
  418 + else
  419 + error = ENOMEM;
  420 + }
  421 +
405 422 if (error) {
406 423 xfs_warn(mp,
407 424 "error %d reading secondary superblock for ag %d",
... ... @@ -423,7 +440,7 @@
423 440 break; /* no point in continuing */
424 441 }
425 442 }
426   - return 0;
  443 + return error;
427 444  
428 445 error0:
429 446 xfs_trans_cancel(tp, XFS_TRANS_ABORT);
... ... @@ -250,6 +250,7 @@
250 250 /* boundary */
251 251 struct xfs_perag *pag;
252 252  
  253 + memset(&args, 0, sizeof(args));
253 254 args.tp = tp;
254 255 args.mp = tp->t_mountp;
255 256  
... ... @@ -1509,7 +1509,8 @@
1509 1509 * to mark all the active inodes on the buffer stale.
1510 1510 */
1511 1511 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
1512   - mp->m_bsize * blks_per_cluster, 0);
  1512 + mp->m_bsize * blks_per_cluster,
  1513 + XBF_UNMAPPED);
1513 1514  
1514 1515 if (!bp)
1515 1516 return ENOMEM;
... ... @@ -70,7 +70,7 @@
70 70 int hsize;
71 71 xfs_handle_t handle;
72 72 struct inode *inode;
73   - struct fd f;
  73 + struct fd f = {0};
74 74 struct path path;
75 75 int error;
76 76 struct xfs_inode *ip;
... ... @@ -584,7 +584,9 @@
584 584 * pointer that the caller gave to us.
585 585 */
586 586 error = xfs_bmapi_write(tp, ip, map_start_fsb,
587   - count_fsb, 0, &first_block, 1,
  587 + count_fsb,
  588 + XFS_BMAPI_STACK_SWITCH,
  589 + &first_block, 1,
588 590 imap, &nimaps, &free_list);
589 591 if (error)
590 592 goto trans_cancel;
... ... @@ -2387,14 +2387,27 @@
2387 2387  
2388 2388  
2389 2389 /*
2390   - * update the last_sync_lsn before we drop the
  2390 + * Completion of an iclog IO does not imply that
  2391 + * a transaction has completed, as transactions
  2392 + * can be large enough to span many iclogs. We
  2393 + * cannot change the tail of the log half way
  2394 + * through a transaction as this may be the only
  2395 + * transaction in the log and moving the tail to
  2396 + * point to the middle of it will prevent
  2397 + * recovery from finding the start of the
  2398 + * transaction. Hence we should only update the
  2399 + * last_sync_lsn if this iclog contains
  2400 + * transaction completion callbacks on it.
  2401 + *
  2402 + * We have to do this before we drop the
2391 2403 * icloglock to ensure we are the only one that
2392 2404 * can update it.
2393 2405 */
2394 2406 ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
2395 2407 be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
2396   - atomic64_set(&log->l_last_sync_lsn,
2397   - be64_to_cpu(iclog->ic_header.h_lsn));
  2408 + if (iclog->ic_callback)
  2409 + atomic64_set(&log->l_last_sync_lsn,
  2410 + be64_to_cpu(iclog->ic_header.h_lsn));
2398 2411  
2399 2412 } else
2400 2413 ioerrors++;
fs/xfs/xfs_log_recover.c
... ... @@ -3541,7 +3541,7 @@
3541 3541 * - order is important.
3542 3542 */
3543 3543 error = xlog_bread_offset(log, 0,
3544   - bblks - split_bblks, hbp,
  3544 + bblks - split_bblks, dbp,
3545 3545 offset + BBTOB(split_bblks));
3546 3546 if (error)
3547 3547 goto bread_err2;