Commit 78f94673d7faf01677f374f4ebbf324ff1a0aa6e

Authored by Tristan Ye
Committed by Joel Becker
1 parent 547ba7c8ef

Ocfs2: Optimize ocfs2 truncate to use ocfs2_remove_btree_range() instead.

Truncate is just a special case of punching holes (from new i_size to
end), so we can take advantage of the existing
ocfs2_remove_btree_range() to reduce the complexity and redundancy in
alloc.c.  The goal here is to make truncate more generic and
straightforward.

Several functions only used by ocfs2_commit_truncate() will simply be
removed.

ocfs2_remove_btree_range() was originally used by the hole punching
code, which didn't take refcount trees into account (definitely a bug).
We therefore need to change that func a bit to handle refcount trees.
It must take the refcount lock, calculate and reserve blocks for
refcount tree changes, and decrease refcounts at the end.  We replace
ocfs2_lock_allocators() here by adding a new func
ocfs2_reserve_blocks_for_rec_trunc() which accepts some extra blocks to
reserve.  This will not hurt any other code using
ocfs2_remove_btree_range() (such as dir truncate and hole punching).

I merged the following steps into one patch since they may be
logically doing one thing, though I know it looks a little bit fat
to review.

1). Remove redundant code used by ocfs2_commit_truncate(), since we're
    moving to ocfs2_remove_btree_range anyway.

2). Add a new func ocfs2_reserve_blocks_for_rec_trunc() for purpose of
    accepting some extra blocks to reserve.

3). Change ocfs2_prepare_refcount_change_for_del() a bit to fit our
    needs.  It's safe to do this since it's only being called by
    truncate.

4). Change ocfs2_remove_btree_range() a bit to take refcount case into
    account.

5). Finally, we change ocfs2_commit_truncate() to call
    ocfs2_remove_btree_range() in a proper way.

The patch has passed basic sanity testing; stress tests with heavier
workloads are expected to follow.

Based on this patch, fixing the punching holes bug will be fairly easy.

Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
Acked-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>

Showing 7 changed files with 178 additions and 572 deletions Side-by-side Diff

... ... @@ -5587,19 +5587,97 @@
5587 5587 return ret;
5588 5588 }
5589 5589  
  5590 +/*
  5591 + * ocfs2_reserve_blocks_for_rec_trunc() would look basically the
  5592 + * same as ocfs2_lock_allocators(), except for it accepts a blocks
  5593 + * number to reserve some extra blocks, and it only handles meta
  5594 + * data allocations.
  5595 + *
  5596 + * Currently, only ocfs2_remove_btree_range() uses it for truncating
  5597 + * and punching holes.
  5598 + */
  5599 +static int ocfs2_reserve_blocks_for_rec_trunc(struct inode *inode,
  5600 + struct ocfs2_extent_tree *et,
  5601 + u32 extents_to_split,
  5602 + struct ocfs2_alloc_context **ac,
  5603 + int extra_blocks)
  5604 +{
  5605 + int ret = 0, num_free_extents;
  5606 + unsigned int max_recs_needed = 2 * extents_to_split;
  5607 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
  5608 +
  5609 + *ac = NULL;
  5610 +
  5611 + num_free_extents = ocfs2_num_free_extents(osb, et);
  5612 + if (num_free_extents < 0) {
  5613 + ret = num_free_extents;
  5614 + mlog_errno(ret);
  5615 + goto out;
  5616 + }
  5617 +
  5618 + if (!num_free_extents ||
  5619 + (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed))
  5620 + extra_blocks += ocfs2_extend_meta_needed(et->et_root_el);
  5621 +
  5622 + if (extra_blocks) {
  5623 + ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, ac);
  5624 + if (ret < 0) {
  5625 + if (ret != -ENOSPC)
  5626 + mlog_errno(ret);
  5627 + goto out;
  5628 + }
  5629 + }
  5630 +
  5631 +out:
  5632 + if (ret) {
  5633 + if (*ac) {
  5634 + ocfs2_free_alloc_context(*ac);
  5635 + *ac = NULL;
  5636 + }
  5637 + }
  5638 +
  5639 + return ret;
  5640 +}
  5641 +
5590 5642 int ocfs2_remove_btree_range(struct inode *inode,
5591 5643 struct ocfs2_extent_tree *et,
5592   - u32 cpos, u32 phys_cpos, u32 len,
5593   - struct ocfs2_cached_dealloc_ctxt *dealloc)
  5644 + u32 cpos, u32 phys_cpos, u32 len, int flags,
  5645 + struct ocfs2_cached_dealloc_ctxt *dealloc,
  5646 + u64 refcount_loc)
5594 5647 {
5595   - int ret;
  5648 + int ret, credits = 0, extra_blocks = 0;
5596 5649 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
5597 5650 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5598 5651 struct inode *tl_inode = osb->osb_tl_inode;
5599 5652 handle_t *handle;
5600 5653 struct ocfs2_alloc_context *meta_ac = NULL;
  5654 + struct ocfs2_refcount_tree *ref_tree = NULL;
5601 5655  
5602   - ret = ocfs2_lock_allocators(inode, et, 0, 1, NULL, &meta_ac);
  5656 + if ((flags & OCFS2_EXT_REFCOUNTED) && len) {
  5657 + BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
  5658 + OCFS2_HAS_REFCOUNT_FL));
  5659 +
  5660 + ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
  5661 + &ref_tree, NULL);
  5662 + if (ret) {
  5663 + mlog_errno(ret);
  5664 + goto out;
  5665 + }
  5666 +
  5667 + ret = ocfs2_prepare_refcount_change_for_del(inode,
  5668 + refcount_loc,
  5669 + phys_blkno,
  5670 + len,
  5671 + &credits,
  5672 + &extra_blocks);
  5673 + if (ret < 0) {
  5674 + mlog_errno(ret);
  5675 + goto out;
  5676 + }
  5677 + }
  5678 +
  5679 + ret = ocfs2_reserve_blocks_for_rec_trunc(inode, et, 1, &meta_ac,
  5680 + extra_blocks);
5603 5681 if (ret) {
5604 5682 mlog_errno(ret);
5605 5683 return ret;
... ... @@ -5615,7 +5693,8 @@
5615 5693 }
5616 5694 }
5617 5695  
5618   - handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
  5696 + handle = ocfs2_start_trans(osb,
  5697 + ocfs2_remove_extent_credits(osb->sb) + credits);
5619 5698 if (IS_ERR(handle)) {
5620 5699 ret = PTR_ERR(handle);
5621 5700 mlog_errno(ret);
5622 5701  
... ... @@ -5642,10 +5721,21 @@
5642 5721  
5643 5722 ocfs2_journal_dirty(handle, et->et_root_bh);
5644 5723  
5645   - ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
5646   - if (ret)
5647   - mlog_errno(ret);
  5724 + if (phys_blkno) {
  5725 + if (flags & OCFS2_EXT_REFCOUNTED)
  5726 + ret = ocfs2_decrease_refcount(inode, handle,
  5727 + ocfs2_blocks_to_clusters(osb->sb,
  5728 + phys_blkno),
  5729 + len, meta_ac,
  5730 + dealloc, 1);
  5731 + else
  5732 + ret = ocfs2_truncate_log_append(osb, handle,
  5733 + phys_blkno, len);
  5734 + if (ret)
  5735 + mlog_errno(ret);
5648 5736  
  5737 + }
  5738 +
5649 5739 out_commit:
5650 5740 ocfs2_commit_trans(osb, handle);
5651 5741 out:
... ... @@ -5654,6 +5744,9 @@
5654 5744 if (meta_ac)
5655 5745 ocfs2_free_alloc_context(meta_ac);
5656 5746  
  5747 + if (ref_tree)
  5748 + ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
  5749 +
5657 5750 return ret;
5658 5751 }
5659 5752  
... ... @@ -6481,417 +6574,6 @@
6481 6574 le16_to_cpu(eb->h_suballoc_bit));
6482 6575 }
6483 6576  
6484   -/* This function will figure out whether the currently last extent
6485   - * block will be deleted, and if it will, what the new last extent
6486   - * block will be so we can update his h_next_leaf_blk field, as well
6487   - * as the dinodes i_last_eb_blk */
6488   -static int ocfs2_find_new_last_ext_blk(struct inode *inode,
6489   - unsigned int clusters_to_del,
6490   - struct ocfs2_path *path,
6491   - struct buffer_head **new_last_eb)
6492   -{
6493   - int next_free, ret = 0;
6494   - u32 cpos;
6495   - struct ocfs2_extent_rec *rec;
6496   - struct ocfs2_extent_block *eb;
6497   - struct ocfs2_extent_list *el;
6498   - struct buffer_head *bh = NULL;
6499   -
6500   - *new_last_eb = NULL;
6501   -
6502   - /* we have no tree, so of course, no last_eb. */
6503   - if (!path->p_tree_depth)
6504   - goto out;
6505   -
6506   - /* trunc to zero special case - this makes tree_depth = 0
6507   - * regardless of what it is. */
6508   - if (OCFS2_I(inode)->ip_clusters == clusters_to_del)
6509   - goto out;
6510   -
6511   - el = path_leaf_el(path);
6512   - BUG_ON(!el->l_next_free_rec);
6513   -
6514   - /*
6515   - * Make sure that this extent list will actually be empty
6516   - * after we clear away the data. We can shortcut out if
6517   - * there's more than one non-empty extent in the
6518   - * list. Otherwise, a check of the remaining extent is
6519   - * necessary.
6520   - */
6521   - next_free = le16_to_cpu(el->l_next_free_rec);
6522   - rec = NULL;
6523   - if (ocfs2_is_empty_extent(&el->l_recs[0])) {
6524   - if (next_free > 2)
6525   - goto out;
6526   -
6527   - /* We may have a valid extent in index 1, check it. */
6528   - if (next_free == 2)
6529   - rec = &el->l_recs[1];
6530   -
6531   - /*
6532   - * Fall through - no more nonempty extents, so we want
6533   - * to delete this leaf.
6534   - */
6535   - } else {
6536   - if (next_free > 1)
6537   - goto out;
6538   -
6539   - rec = &el->l_recs[0];
6540   - }
6541   -
6542   - if (rec) {
6543   - /*
6544   - * Check it we'll only be trimming off the end of this
6545   - * cluster.
6546   - */
6547   - if (le16_to_cpu(rec->e_leaf_clusters) > clusters_to_del)
6548   - goto out;
6549   - }
6550   -
6551   - ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos);
6552   - if (ret) {
6553   - mlog_errno(ret);
6554   - goto out;
6555   - }
6556   -
6557   - ret = ocfs2_find_leaf(INODE_CACHE(inode), path_root_el(path), cpos, &bh);
6558   - if (ret) {
6559   - mlog_errno(ret);
6560   - goto out;
6561   - }
6562   -
6563   - eb = (struct ocfs2_extent_block *) bh->b_data;
6564   - el = &eb->h_list;
6565   -
6566   - /* ocfs2_find_leaf() gets the eb from ocfs2_read_extent_block().
6567   - * Any corruption is a code bug. */
6568   - BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
6569   -
6570   - *new_last_eb = bh;
6571   - get_bh(*new_last_eb);
6572   - mlog(0, "returning block %llu, (cpos: %u)\n",
6573   - (unsigned long long)le64_to_cpu(eb->h_blkno), cpos);
6574   -out:
6575   - brelse(bh);
6576   -
6577   - return ret;
6578   -}
6579   -
6580   -/*
6581   - * Trim some clusters off the rightmost edge of a tree. Only called
6582   - * during truncate.
6583   - *
6584   - * The caller needs to:
6585   - * - start journaling of each path component.
6586   - * - compute and fully set up any new last ext block
6587   - */
6588   -static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path,
6589   - handle_t *handle, struct ocfs2_truncate_context *tc,
6590   - u32 clusters_to_del, u64 *delete_start, u8 *flags)
6591   -{
6592   - int ret, i, index = path->p_tree_depth;
6593   - u32 new_edge = 0;
6594   - u64 deleted_eb = 0;
6595   - struct buffer_head *bh;
6596   - struct ocfs2_extent_list *el;
6597   - struct ocfs2_extent_rec *rec;
6598   -
6599   - *delete_start = 0;
6600   - *flags = 0;
6601   -
6602   - while (index >= 0) {
6603   - bh = path->p_node[index].bh;
6604   - el = path->p_node[index].el;
6605   -
6606   - mlog(0, "traveling tree (index = %d, block = %llu)\n",
6607   - index, (unsigned long long)bh->b_blocknr);
6608   -
6609   - BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0);
6610   -
6611   - if (index !=
6612   - (path->p_tree_depth - le16_to_cpu(el->l_tree_depth))) {
6613   - ocfs2_error(inode->i_sb,
6614   - "Inode %lu has invalid ext. block %llu",
6615   - inode->i_ino,
6616   - (unsigned long long)bh->b_blocknr);
6617   - ret = -EROFS;
6618   - goto out;
6619   - }
6620   -
6621   -find_tail_record:
6622   - i = le16_to_cpu(el->l_next_free_rec) - 1;
6623   - rec = &el->l_recs[i];
6624   -
6625   - mlog(0, "Extent list before: record %d: (%u, %u, %llu), "
6626   - "next = %u\n", i, le32_to_cpu(rec->e_cpos),
6627   - ocfs2_rec_clusters(el, rec),
6628   - (unsigned long long)le64_to_cpu(rec->e_blkno),
6629   - le16_to_cpu(el->l_next_free_rec));
6630   -
6631   - BUG_ON(ocfs2_rec_clusters(el, rec) < clusters_to_del);
6632   -
6633   - if (le16_to_cpu(el->l_tree_depth) == 0) {
6634   - /*
6635   - * If the leaf block contains a single empty
6636   - * extent and no records, we can just remove
6637   - * the block.
6638   - */
6639   - if (i == 0 && ocfs2_is_empty_extent(rec)) {
6640   - memset(rec, 0,
6641   - sizeof(struct ocfs2_extent_rec));
6642   - el->l_next_free_rec = cpu_to_le16(0);
6643   -
6644   - goto delete;
6645   - }
6646   -
6647   - /*
6648   - * Remove any empty extents by shifting things
6649   - * left. That should make life much easier on
6650   - * the code below. This condition is rare
6651   - * enough that we shouldn't see a performance
6652   - * hit.
6653   - */
6654   - if (ocfs2_is_empty_extent(&el->l_recs[0])) {
6655   - le16_add_cpu(&el->l_next_free_rec, -1);
6656   -
6657   - for(i = 0;
6658   - i < le16_to_cpu(el->l_next_free_rec); i++)
6659   - el->l_recs[i] = el->l_recs[i + 1];
6660   -
6661   - memset(&el->l_recs[i], 0,
6662   - sizeof(struct ocfs2_extent_rec));
6663   -
6664   - /*
6665   - * We've modified our extent list. The
6666   - * simplest way to handle this change
6667   - * is to being the search from the
6668   - * start again.
6669   - */
6670   - goto find_tail_record;
6671   - }
6672   -
6673   - le16_add_cpu(&rec->e_leaf_clusters, -clusters_to_del);
6674   -
6675   - /*
6676   - * We'll use "new_edge" on our way back up the
6677   - * tree to know what our rightmost cpos is.
6678   - */
6679   - new_edge = le16_to_cpu(rec->e_leaf_clusters);
6680   - new_edge += le32_to_cpu(rec->e_cpos);
6681   -
6682   - /*
6683   - * The caller will use this to delete data blocks.
6684   - */
6685   - *delete_start = le64_to_cpu(rec->e_blkno)
6686   - + ocfs2_clusters_to_blocks(inode->i_sb,
6687   - le16_to_cpu(rec->e_leaf_clusters));
6688   - *flags = rec->e_flags;
6689   -
6690   - /*
6691   - * If it's now empty, remove this record.
6692   - */
6693   - if (le16_to_cpu(rec->e_leaf_clusters) == 0) {
6694   - memset(rec, 0,
6695   - sizeof(struct ocfs2_extent_rec));
6696   - le16_add_cpu(&el->l_next_free_rec, -1);
6697   - }
6698   - } else {
6699   - if (le64_to_cpu(rec->e_blkno) == deleted_eb) {
6700   - memset(rec, 0,
6701   - sizeof(struct ocfs2_extent_rec));
6702   - le16_add_cpu(&el->l_next_free_rec, -1);
6703   -
6704   - goto delete;
6705   - }
6706   -
6707   - /* Can this actually happen? */
6708   - if (le16_to_cpu(el->l_next_free_rec) == 0)
6709   - goto delete;
6710   -
6711   - /*
6712   - * We never actually deleted any clusters
6713   - * because our leaf was empty. There's no
6714   - * reason to adjust the rightmost edge then.
6715   - */
6716   - if (new_edge == 0)
6717   - goto delete;
6718   -
6719   - rec->e_int_clusters = cpu_to_le32(new_edge);
6720   - le32_add_cpu(&rec->e_int_clusters,
6721   - -le32_to_cpu(rec->e_cpos));
6722   -
6723   - /*
6724   - * A deleted child record should have been
6725   - * caught above.
6726   - */
6727   - BUG_ON(le32_to_cpu(rec->e_int_clusters) == 0);
6728   - }
6729   -
6730   -delete:
6731   - ocfs2_journal_dirty(handle, bh);
6732   -
6733   - mlog(0, "extent list container %llu, after: record %d: "
6734   - "(%u, %u, %llu), next = %u.\n",
6735   - (unsigned long long)bh->b_blocknr, i,
6736   - le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec),
6737   - (unsigned long long)le64_to_cpu(rec->e_blkno),
6738   - le16_to_cpu(el->l_next_free_rec));
6739   -
6740   - /*
6741   - * We must be careful to only attempt delete of an
6742   - * extent block (and not the root inode block).
6743   - */
6744   - if (index > 0 && le16_to_cpu(el->l_next_free_rec) == 0) {
6745   - struct ocfs2_extent_block *eb =
6746   - (struct ocfs2_extent_block *)bh->b_data;
6747   -
6748   - /*
6749   - * Save this for use when processing the
6750   - * parent block.
6751   - */
6752   - deleted_eb = le64_to_cpu(eb->h_blkno);
6753   -
6754   - mlog(0, "deleting this extent block.\n");
6755   -
6756   - ocfs2_remove_from_cache(INODE_CACHE(inode), bh);
6757   -
6758   - BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0]));
6759   - BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos));
6760   - BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno));
6761   -
6762   - ret = ocfs2_cache_extent_block_free(&tc->tc_dealloc, eb);
6763   - /* An error here is not fatal. */
6764   - if (ret < 0)
6765   - mlog_errno(ret);
6766   - } else {
6767   - deleted_eb = 0;
6768   - }
6769   -
6770   - index--;
6771   - }
6772   -
6773   - ret = 0;
6774   -out:
6775   - return ret;
6776   -}
6777   -
6778   -static int ocfs2_do_truncate(struct ocfs2_super *osb,
6779   - unsigned int clusters_to_del,
6780   - struct inode *inode,
6781   - struct buffer_head *fe_bh,
6782   - handle_t *handle,
6783   - struct ocfs2_truncate_context *tc,
6784   - struct ocfs2_path *path,
6785   - struct ocfs2_alloc_context *meta_ac)
6786   -{
6787   - int status;
6788   - struct ocfs2_dinode *fe;
6789   - struct ocfs2_extent_block *last_eb = NULL;
6790   - struct ocfs2_extent_list *el;
6791   - struct buffer_head *last_eb_bh = NULL;
6792   - u64 delete_blk = 0;
6793   - u8 rec_flags;
6794   -
6795   - fe = (struct ocfs2_dinode *) fe_bh->b_data;
6796   -
6797   - status = ocfs2_find_new_last_ext_blk(inode, clusters_to_del,
6798   - path, &last_eb_bh);
6799   - if (status < 0) {
6800   - mlog_errno(status);
6801   - goto bail;
6802   - }
6803   -
6804   - /*
6805   - * Each component will be touched, so we might as well journal
6806   - * here to avoid having to handle errors later.
6807   - */
6808   - status = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
6809   - if (status < 0) {
6810   - mlog_errno(status);
6811   - goto bail;
6812   - }
6813   -
6814   - if (last_eb_bh) {
6815   - status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), last_eb_bh,
6816   - OCFS2_JOURNAL_ACCESS_WRITE);
6817   - if (status < 0) {
6818   - mlog_errno(status);
6819   - goto bail;
6820   - }
6821   -
6822   - last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
6823   - }
6824   -
6825   - el = &(fe->id2.i_list);
6826   -
6827   - /*
6828   - * Lower levels depend on this never happening, but it's best
6829   - * to check it up here before changing the tree.
6830   - */
6831   - if (el->l_tree_depth && el->l_recs[0].e_int_clusters == 0) {
6832   - ocfs2_error(inode->i_sb,
6833   - "Inode %lu has an empty extent record, depth %u\n",
6834   - inode->i_ino, le16_to_cpu(el->l_tree_depth));
6835   - status = -EROFS;
6836   - goto bail;
6837   - }
6838   -
6839   - dquot_free_space_nodirty(inode,
6840   - ocfs2_clusters_to_bytes(osb->sb, clusters_to_del));
6841   - spin_lock(&OCFS2_I(inode)->ip_lock);
6842   - OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
6843   - clusters_to_del;
6844   - spin_unlock(&OCFS2_I(inode)->ip_lock);
6845   - le32_add_cpu(&fe->i_clusters, -clusters_to_del);
6846   - inode->i_blocks = ocfs2_inode_sector_count(inode);
6847   -
6848   - status = ocfs2_trim_tree(inode, path, handle, tc,
6849   - clusters_to_del, &delete_blk, &rec_flags);
6850   - if (status) {
6851   - mlog_errno(status);
6852   - goto bail;
6853   - }
6854   -
6855   - if (le32_to_cpu(fe->i_clusters) == 0) {
6856   - /* trunc to zero is a special case. */
6857   - el->l_tree_depth = 0;
6858   - fe->i_last_eb_blk = 0;
6859   - } else if (last_eb)
6860   - fe->i_last_eb_blk = last_eb->h_blkno;
6861   -
6862   - ocfs2_journal_dirty(handle, fe_bh);
6863   -
6864   - if (last_eb) {
6865   - /* If there will be a new last extent block, then by
6866   - * definition, there cannot be any leaves to the right of
6867   - * him. */
6868   - last_eb->h_next_leaf_blk = 0;
6869   - ocfs2_journal_dirty(handle, last_eb_bh);
6870   - }
6871   -
6872   - if (delete_blk) {
6873   - if (rec_flags & OCFS2_EXT_REFCOUNTED)
6874   - status = ocfs2_decrease_refcount(inode, handle,
6875   - ocfs2_blocks_to_clusters(osb->sb,
6876   - delete_blk),
6877   - clusters_to_del, meta_ac,
6878   - &tc->tc_dealloc, 1);
6879   - else
6880   - status = ocfs2_truncate_log_append(osb, handle,
6881   - delete_blk,
6882   - clusters_to_del);
6883   - if (status < 0) {
6884   - mlog_errno(status);
6885   - goto bail;
6886   - }
6887   - }
6888   - status = 0;
6889   -bail:
6890   - brelse(last_eb_bh);
6891   - mlog_exit(status);
6892   - return status;
6893   -}
6894   -
6895 6577 static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh)
6896 6578 {
6897 6579 set_buffer_uptodate(bh);
6898 6580  
6899 6581  
6900 6582  
6901 6583  
6902 6584  
... ... @@ -7300,26 +6982,29 @@
7300 6982 */
7301 6983 int ocfs2_commit_truncate(struct ocfs2_super *osb,
7302 6984 struct inode *inode,
7303   - struct buffer_head *fe_bh,
7304   - struct ocfs2_truncate_context *tc)
  6985 + struct buffer_head *di_bh)
7305 6986 {
7306   - int status, i, credits, tl_sem = 0;
7307   - u32 clusters_to_del, new_highest_cpos, range;
  6987 + int status = 0, i, flags = 0;
  6988 + u32 new_highest_cpos, range, trunc_cpos, trunc_len, phys_cpos, coff;
7308 6989 u64 blkno = 0;
7309 6990 struct ocfs2_extent_list *el;
7310   - handle_t *handle = NULL;
7311   - struct inode *tl_inode = osb->osb_tl_inode;
  6991 + struct ocfs2_extent_rec *rec;
7312 6992 struct ocfs2_path *path = NULL;
7313   - struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
7314   - struct ocfs2_alloc_context *meta_ac = NULL;
7315   - struct ocfs2_refcount_tree *ref_tree = NULL;
  6993 + struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
  6994 + struct ocfs2_extent_list *root_el = &(di->id2.i_list);
  6995 + u64 refcount_loc = le64_to_cpu(di->i_refcount_loc);
  6996 + struct ocfs2_extent_tree et;
  6997 + struct ocfs2_cached_dealloc_ctxt dealloc;
7316 6998  
7317 6999 mlog_entry_void();
7318 7000  
  7001 + ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
  7002 + ocfs2_init_dealloc_ctxt(&dealloc);
  7003 +
7319 7004 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb,
7320 7005 i_size_read(inode));
7321 7006  
7322   - path = ocfs2_new_path(fe_bh, &di->id2.i_list,
  7007 + path = ocfs2_new_path(di_bh, &di->id2.i_list,
7323 7008 ocfs2_journal_access_di);
7324 7009 if (!path) {
7325 7010 status = -ENOMEM;
... ... @@ -7338,8 +7023,6 @@
7338 7023 goto bail;
7339 7024 }
7340 7025  
7341   - credits = 0;
7342   -
7343 7026 /*
7344 7027 * Truncate always works against the rightmost tree branch.
7345 7028 */
7346 7029  
7347 7030  
7348 7031  
7349 7032  
7350 7033  
7351 7034  
... ... @@ -7374,101 +7057,62 @@
7374 7057 }
7375 7058  
7376 7059 i = le16_to_cpu(el->l_next_free_rec) - 1;
7377   - range = le32_to_cpu(el->l_recs[i].e_cpos) +
7378   - ocfs2_rec_clusters(el, &el->l_recs[i]);
7379   - if (i == 0 && ocfs2_is_empty_extent(&el->l_recs[i])) {
7380   - clusters_to_del = 0;
7381   - } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) {
7382   - clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]);
7383   - blkno = le64_to_cpu(el->l_recs[i].e_blkno);
  7060 + rec = &el->l_recs[i];
  7061 + flags = rec->e_flags;
  7062 + range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
  7063 +
  7064 + if (i == 0 && ocfs2_is_empty_extent(rec)) {
  7065 + /*
  7066 + * Lower levels depend on this never happening, but it's best
  7067 + * to check it up here before changing the tree.
  7068 + */
  7069 + if (root_el->l_tree_depth && rec->e_int_clusters == 0) {
  7070 + ocfs2_error(inode->i_sb, "Inode %lu has an empty "
  7071 + "extent record, depth %u\n", inode->i_ino,
  7072 + le16_to_cpu(root_el->l_tree_depth));
  7073 + status = -EROFS;
  7074 + goto bail;
  7075 + }
  7076 + trunc_cpos = le32_to_cpu(rec->e_cpos);
  7077 + trunc_len = 0;
  7078 + blkno = 0;
  7079 + } else if (le32_to_cpu(rec->e_cpos) >= new_highest_cpos) {
  7080 + /*
  7081 + * Truncate entire record.
  7082 + */
  7083 + trunc_cpos = le32_to_cpu(rec->e_cpos);
  7084 + trunc_len = ocfs2_rec_clusters(el, rec);
  7085 + blkno = le64_to_cpu(rec->e_blkno);
7384 7086 } else if (range > new_highest_cpos) {
7385   - clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) +
7386   - le32_to_cpu(el->l_recs[i].e_cpos)) -
7387   - new_highest_cpos;
7388   - blkno = le64_to_cpu(el->l_recs[i].e_blkno) +
7389   - ocfs2_clusters_to_blocks(inode->i_sb,
7390   - ocfs2_rec_clusters(el, &el->l_recs[i]) -
7391   - clusters_to_del);
  7087 + /*
  7088 + * Partial truncate. it also should be
  7089 + * the last truncate we're doing.
  7090 + */
  7091 + trunc_cpos = new_highest_cpos;
  7092 + trunc_len = range - new_highest_cpos;
  7093 + coff = new_highest_cpos - le32_to_cpu(rec->e_cpos);
  7094 + blkno = le64_to_cpu(rec->e_blkno) +
  7095 + ocfs2_clusters_to_blocks(inode->i_sb, coff);
7392 7096 } else {
  7097 + /*
  7098 + * Truncate completed, leave happily.
  7099 + */
7393 7100 status = 0;
7394 7101 goto bail;
7395 7102 }
7396 7103  
7397   - mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n",
7398   - clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr);
  7104 + phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
7399 7105  
7400   - if (el->l_recs[i].e_flags & OCFS2_EXT_REFCOUNTED && clusters_to_del) {
7401   - BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
7402   - OCFS2_HAS_REFCOUNT_FL));
7403   -
7404   - status = ocfs2_lock_refcount_tree(osb,
7405   - le64_to_cpu(di->i_refcount_loc),
7406   - 1, &ref_tree, NULL);
7407   - if (status) {
7408   - mlog_errno(status);
7409   - goto bail;
7410   - }
7411   -
7412   - status = ocfs2_prepare_refcount_change_for_del(inode, fe_bh,
7413   - blkno,
7414   - clusters_to_del,
7415   - &credits,
7416   - &meta_ac);
7417   - if (status < 0) {
7418   - mlog_errno(status);
7419   - goto bail;
7420   - }
7421   - }
7422   -
7423   - mutex_lock(&tl_inode->i_mutex);
7424   - tl_sem = 1;
7425   - /* ocfs2_truncate_log_needs_flush guarantees us at least one
7426   - * record is free for use. If there isn't any, we flush to get
7427   - * an empty truncate log. */
7428   - if (ocfs2_truncate_log_needs_flush(osb)) {
7429   - status = __ocfs2_flush_truncate_log(osb);
7430   - if (status < 0) {
7431   - mlog_errno(status);
7432   - goto bail;
7433   - }
7434   - }
7435   -
7436   - credits += ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del,
7437   - (struct ocfs2_dinode *)fe_bh->b_data,
7438   - el);
7439   - handle = ocfs2_start_trans(osb, credits);
7440   - if (IS_ERR(handle)) {
7441   - status = PTR_ERR(handle);
7442   - handle = NULL;
7443   - mlog_errno(status);
7444   - goto bail;
7445   - }
7446   -
7447   - status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle,
7448   - tc, path, meta_ac);
  7106 + status = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
  7107 + phys_cpos, trunc_len, flags, &dealloc,
  7108 + refcount_loc);
7449 7109 if (status < 0) {
7450 7110 mlog_errno(status);
7451 7111 goto bail;
7452 7112 }
7453 7113  
7454   - mutex_unlock(&tl_inode->i_mutex);
7455   - tl_sem = 0;
7456   -
7457   - ocfs2_commit_trans(osb, handle);
7458   - handle = NULL;
7459   -
7460 7114 ocfs2_reinit_path(path, 1);
7461 7115  
7462   - if (meta_ac) {
7463   - ocfs2_free_alloc_context(meta_ac);
7464   - meta_ac = NULL;
7465   - }
7466   -
7467   - if (ref_tree) {
7468   - ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7469   - ref_tree = NULL;
7470   - }
7471   -
7472 7116 /*
7473 7117 * The check above will catch the case where we've truncated
7474 7118 * away all allocation.
7475 7119  
7476 7120  
... ... @@ -7479,24 +7123,9 @@
7479 7123  
7480 7124 ocfs2_schedule_truncate_log_flush(osb, 1);
7481 7125  
7482   - if (tl_sem)
7483   - mutex_unlock(&tl_inode->i_mutex);
  7126 + ocfs2_run_deallocs(osb, &dealloc);
7484 7127  
7485   - if (handle)
7486   - ocfs2_commit_trans(osb, handle);
7487   -
7488   - if (meta_ac)
7489   - ocfs2_free_alloc_context(meta_ac);
7490   -
7491   - if (ref_tree)
7492   - ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7493   -
7494   - ocfs2_run_deallocs(osb, &tc->tc_dealloc);
7495   -
7496 7128 ocfs2_free_path(path);
7497   -
7498   - /* This will drop the ext_alloc cluster lock for us */
7499   - ocfs2_free_truncate_context(tc);
7500 7129  
7501 7130 mlog_exit(status);
7502 7131 return status;
... ... @@ -140,8 +140,9 @@
140 140 struct ocfs2_cached_dealloc_ctxt *dealloc);
141 141 int ocfs2_remove_btree_range(struct inode *inode,
142 142 struct ocfs2_extent_tree *et,
143   - u32 cpos, u32 phys_cpos, u32 len,
144   - struct ocfs2_cached_dealloc_ctxt *dealloc);
  143 + u32 cpos, u32 phys_cpos, u32 len, int flags,
  144 + struct ocfs2_cached_dealloc_ctxt *dealloc,
  145 + u64 refcount_loc);
145 146  
146 147 int ocfs2_num_free_extents(struct ocfs2_super *osb,
147 148 struct ocfs2_extent_tree *et);
... ... @@ -233,8 +234,7 @@
233 234 struct ocfs2_truncate_context **tc);
234 235 int ocfs2_commit_truncate(struct ocfs2_super *osb,
235 236 struct inode *inode,
236   - struct buffer_head *fe_bh,
237   - struct ocfs2_truncate_context *tc);
  237 + struct buffer_head *di_bh);
238 238 int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
239 239 unsigned int start, unsigned int end, int trunc);
240 240  
... ... @@ -4526,8 +4526,8 @@
4526 4526  
4527 4527 p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno);
4528 4528  
4529   - ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen,
4530   - &dealloc);
  4529 + ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0,
  4530 + &dealloc, 0);
4531 4531 if (ret) {
4532 4532 mlog_errno(ret);
4533 4533 goto out;
... ... @@ -444,7 +444,6 @@
444 444 int status = 0;
445 445 struct ocfs2_dinode *fe = NULL;
446 446 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
447   - struct ocfs2_truncate_context *tc = NULL;
448 447  
449 448 mlog_entry("(inode = %llu, new_i_size = %llu\n",
450 449 (unsigned long long)OCFS2_I(inode)->ip_blkno,
451 450  
... ... @@ -515,18 +514,12 @@
515 514 goto bail_unlock_sem;
516 515 }
517 516  
518   - status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc);
  517 + status = ocfs2_commit_truncate(osb, inode, di_bh);
519 518 if (status < 0) {
520 519 mlog_errno(status);
521 520 goto bail_unlock_sem;
522 521 }
523 522  
524   - status = ocfs2_commit_truncate(osb, inode, di_bh, tc);
525   - if (status < 0) {
526   - mlog_errno(status);
527   - goto bail_unlock_sem;
528   - }
529   -
530 523 /* TODO: orphan dir cleanup here. */
531 524 bail_unlock_sem:
532 525 up_write(&OCFS2_I(inode)->ip_alloc_sem);
... ... @@ -1494,7 +1487,7 @@
1494 1487 if (phys_cpos != 0) {
1495 1488 ret = ocfs2_remove_btree_range(inode, &et, cpos,
1496 1489 phys_cpos, alloc_size,
1497   - &dealloc);
  1490 + 0, &dealloc, 0);
1498 1491 if (ret) {
1499 1492 mlog_errno(ret);
1500 1493 goto out;
... ... @@ -544,7 +544,6 @@
544 544 struct buffer_head *fe_bh)
545 545 {
546 546 int status = 0;
547   - struct ocfs2_truncate_context *tc = NULL;
548 547 struct ocfs2_dinode *fe;
549 548 handle_t *handle = NULL;
550 549  
... ... @@ -586,13 +585,7 @@
586 585 ocfs2_commit_trans(osb, handle);
587 586 handle = NULL;
588 587  
589   - status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc);
590   - if (status < 0) {
591   - mlog_errno(status);
592   - goto out;
593   - }
594   -
595   - status = ocfs2_commit_truncate(osb, inode, fe_bh, tc);
  588 + status = ocfs2_commit_truncate(osb, inode, fe_bh);
596 589 if (status < 0) {
597 590 mlog_errno(status);
598 591 goto out;
fs/ocfs2/refcounttree.c
... ... @@ -2509,20 +2509,19 @@
2509 2509 *
2510 2510 * Normally the refcount blocks store these refcount should be
2511 2511 * contiguous also, so that we can get the number easily.
2512   - * As for meta_ac, we will at most add split 2 refcount record and
2513   - * 2 more refcount block, so just check it in a rough way.
  2512 + * We will at most add split 2 refcount records and 2 more
  2513 + * refcount blocks, so just check it in a rough way.
2514 2514 *
2515 2515 * Caller must hold refcount tree lock.
2516 2516 */
2517 2517 int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
2518   - struct buffer_head *di_bh,
  2518 + u64 refcount_loc,
2519 2519 u64 phys_blkno,
2520 2520 u32 clusters,
2521 2521 int *credits,
2522   - struct ocfs2_alloc_context **meta_ac)
  2522 + int *ref_blocks)
2523 2523 {
2524   - int ret, ref_blocks = 0;
2525   - struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
  2524 + int ret;
2526 2525 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2527 2526 struct buffer_head *ref_root_bh = NULL;
2528 2527 struct ocfs2_refcount_tree *tree;
2529 2528  
... ... @@ -2539,14 +2538,13 @@
2539 2538 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
2540 2539  
2541 2540 ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
2542   - le64_to_cpu(di->i_refcount_loc), &tree);
  2541 + refcount_loc, &tree);
2543 2542 if (ret) {
2544 2543 mlog_errno(ret);
2545 2544 goto out;
2546 2545 }
2547 2546  
2548   - ret = ocfs2_read_refcount_block(&tree->rf_ci,
2549   - le64_to_cpu(di->i_refcount_loc),
  2547 + ret = ocfs2_read_refcount_block(&tree->rf_ci, refcount_loc,
2550 2548 &ref_root_bh);
2551 2549 if (ret) {
2552 2550 mlog_errno(ret);
2553 2551  
... ... @@ -2557,21 +2555,14 @@
2557 2555 &tree->rf_ci,
2558 2556 ref_root_bh,
2559 2557 start_cpos, clusters,
2560   - &ref_blocks, credits);
  2558 + ref_blocks, credits);
2561 2559 if (ret) {
2562 2560 mlog_errno(ret);
2563 2561 goto out;
2564 2562 }
2565 2563  
2566   - mlog(0, "reserve new metadata %d, credits = %d\n",
2567   - ref_blocks, *credits);
2568   -
2569   - if (ref_blocks) {
2570   - ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2571   - ref_blocks, meta_ac);
2572   - if (ret)
2573   - mlog_errno(ret);
2574   - }
  2564 + mlog(0, "reserve new metadata %d blocks, credits = %d\n",
  2565 + *ref_blocks, *credits);
2575 2566  
2576 2567 out:
2577 2568 brelse(ref_root_bh);
fs/ocfs2/refcounttree.h
... ... @@ -47,11 +47,11 @@
47 47 struct ocfs2_cached_dealloc_ctxt *dealloc,
48 48 int delete);
49 49 int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
50   - struct buffer_head *di_bh,
  50 + u64 refcount_loc,
51 51 u64 phys_blkno,
52 52 u32 clusters,
53 53 int *credits,
54   - struct ocfs2_alloc_context **meta_ac);
  54 + int *ref_blocks);
55 55 int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
56 56 u32 cpos, u32 write_len, u32 max_cpos);
57 57