Commit 38a04e432768ec0b016f3c687b4de31ac111ae59

Authored by Tao Ma
Committed by Joel Becker
1 parent 56f3f55cf9

ocfs2: Find proper end cpos for a leaf refcount block.

The ocfs2 refcount tree is stored as an extent tree, where each
leaf ocfs2_extent_rec points to a refcount block.

The following steps can trigger a kernel panic.
# Create a refcount-enabled ocfs2 volume (-b 512: block size, -C 1M: cluster
# size) and mount it.
mkfs.ocfs2 -b 512 -C 1M --fs-features=refcount $DEVICE
mount -t ocfs2 $DEVICE $MNT_DIR
# Two randomly named data files; both reflink names are derived from FILE_NAME.
FILE_NAME=$RANDOM
FILE_NAME_1=$RANDOM
FILE_REF="${FILE_NAME}_ref"
FILE_REF_1="${FILE_NAME}_ref_1"
# Append 1048576 bytes to each of the two files in turn, 305 times.
for((i=0;i<305;i++))
do
# /mnt/1048576 is a file that is 1048576 bytes in size.
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME_1
done
# Append three more chunks to FILE_NAME only.
for((i=0;i<3;i++))
do
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
done

# Two more alternating rounds on both files.
for((i=0;i<2;i++))
do
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME_1
done

# One more chunk for FILE_NAME only.
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME

# Eleven more alternating rounds on both files.
for((i=0;i<11;i++))
do
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME_1
done
# Reflink FILE_NAME to FILE_REF, then write 4096 bytes at several offsets
# of the reflinked copy and of the original.
reflink $MNT_DIR/$FILE_NAME $MNT_DIR/$FILE_REF
# write_f is a program that writes some bytes to a file at a given offset.
# Usage: write_f -f file_name -l offset -w write_bytes.
./write_f -f $MNT_DIR/$FILE_REF -l $[310*1048576] -w 4096
./write_f -f $MNT_DIR/$FILE_REF -l $[306*1048576] -w 4096
./write_f -f $MNT_DIR/$FILE_REF -l $[311*1048576] -w 4096
./write_f -f $MNT_DIR/$FILE_NAME -l $[310*1048576] -w 4096
./write_f -f $MNT_DIR/$FILE_NAME -l $[311*1048576] -w 4096
# Reflink FILE_NAME a second time, then write to the original once more.
reflink $MNT_DIR/$FILE_NAME $MNT_DIR/$FILE_REF_1
./write_f -f $MNT_DIR/$FILE_NAME -l $[311*1048576] -w 4096
# The kernel panic happens here.

The reason is that if the ocfs2_extent_rec is the last record
in a leaf extent block, the old solution fails to find a
suitable end cpos. So this patch walks through the b-tree,
finds the next subtree root, and gets the cpos that the next
sub-tree starts from.

By the way, I have run Tristan's test case against the patched kernel
for several days, and this type of kernel panic has not happened again.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>

Showing 3 changed files with 119 additions and 13 deletions Side-by-side Diff

... ... @@ -1765,9 +1765,9 @@
1765 1765 *
1766 1766 * The array index of the subtree root is passed back.
1767 1767 */
1768   -static int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
1769   - struct ocfs2_path *left,
1770   - struct ocfs2_path *right)
  1768 +int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
  1769 + struct ocfs2_path *left,
  1770 + struct ocfs2_path *right)
1771 1771 {
1772 1772 int i = 0;
1773 1773  
... ... @@ -2872,8 +2872,8 @@
2872 2872 * This looks similar, but is subtly different to
2873 2873 * ocfs2_find_cpos_for_left_leaf().
2874 2874 */
2875   -static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
2876   - struct ocfs2_path *path, u32 *cpos)
  2875 +int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
  2876 + struct ocfs2_path *path, u32 *cpos)
2877 2877 {
2878 2878 int i, j, ret = 0;
2879 2879 u64 blkno;
... ... @@ -317,5 +317,10 @@
317 317 int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
318 318 handle_t *handle,
319 319 struct ocfs2_path *path);
  320 +int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
  321 + struct ocfs2_path *path, u32 *cpos);
  322 +int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
  323 + struct ocfs2_path *left,
  324 + struct ocfs2_path *right);
320 325 #endif /* OCFS2_ALLOC_H */
fs/ocfs2/refcounttree.c
... ... @@ -969,6 +969,103 @@
969 969 }
970 970  
971 971 /*
  972 + * Find the end range for a leaf refcount block indicated by
  973 + * el->l_recs[index].e_blkno.
  974 + */
  975 +static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci,
  976 + struct buffer_head *ref_root_bh,
  977 + struct ocfs2_extent_block *eb,
  978 + struct ocfs2_extent_list *el,
  979 + int index, u32 *cpos_end)
  980 +{
  981 + int ret, i, subtree_root;
  982 + u32 cpos;
  983 + u64 blkno;
  984 + struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
  985 + struct ocfs2_path *left_path = NULL, *right_path = NULL;
  986 + struct ocfs2_extent_tree et;
  987 + struct ocfs2_extent_list *tmp_el;
  988 +
  989 + if (index < le16_to_cpu(el->l_next_free_rec) - 1) {
  990 + /*
  991 + * We have a extent rec after index, so just use the e_cpos
  992 + * of the next extent rec.
  993 + */
  994 + *cpos_end = le32_to_cpu(el->l_recs[index+1].e_cpos);
  995 + return 0;
  996 + }
  997 +
  998 + if (!eb || (eb && !eb->h_next_leaf_blk)) {
  999 + /*
  1000 + * We are the last extent rec, so any high cpos should
  1001 + * be stored in this leaf refcount block.
  1002 + */
  1003 + *cpos_end = UINT_MAX;
  1004 + return 0;
  1005 + }
  1006 +
  1007 + /*
  1008 + * If the extent block isn't the last one, we have to find
  1009 + * the subtree root between this extent block and the next
  1010 + * leaf extent block and get the corresponding e_cpos from
  1011 + * the subroot. Otherwise we may corrupt the b-tree.
  1012 + */
  1013 + ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
  1014 +
  1015 + left_path = ocfs2_new_path_from_et(&et);
  1016 + if (!left_path) {
  1017 + ret = -ENOMEM;
  1018 + mlog_errno(ret);
  1019 + goto out;
  1020 + }
  1021 +
  1022 + cpos = le32_to_cpu(eb->h_list.l_recs[index].e_cpos);
  1023 + ret = ocfs2_find_path(ci, left_path, cpos);
  1024 + if (ret) {
  1025 + mlog_errno(ret);
  1026 + goto out;
  1027 + }
  1028 +
  1029 + right_path = ocfs2_new_path_from_path(left_path);
  1030 + if (!right_path) {
  1031 + ret = -ENOMEM;
  1032 + mlog_errno(ret);
  1033 + goto out;
  1034 + }
  1035 +
  1036 + ret = ocfs2_find_cpos_for_right_leaf(sb, left_path, &cpos);
  1037 + if (ret) {
  1038 + mlog_errno(ret);
  1039 + goto out;
  1040 + }
  1041 +
  1042 + ret = ocfs2_find_path(ci, right_path, cpos);
  1043 + if (ret) {
  1044 + mlog_errno(ret);
  1045 + goto out;
  1046 + }
  1047 +
  1048 + subtree_root = ocfs2_find_subtree_root(&et, left_path,
  1049 + right_path);
  1050 +
  1051 + tmp_el = left_path->p_node[subtree_root].el;
  1052 + blkno = left_path->p_node[subtree_root+1].bh->b_blocknr;
  1053 + for (i = 0; i < le32_to_cpu(tmp_el->l_next_free_rec); i++) {
  1054 + if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) {
  1055 + *cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos);
  1056 + break;
  1057 + }
  1058 + }
  1059 +
  1060 + BUG_ON(i == le32_to_cpu(tmp_el->l_next_free_rec));
  1061 +
  1062 +out:
  1063 + ocfs2_free_path(left_path);
  1064 + ocfs2_free_path(right_path);
  1065 + return ret;
  1066 +}
  1067 +
  1068 +/*
972 1069 * Given a cpos and len, try to find the refcount record which contains cpos.
973 1070 * 1. If cpos can be found in one refcount record, return the record.
974 1071 * 2. If cpos can't be found, return a fake record which start from cpos
975 1072  
... ... @@ -983,10 +1080,10 @@
983 1080 struct buffer_head **ret_bh)
984 1081 {
985 1082 int ret = 0, i, found;
986   - u32 low_cpos;
  1083 + u32 low_cpos, uninitialized_var(cpos_end);
987 1084 struct ocfs2_extent_list *el;
988   - struct ocfs2_extent_rec *tmp, *rec = NULL;
989   - struct ocfs2_extent_block *eb;
  1085 + struct ocfs2_extent_rec *rec = NULL;
  1086 + struct ocfs2_extent_block *eb = NULL;
990 1087 struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL;
991 1088 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
992 1089 struct ocfs2_refcount_block *rb =
993 1090  
... ... @@ -1034,12 +1131,16 @@
1034 1131 }
1035 1132 }
1036 1133  
1037   - /* adjust len when we have ocfs2_extent_rec after it. */
1038   - if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) {
1039   - tmp = &el->l_recs[i+1];
  1134 + if (found) {
  1135 + ret = ocfs2_get_refcount_cpos_end(ci, ref_root_bh,
  1136 + eb, el, i, &cpos_end);
  1137 + if (ret) {
  1138 + mlog_errno(ret);
  1139 + goto out;
  1140 + }
1040 1141  
1041   - if (le32_to_cpu(tmp->e_cpos) < cpos + len)
1042   - len = le32_to_cpu(tmp->e_cpos) - cpos;
  1142 + if (cpos_end < low_cpos + len)
  1143 + len = cpos_end - low_cpos;
1043 1144 }
1044 1145  
1045 1146 ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno),