Commit 38a04e432768ec0b016f3c687b4de31ac111ae59
Committed by
Joel Becker
1 parent
56f3f55cf9
Exists in
master
and in
7 other branches
ocfs2: Find proper end cpos for a leaf refcount block.
The ocfs2 refcount tree is stored as an extent tree, while the leaf ocfs2_refcount_rec points to a refcount block.

The following steps can trigger a kernel panic:

mkfs.ocfs2 -b 512 -C 1M --fs-features=refcount $DEVICE
mount -t ocfs2 $DEVICE $MNT_DIR
FILE_NAME=$RANDOM
FILE_NAME_1=$RANDOM
FILE_REF="${FILE_NAME}_ref"
FILE_REF_1="${FILE_NAME}_ref_1"
for((i=0;i<305;i++))
do
# /mnt/1048576 is a file of size 1048576 bytes.
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME_1
done
for((i=0;i<3;i++))
do
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
done
for((i=0;i<2;i++))
do
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME_1
done
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
for((i=0;i<11;i++))
do
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME
cat /mnt/1048576 >> $MNT_DIR/$FILE_NAME_1
done
reflink $MNT_DIR/$FILE_NAME $MNT_DIR/$FILE_REF
# write_f is a program which writes some bytes to a file at a given offset.
# Usage: write_f -f file_name -l offset -w write_bytes.
./write_f -f $MNT_DIR/$FILE_REF -l $[310*1048576] -w 4096
./write_f -f $MNT_DIR/$FILE_REF -l $[306*1048576] -w 4096
./write_f -f $MNT_DIR/$FILE_REF -l $[311*1048576] -w 4096
./write_f -f $MNT_DIR/$FILE_NAME -l $[310*1048576] -w 4096
./write_f -f $MNT_DIR/$FILE_NAME -l $[311*1048576] -w 4096
reflink $MNT_DIR/$FILE_NAME $MNT_DIR/$FILE_REF_1
./write_f -f $MNT_DIR/$FILE_NAME -l $[311*1048576] -w 4096
# The kernel panics here.

The reason is that if the ocfs2_extent_rec is the last record in a leaf extent block, the old solution fails to find the suitable end cpos. So this patch walks through the b-tree, finds the next subtree root, and gets the cpos at which the next subtree starts.

By the way, I have run Tristan's test case against the patched kernel for several days, and this type of kernel panic has not happened again.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Showing 3 changed files with 119 additions and 13 deletions Side-by-side Diff
fs/ocfs2/alloc.c
... | ... | @@ -1765,9 +1765,9 @@ |
1765 | 1765 | * |
1766 | 1766 | * The array index of the subtree root is passed back. |
1767 | 1767 | */ |
1768 | -static int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et, | |
1769 | - struct ocfs2_path *left, | |
1770 | - struct ocfs2_path *right) | |
1768 | +int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et, | |
1769 | + struct ocfs2_path *left, | |
1770 | + struct ocfs2_path *right) | |
1771 | 1771 | { |
1772 | 1772 | int i = 0; |
1773 | 1773 | |
... | ... | @@ -2872,8 +2872,8 @@ |
2872 | 2872 | * This looks similar, but is subtly different to |
2873 | 2873 | * ocfs2_find_cpos_for_left_leaf(). |
2874 | 2874 | */ |
2875 | -static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, | |
2876 | - struct ocfs2_path *path, u32 *cpos) | |
2875 | +int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, | |
2876 | + struct ocfs2_path *path, u32 *cpos) | |
2877 | 2877 | { |
2878 | 2878 | int i, j, ret = 0; |
2879 | 2879 | u64 blkno; |
fs/ocfs2/alloc.h
... | ... | @@ -317,5 +317,10 @@ |
317 | 317 | int ocfs2_journal_access_path(struct ocfs2_caching_info *ci, |
318 | 318 | handle_t *handle, |
319 | 319 | struct ocfs2_path *path); |
320 | +int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, | |
321 | + struct ocfs2_path *path, u32 *cpos); | |
322 | +int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et, | |
323 | + struct ocfs2_path *left, | |
324 | + struct ocfs2_path *right); | |
320 | 325 | #endif /* OCFS2_ALLOC_H */ |
fs/ocfs2/refcounttree.c
... | ... | @@ -969,6 +969,103 @@ |
969 | 969 | } |
970 | 970 | |
971 | 971 | /* |
972 | + * Find the end range for a leaf refcount block indicated by | |
973 | + * el->l_recs[index].e_blkno. | |
974 | + */ | |
975 | +static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci, | |
976 | + struct buffer_head *ref_root_bh, | |
977 | + struct ocfs2_extent_block *eb, | |
978 | + struct ocfs2_extent_list *el, | |
979 | + int index, u32 *cpos_end) | |
980 | +{ | |
981 | + int ret, i, subtree_root; | |
982 | + u32 cpos; | |
983 | + u64 blkno; | |
984 | + struct super_block *sb = ocfs2_metadata_cache_get_super(ci); | |
985 | + struct ocfs2_path *left_path = NULL, *right_path = NULL; | |
986 | + struct ocfs2_extent_tree et; | |
987 | + struct ocfs2_extent_list *tmp_el; | |
988 | + | |
989 | + if (index < le16_to_cpu(el->l_next_free_rec) - 1) { | |
990 | + /* | |
990 | + * We have an extent rec after index, so just use the e_cpos | |
992 | + * of the next extent rec. | |
993 | + */ | |
994 | + *cpos_end = le32_to_cpu(el->l_recs[index+1].e_cpos); | |
995 | + return 0; | |
996 | + } | |
997 | + | |
998 | + if (!eb || (eb && !eb->h_next_leaf_blk)) { | |
999 | + /* | |
1000 | + * We are the last extent rec, so any high cpos should | |
1001 | + * be stored in this leaf refcount block. | |
1002 | + */ | |
1003 | + *cpos_end = UINT_MAX; | |
1004 | + return 0; | |
1005 | + } | |
1006 | + | |
1007 | + /* | |
1008 | + * If the extent block isn't the last one, we have to find | |
1009 | + * the subtree root between this extent block and the next | |
1010 | + * leaf extent block and get the corresponding e_cpos from | |
1011 | + * the subroot. Otherwise we may corrupt the b-tree. | |
1012 | + */ | |
1013 | + ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); | |
1014 | + | |
1015 | + left_path = ocfs2_new_path_from_et(&et); | |
1016 | + if (!left_path) { | |
1017 | + ret = -ENOMEM; | |
1018 | + mlog_errno(ret); | |
1019 | + goto out; | |
1020 | + } | |
1021 | + | |
1022 | + cpos = le32_to_cpu(eb->h_list.l_recs[index].e_cpos); | |
1023 | + ret = ocfs2_find_path(ci, left_path, cpos); | |
1024 | + if (ret) { | |
1025 | + mlog_errno(ret); | |
1026 | + goto out; | |
1027 | + } | |
1028 | + | |
1029 | + right_path = ocfs2_new_path_from_path(left_path); | |
1030 | + if (!right_path) { | |
1031 | + ret = -ENOMEM; | |
1032 | + mlog_errno(ret); | |
1033 | + goto out; | |
1034 | + } | |
1035 | + | |
1036 | + ret = ocfs2_find_cpos_for_right_leaf(sb, left_path, &cpos); | |
1037 | + if (ret) { | |
1038 | + mlog_errno(ret); | |
1039 | + goto out; | |
1040 | + } | |
1041 | + | |
1042 | + ret = ocfs2_find_path(ci, right_path, cpos); | |
1043 | + if (ret) { | |
1044 | + mlog_errno(ret); | |
1045 | + goto out; | |
1046 | + } | |
1047 | + | |
1048 | + subtree_root = ocfs2_find_subtree_root(&et, left_path, | |
1049 | + right_path); | |
1050 | + | |
1051 | + tmp_el = left_path->p_node[subtree_root].el; | |
1052 | + blkno = left_path->p_node[subtree_root+1].bh->b_blocknr; | |
1053 | + for (i = 0; i < le32_to_cpu(tmp_el->l_next_free_rec); i++) { | |
1054 | + if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) { | |
1055 | + *cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos); | |
1056 | + break; | |
1057 | + } | |
1058 | + } | |
1059 | + | |
1060 | + BUG_ON(i == le32_to_cpu(tmp_el->l_next_free_rec)); | |
1061 | + | |
1062 | +out: | |
1063 | + ocfs2_free_path(left_path); | |
1064 | + ocfs2_free_path(right_path); | |
1065 | + return ret; | |
1066 | +} | |
1067 | + | |
1068 | +/* | |
972 | 1069 | * Given a cpos and len, try to find the refcount record which contains cpos. |
973 | 1070 | * 1. If cpos can be found in one refcount record, return the record. |
974 | 1071 | * 2. If cpos can't be found, return a fake record which start from cpos |
975 | 1072 | |
... | ... | @@ -983,10 +1080,10 @@ |
983 | 1080 | struct buffer_head **ret_bh) |
984 | 1081 | { |
985 | 1082 | int ret = 0, i, found; |
986 | - u32 low_cpos; | |
1083 | + u32 low_cpos, uninitialized_var(cpos_end); | |
987 | 1084 | struct ocfs2_extent_list *el; |
988 | - struct ocfs2_extent_rec *tmp, *rec = NULL; | |
989 | - struct ocfs2_extent_block *eb; | |
1085 | + struct ocfs2_extent_rec *rec = NULL; | |
1086 | + struct ocfs2_extent_block *eb = NULL; | |
990 | 1087 | struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL; |
991 | 1088 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); |
992 | 1089 | struct ocfs2_refcount_block *rb = |
993 | 1090 | |
... | ... | @@ -1034,12 +1131,16 @@ |
1034 | 1131 | } |
1035 | 1132 | } |
1036 | 1133 | |
1037 | - /* adjust len when we have ocfs2_extent_rec after it. */ | |
1038 | - if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) { | |
1039 | - tmp = &el->l_recs[i+1]; | |
1134 | + if (found) { | |
1135 | + ret = ocfs2_get_refcount_cpos_end(ci, ref_root_bh, | |
1136 | + eb, el, i, &cpos_end); | |
1137 | + if (ret) { | |
1138 | + mlog_errno(ret); | |
1139 | + goto out; | |
1140 | + } | |
1040 | 1141 | |
1041 | - if (le32_to_cpu(tmp->e_cpos) < cpos + len) | |
1042 | - len = le32_to_cpu(tmp->e_cpos) - cpos; | |
1142 | + if (cpos_end < low_cpos + len) | |
1143 | + len = cpos_end - low_cpos; | |
1043 | 1144 | } |
1044 | 1145 | |
1045 | 1146 | ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno), |