Commit 3c47d54170b6a678875566b1b8d6dcf57904e49b

Authored by Tao Ma
Committed by Theodore Ts'o
1 parent 978fef914a

ext4: let add_dir_entry handle inline data properly

This patch lets add_dir_entry handle the inline data case. The dir is
first initialized as an inline dir, and then we can try to add files
to it. When the inline space can't hold all the entries, a dir block
is created and the dir entries are moved to it.

Also, for an inlined dir, "." and ".." are removed and we use only
4 bytes to store the parent inode number. These two entries are
added back when we convert an inline dir to a block-based one.

[ Folded in patch from Dan Carpenter to remove an unused variable. ]

Signed-off-by: Tao Ma <boyu.mt@taobao.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

Showing 4 changed files with 430 additions and 10 deletions Side-by-side Diff

... ... @@ -1616,6 +1616,11 @@
1616 1616 __le32 det_checksum; /* crc32c(uuid+inum+dirblock) */
1617 1617 };
1618 1618  
  /*
   * Return a pointer to the struct ext4_dir_entry_tail that occupies the
   * last sizeof(struct ext4_dir_entry_tail) bytes of a directory block of
   * `blocksize` bytes starting at `block`.
   */
  1619 +#define EXT4_DIRENT_TAIL(block, blocksize) \
  1620 + ((struct ext4_dir_entry_tail *)(((void *)(block)) + \
  1621 + ((blocksize) - \
  1622 + sizeof(struct ext4_dir_entry_tail))))
  1623 +
1619 1624 /*
1620 1625 * Ext4 directory file types. Only the low 3 bits are used. The
1621 1626 * other bits are reserved for now.
... ... @@ -2435,6 +2440,11 @@
2435 2440 struct ext4_dir_entry_2 *de,
2436 2441 int blocksize, int csum_size,
2437 2442 unsigned int parent_ino, int dotdot_real_len);
  2443 +extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
  2444 + unsigned int blocksize);
  2445 +extern int ext4_handle_dirty_dirent_node(handle_t *handle,
  2446 + struct inode *inode,
  2447 + struct buffer_head *bh);
2438 2448  
2439 2449 /* symlink.c */
2440 2450 extern const struct inode_operations ext4_symlink_inode_operations;
... ... @@ -18,6 +18,7 @@
18 18  
19 19 #define EXT4_XATTR_SYSTEM_DATA "data"
20 20 #define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS))
  21 +#define EXT4_INLINE_DOTDOT_SIZE 4 /* bytes at the start of an inline dir's i_block that hold the parent inode number */
21 22  
22 23 int ext4_get_inline_size(struct inode *inode)
23 24 {
... ... @@ -947,6 +948,382 @@
947 948 mark_inode_dirty(inode);
948 949  
949 950 return copied;
  951 +}
  952 +
  953 +#ifdef INLINE_DIR_DEBUG
  954 +void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
  955 + void *inline_start, int inline_size)
  956 +{
  957 + int offset;
  958 + unsigned short de_len;
  959 + struct ext4_dir_entry_2 *de = inline_start;
  960 + void *dlimit = inline_start + inline_size;
  961 +
  962 + trace_printk("inode %lu\n", dir->i_ino);
  963 + offset = 0;
  964 + while ((void *)de < dlimit) {
  965 + de_len = ext4_rec_len_from_disk(de->rec_len, inline_size);
  966 + trace_printk("de: off %u rlen %u name %*.s nlen %u ino %u\n",
  967 + offset, de_len, de->name_len, de->name,
  968 + de->name_len, le32_to_cpu(de->inode));
  969 + if (ext4_check_dir_entry(dir, NULL, de, bh,
  970 + inline_start, inline_size, offset))
  971 + BUG();
  972 +
  973 + offset += de_len;
  974 + de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
  975 + }
  976 +}
  977 +#else
  978 +#define ext4_show_inline_dir(dir, bh, inline_start, inline_size)
  979 +#endif
  980 +
  981 +/*
  982 + * Add a new entry into an inline dir.
  983 + * Return 1 on success; otherwise the error from ext4_find_dest_de() or
  984 + * ext4_journal_get_write_access() (callers here check for -ENOSPC).
  985 + */
  986 +static int ext4_add_dirent_to_inline(handle_t *handle,
  987 + struct dentry *dentry,
  988 + struct inode *inode,
  989 + struct ext4_iloc *iloc,
  990 + void *inline_start, int inline_size)
  991 +{
  992 + struct inode *dir = dentry->d_parent->d_inode;
  993 + const char *name = dentry->d_name.name;
  994 + int namelen = dentry->d_name.len;
  995 + unsigned short reclen;
  996 + int err;
  997 + struct ext4_dir_entry_2 *de;
  998 +
  999 + reclen = EXT4_DIR_REC_LEN(namelen);
  1000 + err = ext4_find_dest_de(dir, inode, iloc->bh,
  1001 + inline_start, inline_size,
  1002 + name, namelen, &de);
  1003 + if (err)
  1004 + return err;
  1005 +
  1006 + err = ext4_journal_get_write_access(handle, iloc->bh);
  1007 + if (err)
  1008 + return err;
  1009 + ext4_insert_dentry(inode, de, inline_size, name, namelen);
  1010 +
  1011 + ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);
  1012 +
  1013 + /*
  1014 + * XXX shouldn't update any times until successful
  1015 + * completion of syscall, but too many callers depend
  1016 + * on this.
  1017 + *
  1018 + * XXX similarly, too many callers depend on
  1019 + * ext4_new_inode() setting the times, but error
  1020 + * recovery deletes the inode, so the worst that can
  1021 + * happen is that the times are slightly out of date
  1022 + * and/or different from the directory change time.
  1023 + */
  1024 + dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
  1025 + ext4_update_dx_flag(dir);
  1026 + dir->i_version++;
  1027 + ext4_mark_inode_dirty(handle, dir);
  1028 + return 1;
  1029 +}
  1030 +
  /*
   * Return the address of the value of the xattr entry located at
   * i_inline_off bytes from the start of the raw inode: the value sits
   * e_value_offs bytes past IFIRST(header). BUG()s if i_inline_off is 0.
   */
  1031 +static void *ext4_get_inline_xattr_pos(struct inode *inode,
  1032 + struct ext4_iloc *iloc)
  1033 +{
  1034 + struct ext4_xattr_entry *entry;
  1035 + struct ext4_xattr_ibody_header *header;
  1036 +
  1037 + BUG_ON(!EXT4_I(inode)->i_inline_off);
  1038 +
  1039 + header = IHDR(inode, ext4_raw_inode(iloc));
  1040 + entry = (struct ext4_xattr_entry *)((void *)ext4_raw_inode(iloc) +
  1041 + EXT4_I(inode)->i_inline_off);
  1042 +
  1043 + return (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs);
  1044 +}
  1045 +
  1046 +/* Set the final de to cover the whole block.
   *
   * With a non-zero old_size, walk the entries in de_buf up to old_size and
   * grow the last entry's rec_len by (new_size - old_size). With
   * old_size == 0 the area is new, so write a single empty entry (inode 0)
   * whose rec_len spans new_size bytes.
   */
  1047 +static void ext4_update_final_de(void *de_buf, int old_size, int new_size)
  1048 +{
  1049 + struct ext4_dir_entry_2 *de, *prev_de;
  1050 + void *limit;
  1051 + int de_len;
  1052 +
  1053 + de = (struct ext4_dir_entry_2 *)de_buf;
  1054 + if (old_size) {
  1055 + limit = de_buf + old_size;
  1056 + do {
  1057 + prev_de = de;
  1058 + de_len = ext4_rec_len_from_disk(de->rec_len, old_size);
  1059 + de_buf += de_len;
  1060 + de = (struct ext4_dir_entry_2 *)de_buf;
  1061 + } while (de_buf < limit);
  1062 +
  1063 + prev_de->rec_len = ext4_rec_len_to_disk(de_len + new_size -
  1064 + old_size, new_size);
  1065 + } else {
  1066 + /* this is just created, so create an empty entry. */
  1067 + de->inode = 0;
  1068 + de->rec_len = ext4_rec_len_to_disk(new_size, new_size);
  1069 + }
  1070 +}
  1071 +
  /*
   * Grow the inline dir into the xattr space.
   * old_size and new_size count the inline bytes beyond
   * EXT4_MIN_INLINE_DATA_SIZE before and after the resize; new_size comes
   * from get_max_inline_xattr_value_size().
   * Returns -ENOSPC unless the growth exceeds EXT4_DIR_REC_LEN(1), the
   * error from ext4_update_inline_data() if that fails, and 0 after the
   * final dir entry has been extended and i_size/i_disksize updated.
   */
  1072 +static int ext4_update_inline_dir(handle_t *handle, struct inode *dir,
  1073 + struct ext4_iloc *iloc)
  1074 +{
  1075 + int ret;
  1076 + int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE;
  1077 + int new_size = get_max_inline_xattr_value_size(dir, iloc);
  1078 +
  1079 + if (new_size - old_size <= EXT4_DIR_REC_LEN(1))
  1080 + return -ENOSPC;
  1081 +
  1082 + ret = ext4_update_inline_data(handle, dir,
  1083 + new_size + EXT4_MIN_INLINE_DATA_SIZE);
  1084 + if (ret)
  1085 + return ret;
  1086 +
  /* Use the size actually recorded in i_inline_size after the update. */
  1087 + ext4_update_final_de(ext4_get_inline_xattr_pos(dir, iloc), old_size,
  1088 + EXT4_I(dir)->i_inline_size -
  1089 + EXT4_MIN_INLINE_DATA_SIZE);
  1090 + dir->i_size = EXT4_I(dir)->i_disksize = EXT4_I(dir)->i_inline_size;
  1091 + return 0;
  1092 +}
  1093 +
  /*
   * Re-create inline data of inline_size bytes for the inode, write buf
   * back into it and set EXT4_STATE_MAY_INLINE_DATA. Called on the error
   * path of ext4_convert_inline_data_nolock().
   * NOTE(review): any return value of ext4_create_inline_data() is ignored
   * here; confirm that is intended.
   */
  1094 +static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
  1095 + struct ext4_iloc *iloc,
  1096 + void *buf, int inline_size)
  1097 +{
  1098 + ext4_create_inline_data(handle, inode, inline_size);
  1099 + ext4_write_inline_data(inode, iloc, buf, 0, inline_size);
  1100 + ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
  1101 +}
  1102 +
  /*
   * Lay out a former inline dir in the newly allocated dir_block.
   * buf holds the old inline data: its first EXT4_INLINE_DOTDOT_SIZE bytes
   * carry the parent inode number and the rest are the dir entries.
   * "." and ".." are built with ext4_init_dot_dotdot(), the inline entries
   * are copied after them, the final entry is stretched to the end of the
   * block (minus the checksum tail when metadata_csum is enabled), and the
   * buffer is marked dirty.
   * Returns the result of ext4_handle_dirty_dirent_node().
   */
  1103 +static int ext4_finish_convert_inline_dir(handle_t *handle,
  1104 + struct inode *inode,
  1105 + struct buffer_head *dir_block,
  1106 + void *buf,
  1107 + int inline_size)
  1108 +{
  1109 + int err, csum_size = 0, header_size = 0;
  1110 + struct ext4_dir_entry_2 *de;
  1111 + struct ext4_dir_entry_tail *t;
  1112 + void *target = dir_block->b_data;
  1113 +
  1114 + /*
  1115 + * First create "." and ".." and then copy the dir information
  1116 + * back to the block.
  1117 + */
  1118 + de = (struct ext4_dir_entry_2 *)target;
  /* csum_size is still 0 at this call; it is only computed further down. */
  1119 + de = ext4_init_dot_dotdot(inode, de,
  1120 + inode->i_sb->s_blocksize, csum_size,
  1121 + le32_to_cpu(((struct ext4_dir_entry_2 *)buf)->inode), 1);
  /* header_size = bytes taken by the "." and ".." entries just written. */
  1122 + header_size = (void *)de - target;
  1123 +
  1124 + memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE,
  1125 + inline_size - EXT4_INLINE_DOTDOT_SIZE);
  1126 +
  1127 + if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
  1128 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
  1129 + csum_size = sizeof(struct ext4_dir_entry_tail);
  1130 +
  /* NOTE(review): this plain store looks redundant with i_size_write() on the next line. */
  1131 + inode->i_size = inode->i_sb->s_blocksize;
  1132 + i_size_write(inode, inode->i_sb->s_blocksize);
  1133 + EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
  1134 + ext4_update_final_de(dir_block->b_data,
  1135 + inline_size - EXT4_INLINE_DOTDOT_SIZE + header_size,
  1136 + inode->i_sb->s_blocksize - csum_size);
  1137 +
  1138 + if (csum_size) {
  1139 + t = EXT4_DIRENT_TAIL(dir_block->b_data,
  1140 + inode->i_sb->s_blocksize);
  1141 + initialize_dirent_tail(t, inode->i_sb->s_blocksize);
  1142 + }
  1143 + set_buffer_uptodate(dir_block);
  1144 + err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
  1145 + if (err)
  1146 + goto out;
  1147 + set_buffer_verified(dir_block);
  1148 +out:
  1149 + return err;
  1150 +}
  1151 +
  1152 +static int ext4_convert_inline_data_nolock(handle_t *handle,
  1153 + struct inode *inode,
  1154 + struct ext4_iloc *iloc)
  1155 +{
  1156 + int error;
  1157 + void *buf = NULL;
  1158 + struct buffer_head *data_bh = NULL;
  1159 + struct ext4_map_blocks map;
  1160 + int inline_size;
  1161 +
  1162 + inline_size = ext4_get_inline_size(inode);
  1163 + buf = kmalloc(inline_size, GFP_NOFS);
  1164 + if (!buf) {
  1165 + error = -ENOMEM;
  1166 + goto out;
  1167 + }
  1168 +
  1169 + error = ext4_read_inline_data(inode, buf, inline_size, iloc);
  1170 + if (error < 0)
  1171 + goto out;
  1172 +
  1173 + error = ext4_destroy_inline_data_nolock(handle, inode);
  1174 + if (error)
  1175 + goto out;
  1176 +
  1177 + map.m_lblk = 0;
  1178 + map.m_len = 1;
  1179 + map.m_flags = 0;
  1180 + error = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_CREATE);
  1181 + if (error < 0)
  1182 + goto out_restore;
  1183 + if (!(map.m_flags & EXT4_MAP_MAPPED)) {
  1184 + error = -EIO;
  1185 + goto out_restore;
  1186 + }
  1187 +
  1188 + data_bh = sb_getblk(inode->i_sb, map.m_pblk);
  1189 + if (!data_bh) {
  1190 + error = -EIO;
  1191 + goto out_restore;
  1192 + }
  1193 +
  1194 + lock_buffer(data_bh);
  1195 + error = ext4_journal_get_create_access(handle, data_bh);
  1196 + if (error) {
  1197 + unlock_buffer(data_bh);
  1198 + error = -EIO;
  1199 + goto out_restore;
  1200 + }
  1201 + memset(data_bh->b_data, 0, inode->i_sb->s_blocksize);
  1202 +
  1203 + if (!S_ISDIR(inode->i_mode)) {
  1204 + memcpy(data_bh->b_data, buf, inline_size);
  1205 + set_buffer_uptodate(data_bh);
  1206 + error = ext4_handle_dirty_metadata(handle,
  1207 + inode, data_bh);
  1208 + } else {
  1209 + error = ext4_finish_convert_inline_dir(handle, inode, data_bh,
  1210 + buf, inline_size);
  1211 + }
  1212 +
  1213 + unlock_buffer(data_bh);
  1214 +out_restore:
  1215 + if (error)
  1216 + ext4_restore_inline_data(handle, inode, iloc, buf, inline_size);
  1217 +
  1218 +out:
  1219 + brelse(data_bh);
  1220 + kfree(buf);
  1221 + return error;
  1222 +}
  1223 +
  1224 +/*
  1225 + * Try to add the new entry to the inline data.
  1226 + * Return 1 if it was added inline. Return 0 if the dir has no inline data or
  1227 + * was converted to a block, so the caller must add the entry itself; <0 on error.
  1228 + */
  1229 +int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
  1230 + struct inode *inode)
  1231 +{
  1232 + int ret, inline_size;
  1233 + void *inline_start;
  1234 + struct ext4_iloc iloc;
  1235 + struct inode *dir = dentry->d_parent->d_inode;
  1236 +
  1237 + ret = ext4_get_inode_loc(dir, &iloc);
  1238 + if (ret)
  1239 + return ret;
  1240 +
  1241 + down_write(&EXT4_I(dir)->xattr_sem);
  1242 + if (!ext4_has_inline_data(dir))
  1243 + goto out;
  1244 +
  /* First try the i_block area, skipping the leading parent inode number. */
  1245 + inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
  1246 + EXT4_INLINE_DOTDOT_SIZE;
  1247 + inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
  1248 +
  1249 + ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
  1250 + inline_start, inline_size);
  1251 + if (ret != -ENOSPC)
  1252 + goto out;
  1253 +
  1254 + /* check whether it can be inserted to inline xattr space. */
  1255 + inline_size = EXT4_I(dir)->i_inline_size -
  1256 + EXT4_MIN_INLINE_DATA_SIZE;
  1257 + if (!inline_size) {
  1258 + /* Try to use the xattr space.*/
  1259 + ret = ext4_update_inline_dir(handle, dir, &iloc);
  1260 + if (ret && ret != -ENOSPC)
  1261 + goto out;
  1262 +
  1263 + inline_size = EXT4_I(dir)->i_inline_size -
  1264 + EXT4_MIN_INLINE_DATA_SIZE;
  1265 + }
  1266 +
  1267 + if (inline_size) {
  1268 + inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
  1269 +
  1270 + ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
  1271 + inline_start, inline_size);
  1272 +
  1273 + if (ret != -ENOSPC)
  1274 + goto out;
  1275 + }
  1276 +
  1277 + /*
  1278 + * The inline space is filled up, so create a new block for it.
  1279 + * As the extent tree will be created, we have to save the inline
  1280 + * dir first.
  1281 + */
  1282 + ret = ext4_convert_inline_data_nolock(handle, dir, &iloc);
  1283 +
  1284 +out:
  1285 + ext4_mark_inode_dirty(handle, dir);
  1286 + up_write(&EXT4_I(dir)->xattr_sem);
  1287 + brelse(iloc.bh);
  1288 + return ret;
  1289 +}
  1290 +
  1291 +/*
  1292 + * Try to create the inline data for the new dir.
  1293 + * If it succeeds, return 0, otherwise return the error.
  1294 + * In case of -ENOSPC, the caller should create the normal disk layout dir.
  1295 + */
  1296 +int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent,
  1297 + struct inode *inode)
  1298 +{
  1299 + int ret, inline_size = EXT4_MIN_INLINE_DATA_SIZE;
  1300 + struct ext4_iloc iloc;
  1301 + struct ext4_dir_entry_2 *de;
  1302 +
  1303 + ret = ext4_get_inode_loc(inode, &iloc);
  1304 + if (ret)
  1305 + return ret;
  1306 +
  1307 + ret = ext4_prepare_inline_data(handle, inode, inline_size);
  1308 + if (ret)
  1309 + goto out;
  1310 +
  1311 + /*
  1312 + * For an inline dir, we only save the inode number for ".." and
  1313 + * create an empty dir entry to cover the remaining space.
  1314 + */
  1315 + de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block;
  1316 + de->inode = cpu_to_le32(parent->i_ino);
  1317 + de = (struct ext4_dir_entry_2 *)((void *)de + EXT4_INLINE_DOTDOT_SIZE);
  1318 + de->inode = 0;
  1319 + de->rec_len = ext4_rec_len_to_disk(
  1320 + inline_size - EXT4_INLINE_DOTDOT_SIZE,
  1321 + inline_size);
  1322 + set_nlink(inode, 2);
  1323 + inode->i_size = EXT4_I(inode)->i_disksize = inline_size;
  1324 +out:
  1325 + brelse(iloc.bh);
  1326 + return ret;
950 1327 }
951 1328  
952 1329 int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
... ... @@ -202,13 +202,8 @@
202 202 struct inode *inode);
203 203  
204 204 /* checksumming functions */
205   -#define EXT4_DIRENT_TAIL(block, blocksize) \
206   - ((struct ext4_dir_entry_tail *)(((void *)(block)) + \
207   - ((blocksize) - \
208   - sizeof(struct ext4_dir_entry_tail))))
209   -
210   -static void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
211   - unsigned int blocksize)
  205 +void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
  206 + unsigned int blocksize)
212 207 {
213 208 memset(t, 0, sizeof(struct ext4_dir_entry_tail));
214 209 t->det_rec_len = ext4_rec_len_to_disk(
... ... @@ -307,9 +302,9 @@
307 302 (void *)t - (void *)dirent);
308 303 }
309 304  
310   -static inline int ext4_handle_dirty_dirent_node(handle_t *handle,
311   - struct inode *inode,
312   - struct buffer_head *bh)
  305 +int ext4_handle_dirty_dirent_node(handle_t *handle,
  306 + struct inode *inode,
  307 + struct buffer_head *bh)
313 308 {
314 309 ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
315 310 return ext4_handle_dirty_metadata(handle, inode, bh);
... ... @@ -1878,6 +1873,17 @@
1878 1873 blocksize = sb->s_blocksize;
1879 1874 if (!dentry->d_name.len)
1880 1875 return -EINVAL;
  1876 +
  1877 + if (ext4_has_inline_data(dir)) {
  1878 + retval = ext4_try_add_inline_entry(handle, dentry, inode);
  1879 + if (retval < 0)
  1880 + return retval;
  1881 + if (retval == 1) {
  1882 + retval = 0;
  1883 + return retval;
  1884 + }
  1885 + }
  1886 +
1881 1887 if (is_dx(dir)) {
1882 1888 retval = ext4_dx_add_entry(handle, dentry, inode);
1883 1889 if (!retval || (retval != ERR_BAD_DX_DIR))
... ... @@ -2300,6 +2306,14 @@
2300 2306 if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
2301 2307 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
2302 2308 csum_size = sizeof(struct ext4_dir_entry_tail);
  2309 +
  2310 + if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
  2311 + err = ext4_try_create_inline_dir(handle, dir, inode);
  2312 + if (err < 0 && err != -ENOSPC)
  2313 + goto out;
  2314 + if (!err)
  2315 + goto out;
  2316 + }
2303 2317  
2304 2318 inode->i_size = EXT4_I(inode)->i_disksize = blocksize;
2305 2319 dir_block = ext4_bread(handle, inode, 0, 1, &err);
... ... @@ -163,6 +163,11 @@
163 163 extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
164 164 unsigned len, unsigned copied,
165 165 struct page *page);
  166 +extern int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
  167 + struct inode *inode);
  168 +extern int ext4_try_create_inline_dir(handle_t *handle,
  169 + struct inode *parent,
  170 + struct inode *inode);
166 171 # else /* CONFIG_EXT4_FS_XATTR */
167 172  
168 173 static inline int
... ... @@ -324,6 +329,20 @@
/* Stub for the !CONFIG_EXT4_FS_XATTR build: always returns 0. */
324 329 static inline int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
325 330 unsigned len, unsigned copied,
326 331 struct page *page)
  332 +{
  333 + return 0;
  334 +}
  335 +
  /*
   * Stub for the !CONFIG_EXT4_FS_XATTR build: always returns 0. The
   * add-entry caller shown in this change only stops on a negative value
   * or 1, so 0 lets it continue with the regular (non-inline) path.
   */
  336 +static inline int ext4_try_add_inline_entry(handle_t *handle,
  337 + struct dentry *dentry,
  338 + struct inode *inode)
  339 +{
  340 + return 0;
  341 +}
  342 +
  /*
   * Stub for the !CONFIG_EXT4_FS_XATTR build: always returns 0.
   * NOTE(review): the mkdir-path caller in this change treats 0 as "inline
   * dir created" and skips creating the dir block. Presumably
   * EXT4_STATE_MAY_INLINE_DATA is never set in this configuration, so the
   * stub is not reached; confirm.
   */
  343 +static inline int ext4_try_create_inline_dir(handle_t *handle,
  344 + struct inode *parent,
  345 + struct inode *inode)
327 346 {
328 347 return 0;
329 348 }