Commit ef7f38359ea8b3e9c7f2cae9a4d4935f55ca9e80

Authored by Kalpak Shah
Committed by Theodore Ts'o
1 parent 0f49d5d019

ext4: Add nanosecond timestamps

This patch adds nanosecond timestamps for ext4. This involves adding
*time_extra fields to the ext4_inode to extend the timestamps to
64-bits.  Creation time is also added by this patch.

These extended fields will fit into an inode if the filesystem was
formatted with large inodes (-I 256 or larger) and there are currently
no EAs consuming all of the available space. For new inodes we always
reserve enough space for the kernel's known extended fields, but for
inodes created with an old kernel this might not have been the case. So
this patch also adds the EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE feature
flag (ro-compat so that older kernels can't create inodes with a smaller
extra_isize), which indicates whether the fields fitting inside
s_min_extra_isize are available or not.  If the expansion of inodes is
unsuccessful then this feature will be disabled.  This feature is only
enabled if requested by the sysadmin.

None of the extended inode fields is critical for correct filesystem
operation.

Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
Signed-off-by: Kalpak Shah <kalpak@clusterfs.com>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

Showing 9 changed files with 147 additions and 25 deletions Side-by-side Diff

... ... @@ -563,7 +563,8 @@
563 563 inode->i_ino = ino;
564 564 /* This is the optimal IO size (for stat), not the fs block size */
565 565 inode->i_blocks = 0;
566   - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
  566 + inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
  567 + ext4_current_time(inode);
567 568  
568 569 memset(ei->i_data, 0, sizeof(ei->i_data));
569 570 ei->i_dir_start_lookup = 0;
... ... @@ -595,9 +596,8 @@
595 596 spin_unlock(&sbi->s_next_gen_lock);
596 597  
597 598 ei->i_state = EXT4_STATE_NEW;
598   - ei->i_extra_isize =
599   - (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) ?
600   - sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE : 0;
  599 +
  600 + ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
601 601  
602 602 ret = inode;
603 603 if(DQUOT_ALLOC_INODE(inode)) {
... ... @@ -726,7 +726,7 @@
726 726  
727 727 /* We are done with atomic stuff, now do the rest of housekeeping */
728 728  
729   - inode->i_ctime = CURRENT_TIME_SEC;
  729 + inode->i_ctime = ext4_current_time(inode);
730 730 ext4_mark_inode_dirty(handle, inode);
731 731  
732 732 /* had we spliced it onto indirect block? */
... ... @@ -2375,7 +2375,7 @@
2375 2375 ext4_discard_reservation(inode);
2376 2376  
2377 2377 mutex_unlock(&ei->truncate_mutex);
2378   - inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
  2378 + inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
2379 2379 ext4_mark_inode_dirty(handle, inode);
2380 2380  
2381 2381 /*
... ... @@ -2629,10 +2629,6 @@
2629 2629 }
2630 2630 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
2631 2631 inode->i_size = le32_to_cpu(raw_inode->i_size);
2632   - inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
2633   - inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
2634   - inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
2635   - inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
2636 2632  
2637 2633 ei->i_state = 0;
2638 2634 ei->i_dir_start_lookup = 0;
... ... @@ -2710,6 +2706,11 @@
2710 2706 } else
2711 2707 ei->i_extra_isize = 0;
2712 2708  
  2709 + EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
  2710 + EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
  2711 + EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
  2712 + EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
  2713 +
2713 2714 if (S_ISREG(inode->i_mode)) {
2714 2715 inode->i_op = &ext4_file_inode_operations;
2715 2716 inode->i_fop = &ext4_file_operations;
... ... @@ -2791,9 +2792,12 @@
2791 2792 }
2792 2793 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
2793 2794 raw_inode->i_size = cpu_to_le32(ei->i_disksize);
2794   - raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
2795   - raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
2796   - raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
  2795 +
  2796 + EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
  2797 + EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
  2798 + EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
  2799 + EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
  2800 +
2797 2801 raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
2798 2802 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
2799 2803 raw_inode->i_flags = cpu_to_le32(ei->i_flags);
... ... @@ -97,7 +97,7 @@
97 97 ei->i_flags = flags;
98 98  
99 99 ext4_set_inode_flags(inode);
100   - inode->i_ctime = CURRENT_TIME_SEC;
  100 + inode->i_ctime = ext4_current_time(inode);
101 101  
102 102 err = ext4_mark_iloc_dirty(handle, inode, &iloc);
103 103 flags_err:
... ... @@ -134,7 +134,7 @@
134 134 return PTR_ERR(handle);
135 135 err = ext4_reserve_inode_write(handle, inode, &iloc);
136 136 if (err == 0) {
137   - inode->i_ctime = CURRENT_TIME_SEC;
  137 + inode->i_ctime = ext4_current_time(inode);
138 138 inode->i_generation = generation;
139 139 err = ext4_mark_iloc_dirty(handle, inode, &iloc);
140 140 }
... ... @@ -1295,7 +1295,7 @@
1295 1295 * happen is that the times are slightly out of date
1296 1296 * and/or different from the directory change time.
1297 1297 */
1298   - dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
  1298 + dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
1299 1299 ext4_update_dx_flag(dir);
1300 1300 dir->i_version++;
1301 1301 ext4_mark_inode_dirty(handle, dir);
... ... @@ -2056,7 +2056,7 @@
2056 2056 * recovery. */
2057 2057 inode->i_size = 0;
2058 2058 ext4_orphan_add(handle, inode);
2059   - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
  2059 + inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode);
2060 2060 ext4_mark_inode_dirty(handle, inode);
2061 2061 drop_nlink(dir);
2062 2062 ext4_update_dx_flag(dir);
2063 2063  
... ... @@ -2106,13 +2106,13 @@
2106 2106 retval = ext4_delete_entry(handle, dir, de, bh);
2107 2107 if (retval)
2108 2108 goto end_unlink;
2109   - dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
  2109 + dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
2110 2110 ext4_update_dx_flag(dir);
2111 2111 ext4_mark_inode_dirty(handle, dir);
2112 2112 drop_nlink(inode);
2113 2113 if (!inode->i_nlink)
2114 2114 ext4_orphan_add(handle, inode);
2115   - inode->i_ctime = dir->i_ctime;
  2115 + inode->i_ctime = ext4_current_time(inode);
2116 2116 ext4_mark_inode_dirty(handle, inode);
2117 2117 retval = 0;
2118 2118  
... ... @@ -2203,7 +2203,7 @@
2203 2203 if (IS_DIRSYNC(dir))
2204 2204 handle->h_sync = 1;
2205 2205  
2206   - inode->i_ctime = CURRENT_TIME_SEC;
  2206 + inode->i_ctime = ext4_current_time(inode);
2207 2207 inc_nlink(inode);
2208 2208 atomic_inc(&inode->i_count);
2209 2209  
... ... @@ -2305,7 +2305,7 @@
2305 2305 * Like most other Unix systems, set the ctime for inodes on a
2306 2306 * rename.
2307 2307 */
2308   - old_inode->i_ctime = CURRENT_TIME_SEC;
  2308 + old_inode->i_ctime = ext4_current_time(old_inode);
2309 2309 ext4_mark_inode_dirty(handle, old_inode);
2310 2310  
2311 2311 /*
2312 2312  
... ... @@ -2338,9 +2338,9 @@
2338 2338  
2339 2339 if (new_inode) {
2340 2340 drop_nlink(new_inode);
2341   - new_inode->i_ctime = CURRENT_TIME_SEC;
  2341 + new_inode->i_ctime = ext4_current_time(new_inode);
2342 2342 }
2343   - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC;
  2343 + old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
2344 2344 ext4_update_dx_flag(old_dir);
2345 2345 if (dir_bh) {
2346 2346 BUFFER_TRACE(dir_bh, "get_write_access");
... ... @@ -1651,6 +1651,8 @@
1651 1651 sbi->s_inode_size);
1652 1652 goto failed_mount;
1653 1653 }
  1654 + if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
  1655 + sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
1654 1656 }
1655 1657 sbi->s_frag_size = EXT4_MIN_FRAG_SIZE <<
1656 1658 le32_to_cpu(es->s_log_frag_size);
... ... @@ -1874,6 +1876,32 @@
1874 1876 }
1875 1877  
1876 1878 ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY);
  1879 +
  1880 + /* determine the minimum size of new large inodes, if present */
  1881 + if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
  1882 + sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
  1883 + EXT4_GOOD_OLD_INODE_SIZE;
  1884 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
  1885 + EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
  1886 + if (sbi->s_want_extra_isize <
  1887 + le16_to_cpu(es->s_want_extra_isize))
  1888 + sbi->s_want_extra_isize =
  1889 + le16_to_cpu(es->s_want_extra_isize);
  1890 + if (sbi->s_want_extra_isize <
  1891 + le16_to_cpu(es->s_min_extra_isize))
  1892 + sbi->s_want_extra_isize =
  1893 + le16_to_cpu(es->s_min_extra_isize);
  1894 + }
  1895 + }
  1896 + /* Check if enough inode space is available */
  1897 + if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
  1898 + sbi->s_inode_size) {
  1899 + sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
  1900 + EXT4_GOOD_OLD_INODE_SIZE;
  1901 + printk(KERN_INFO "EXT4-fs: required extra inode space not"
  1902 + "available.\n");
  1903 + }
  1904 +
1877 1905 /*
1878 1906 * akpm: core read_super() calls in here with the superblock locked.
1879 1907 * That deadlocks, because orphan cleanup needs to lock the superblock
... ... @@ -1013,7 +1013,7 @@
1013 1013 }
1014 1014 if (!error) {
1015 1015 ext4_xattr_update_super_block(handle, inode->i_sb);
1016   - inode->i_ctime = CURRENT_TIME_SEC;
  1016 + inode->i_ctime = ext4_current_time(inode);
1017 1017 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
1018 1018 /*
1019 1019 * The bh is consumed by ext4_mark_iloc_dirty, even with
include/linux/ext4_fs.h
... ... @@ -288,7 +288,7 @@
288 288 __le16 i_uid; /* Low 16 bits of Owner Uid */
289 289 __le32 i_size; /* Size in bytes */
290 290 __le32 i_atime; /* Access time */
291   - __le32 i_ctime; /* Creation time */
  291 + __le32 i_ctime; /* Inode Change time */
292 292 __le32 i_mtime; /* Modification time */
293 293 __le32 i_dtime; /* Deletion Time */
294 294 __le16 i_gid; /* Low 16 bits of Group Id */
295 295  
... ... @@ -337,10 +337,85 @@
337 337 } osd2; /* OS dependent 2 */
338 338 __le16 i_extra_isize;
339 339 __le16 i_pad1;
  340 + __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
  341 + __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
  342 + __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
  343 + __le32 i_crtime; /* File Creation time */
  344 + __le32 i_crtime_extra; /* extra File Creation time (nsec << 2 | epoch) */
340 345 };
341 346  
342 347 #define i_size_high i_dir_acl
343 348  
  349 +#define EXT4_EPOCH_BITS 2 /* width of the epoch part of an *_extra field */
  350 +#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) /* selects the epoch bits */
  351 +#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS) /* selects the bits above the epoch (nsec) */
  352 +
  353 +/*
  354 + * Extended fields will fit into an inode if the filesystem was formatted
  355 + * with large inodes (-I 256 or larger) and there are not currently any EAs
  356 + * consuming all of the available space. For new inodes we always reserve
  357 + * enough space for the kernel's known extended fields, but for inodes
  358 + * created with an old kernel this might not have been the case. None of
  359 + * the extended inode fields is critical for correct filesystem operation.
  360 + * This macro checks if a certain field fits in the inode. Note that
  361 + * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
  362 + */
  363 +#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
  364 + ((offsetof(typeof(*ext4_inode), field) + \
  365 + sizeof((ext4_inode)->field)) \
  366 + <= (EXT4_GOOD_OLD_INODE_SIZE + \
  367 + (einode)->i_extra_isize)) \
  368 +
  369 +static inline __le32 ext4_encode_extra_time(struct timespec *time)
  370 +{
  371 + return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
  372 + time->tv_sec >> 32 : 0) |
  373 + ((time->tv_nsec << 2) & EXT4_NSEC_MASK));
  374 +}
  375 +
  376 +static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
  377 +{
  378 + if (sizeof(time->tv_sec) > 4)
  379 + time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
  380 + << 32;
  381 + time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
  382 +}
  383 +
  /*
   * Store the timestamp (inode)->xtime into the raw inode: the seconds
   * field (raw_inode)->xtime is always written, and the xtime_extra field
   * is written only if EXT4_FITS_IN_INODE says it lies within this inode's
   * i_extra_isize.
   */
  384 +#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
  385 +do { \
  386 + (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
  387 + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
  388 + (raw_inode)->xtime ## _extra = \
  389 + ext4_encode_extra_time(&(inode)->xtime); \
  390 +} while (0)
  391 +
  /*
   * Store the timestamp (einode)->xtime into the raw inode.  Unlike
   * EXT4_INODE_SET_XTIME, the seconds field and the xtime_extra field are
   * each written only if EXT4_FITS_IN_INODE says they fit; einode also
   * supplies the i_extra_isize used for that check.
   */
  392 +#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
  393 +do { \
  394 + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
  395 + (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
  396 + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
  397 + (raw_inode)->xtime ## _extra = \
  398 + ext4_encode_extra_time(&(einode)->xtime); \
  399 +} while (0)
  400 +
  401 +#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
  402 +do { \
  403 + (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
  404 + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
  405 + ext4_decode_extra_time(&(inode)->xtime, \
  406 + raw_inode->xtime ## _extra); \
  407 +} while (0)
  408 +
  409 +#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
  410 +do { \
  411 + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
  412 + (einode)->xtime.tv_sec = \
  413 + (signed)le32_to_cpu((raw_inode)->xtime); \
  414 + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
  415 + ext4_decode_extra_time(&(einode)->xtime, \
  416 + raw_inode->xtime ## _extra); \
  417 +} while (0)
  418 +
344 419 #if defined(__KERNEL__) || defined(__linux__)
345 420 #define i_reserved1 osd1.linux1.l_i_reserved1
346 421 #define i_frag osd2.linux2.l_i_frag
... ... @@ -539,6 +614,13 @@
539 614 return container_of(inode, struct ext4_inode_info, vfs_inode);
540 615 }
541 616  
  617 +static inline struct timespec ext4_current_time(struct inode *inode)
  618 +{
  619 + return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
  620 + current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
  621 +}
  622 +
  623 +
542 624 static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
543 625 {
544 626 return ino == EXT4_ROOT_INO ||
... ... @@ -609,6 +691,7 @@
609 691 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
610 692 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
611 693 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
  694 +#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
612 695  
613 696 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
614 697 #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
... ... @@ -626,6 +709,7 @@
626 709 EXT4_FEATURE_INCOMPAT_64BIT)
627 710 #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
628 711 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
  712 + EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
629 713 EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
630 714  
631 715 /*
include/linux/ext4_fs_i.h
... ... @@ -153,6 +153,11 @@
153 153  
154 154 unsigned long i_ext_generation;
155 155 struct ext4_ext_cache i_cached_extent;
  156 + /*
  157 + * File creation time. Its function is same as that of
  158 + * struct timespec i_{a,c,m}time in the generic inode.
  159 + */
  160 + struct timespec i_crtime;
156 161 };
157 162  
158 163 #endif /* _LINUX_EXT4_FS_I */
include/linux/ext4_fs_sb.h
... ... @@ -81,6 +81,7 @@
81 81 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
82 82 int s_jquota_fmt; /* Format of quota to use */
83 83 #endif
  84 + unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
84 85  
85 86 #ifdef EXTENTS_STATS
86 87 /* ext4 extents stats */