Commit feaf222925cdfbc841a695fd30df8c6d0a694146

Authored by Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 fixes from Ted Ts'o:
 "Ext4 bug fixes.

  We also reserved code points for encryption and read-only images (for
  which the implementation is mostly just the reserved code point for a
  read-only feature :-)"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix indirect punch hole corruption
  ext4: ignore journal checksum on remount; don't fail
  ext4: remove duplicate remount check for JOURNAL_CHECKSUM change
  ext4: fix mmap data corruption in nodelalloc mode when blocksize < pagesize
  ext4: support read-only images
  ext4: change to use setup_timer() instead of init_timer()
  ext4: reserve codepoints used by the ext4 encryption feature
  jbd2: complain about descriptor block checksum errors

Showing 5 changed files (side-by-side diff)

... ... @@ -364,7 +364,8 @@
364 364 #define EXT4_DIRTY_FL 0x00000100
365 365 #define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
366 366 #define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */
367   -#define EXT4_ECOMPR_FL 0x00000800 /* Compression error */
  367 + /* nb: was previously EXT2_ECOMPR_FL */
  368 +#define EXT4_ENCRYPT_FL 0x00000800 /* encrypted file */
368 369 /* End compression flags --- maybe not all used */
369 370 #define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */
370 371 #define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */
... ... @@ -421,7 +422,7 @@
421 422 EXT4_INODE_DIRTY = 8,
422 423 EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */
423 424 EXT4_INODE_NOCOMPR = 10, /* Don't compress */
424   - EXT4_INODE_ECOMPR = 11, /* Compression error */
  425 + EXT4_INODE_ENCRYPT = 11, /* Encrypted file */
425 426 /* End compression flags --- maybe not all used */
426 427 EXT4_INODE_INDEX = 12, /* hash-indexed directory */
427 428 EXT4_INODE_IMAGIC = 13, /* AFS directory */
... ... @@ -466,7 +467,7 @@
466 467 CHECK_FLAG_VALUE(DIRTY);
467 468 CHECK_FLAG_VALUE(COMPRBLK);
468 469 CHECK_FLAG_VALUE(NOCOMPR);
469   - CHECK_FLAG_VALUE(ECOMPR);
  470 + CHECK_FLAG_VALUE(ENCRYPT);
470 471 CHECK_FLAG_VALUE(INDEX);
471 472 CHECK_FLAG_VALUE(IMAGIC);
472 473 CHECK_FLAG_VALUE(JOURNAL_DATA);
... ... @@ -1048,6 +1049,12 @@
1048 1049 /* Metadata checksum algorithm codes */
1049 1050 #define EXT4_CRC32C_CHKSUM 1
1050 1051  
  1052 +/* Encryption algorithms */
  1053 +#define EXT4_ENCRYPTION_MODE_INVALID 0
  1054 +#define EXT4_ENCRYPTION_MODE_AES_256_XTS 1
  1055 +#define EXT4_ENCRYPTION_MODE_AES_256_GCM 2
  1056 +#define EXT4_ENCRYPTION_MODE_AES_256_CBC 3
  1057 +
1051 1058 /*
1052 1059 * Structure of the super block
1053 1060 */
... ... @@ -1161,7 +1168,8 @@
1161 1168 __le32 s_grp_quota_inum; /* inode for tracking group quota */
1162 1169 __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */
1163 1170 __le32 s_backup_bgs[2]; /* groups with sparse_super2 SBs */
1164   - __le32 s_reserved[106]; /* Padding to the end of the block */
  1171 + __u8 s_encrypt_algos[4]; /* Encryption algorithms in use */
  1172 + __le32 s_reserved[105]; /* Padding to the end of the block */
1165 1173 __le32 s_checksum; /* crc32c(superblock) */
1166 1174 };
1167 1175  
... ... @@ -1527,6 +1535,7 @@
1527 1535 * GDT_CSUM bits are mutually exclusive.
1528 1536 */
1529 1537 #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400
  1538 +#define EXT4_FEATURE_RO_COMPAT_READONLY 0x1000
1530 1539  
1531 1540 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
1532 1541 #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
... ... @@ -1542,6 +1551,7 @@
1542 1551 #define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */
1543 1552 #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
1544 1553 #define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */
  1554 +#define EXT4_FEATURE_INCOMPAT_ENCRYPT 0x10000
1545 1555  
1546 1556 #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
1547 1557 #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
... ... @@ -1401,10 +1401,7 @@
1401 1401 * to free. Everything was covered by the start
1402 1402 * of the range.
1403 1403 */
1404   - return 0;
1405   - } else {
1406   - /* Shared branch grows from an indirect block */
1407   - partial2--;
  1404 + goto do_indirects;
1408 1405 }
1409 1406 } else {
1410 1407 /*
1411 1408  
1412 1409  
1413 1410  
1414 1411  
1415 1412  
1416 1413  
1417 1414  
1418 1415  
1419 1416  
1420 1417  
1421 1418  
1422 1419  
1423 1420  
... ... @@ -1435,56 +1432,96 @@
1435 1432 /* Punch happened within the same level (n == n2) */
1436 1433 partial = ext4_find_shared(inode, n, offsets, chain, &nr);
1437 1434 partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
1438   - /*
1439   - * ext4_find_shared returns Indirect structure which
1440   - * points to the last element which should not be
1441   - * removed by truncate. But this is end of the range
1442   - * in punch_hole so we need to point to the next element
1443   - */
1444   - partial2->p++;
1445   - while ((partial > chain) || (partial2 > chain2)) {
1446   - /* We're at the same block, so we're almost finished */
1447   - if ((partial->bh && partial2->bh) &&
1448   - (partial->bh->b_blocknr == partial2->bh->b_blocknr)) {
1449   - if ((partial > chain) && (partial2 > chain2)) {
  1435 +
  1436 + /* Free top, but only if partial2 isn't its subtree. */
  1437 + if (nr) {
  1438 + int level = min(partial - chain, partial2 - chain2);
  1439 + int i;
  1440 + int subtree = 1;
  1441 +
  1442 + for (i = 0; i <= level; i++) {
  1443 + if (offsets[i] != offsets2[i]) {
  1444 + subtree = 0;
  1445 + break;
  1446 + }
  1447 + }
  1448 +
  1449 + if (!subtree) {
  1450 + if (partial == chain) {
  1451 + /* Shared branch grows from the inode */
  1452 + ext4_free_branches(handle, inode, NULL,
  1453 + &nr, &nr+1,
  1454 + (chain+n-1) - partial);
  1455 + *partial->p = 0;
  1456 + } else {
  1457 + /* Shared branch grows from an indirect block */
  1458 + BUFFER_TRACE(partial->bh, "get_write_access");
1450 1459 ext4_free_branches(handle, inode, partial->bh,
1451   - partial->p + 1,
1452   - partial2->p,
  1460 + partial->p,
  1461 + partial->p+1,
1453 1462 (chain+n-1) - partial);
1454   - BUFFER_TRACE(partial->bh, "call brelse");
1455   - brelse(partial->bh);
1456   - BUFFER_TRACE(partial2->bh, "call brelse");
1457   - brelse(partial2->bh);
1458 1463 }
1459   - return 0;
1460 1464 }
  1465 + }
  1466 +
  1467 + if (!nr2) {
1461 1468 /*
1462   - * Clear the ends of indirect blocks on the shared branch
1463   - * at the start of the range
  1469 + * ext4_find_shared returns Indirect structure which
  1470 + * points to the last element which should not be
  1471 + * removed by truncate. But this is end of the range
  1472 + * in punch_hole so we need to point to the next element
1464 1473 */
1465   - if (partial > chain) {
  1474 + partial2->p++;
  1475 + }
  1476 +
  1477 + while (partial > chain || partial2 > chain2) {
  1478 + int depth = (chain+n-1) - partial;
  1479 + int depth2 = (chain2+n2-1) - partial2;
  1480 +
  1481 + if (partial > chain && partial2 > chain2 &&
  1482 + partial->bh->b_blocknr == partial2->bh->b_blocknr) {
  1483 + /*
  1484 + * We've converged on the same block. Clear the range,
  1485 + * then we're done.
  1486 + */
1466 1487 ext4_free_branches(handle, inode, partial->bh,
1467   - partial->p + 1,
1468   - (__le32 *)partial->bh->b_data+addr_per_block,
1469   - (chain+n-1) - partial);
  1488 + partial->p + 1,
  1489 + partial2->p,
  1490 + (chain+n-1) - partial);
1470 1491 BUFFER_TRACE(partial->bh, "call brelse");
1471 1492 brelse(partial->bh);
1472   - partial--;
  1493 + BUFFER_TRACE(partial2->bh, "call brelse");
  1494 + brelse(partial2->bh);
  1495 + return 0;
1473 1496 }
  1497 +
1474 1498 /*
1475   - * Clear the ends of indirect blocks on the shared branch
1476   - * at the end of the range
  1499 + * The start and end partial branches may not be at the same
  1500 + * level even though the punch happened within one level. So, we
  1501 + * give them a chance to arrive at the same level, then walk
  1502 + * them in step with each other until we converge on the same
  1503 + * block.
1477 1504 */
1478   - if (partial2 > chain2) {
  1505 + if (partial > chain && depth <= depth2) {
  1506 + ext4_free_branches(handle, inode, partial->bh,
  1507 + partial->p + 1,
  1508 + (__le32 *)partial->bh->b_data+addr_per_block,
  1509 + (chain+n-1) - partial);
  1510 + BUFFER_TRACE(partial->bh, "call brelse");
  1511 + brelse(partial->bh);
  1512 + partial--;
  1513 + }
  1514 + if (partial2 > chain2 && depth2 <= depth) {
1479 1515 ext4_free_branches(handle, inode, partial2->bh,
1480 1516 (__le32 *)partial2->bh->b_data,
1481 1517 partial2->p,
1482   - (chain2+n-1) - partial2);
  1518 + (chain2+n2-1) - partial2);
1483 1519 BUFFER_TRACE(partial2->bh, "call brelse");
1484 1520 brelse(partial2->bh);
1485 1521 partial2--;
1486 1522 }
1487 1523 }
  1524 + return 0;
1488 1525  
1489 1526 do_indirects:
1490 1527 /* Kill the remaining (whole) subtrees */
... ... @@ -1024,6 +1024,7 @@
1024 1024 {
1025 1025 handle_t *handle = ext4_journal_current_handle();
1026 1026 struct inode *inode = mapping->host;
  1027 + loff_t old_size = inode->i_size;
1027 1028 int ret = 0, ret2;
1028 1029 int i_size_changed = 0;
1029 1030  
... ... @@ -1054,6 +1055,8 @@
1054 1055 unlock_page(page);
1055 1056 page_cache_release(page);
1056 1057  
  1058 + if (old_size < pos)
  1059 + pagecache_isize_extended(inode, old_size, pos);
1057 1060 /*
1058 1061 * Don't mark the inode dirty under page lock. First, it unnecessarily
1059 1062 * makes the holding time of page lock longer. Second, it forces lock
... ... @@ -1095,6 +1098,7 @@
1095 1098 {
1096 1099 handle_t *handle = ext4_journal_current_handle();
1097 1100 struct inode *inode = mapping->host;
  1101 + loff_t old_size = inode->i_size;
1098 1102 int ret = 0, ret2;
1099 1103 int partial = 0;
1100 1104 unsigned from, to;
... ... @@ -1126,6 +1130,9 @@
1126 1130 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
1127 1131 unlock_page(page);
1128 1132 page_cache_release(page);
  1133 +
  1134 + if (old_size < pos)
  1135 + pagecache_isize_extended(inode, old_size, pos);
1129 1136  
1130 1137 if (size_changed) {
1131 1138 ret2 = ext4_mark_inode_dirty(handle, inode);
... ... @@ -2779,6 +2779,12 @@
2779 2779 if (readonly)
2780 2780 return 1;
2781 2781  
  2782 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_READONLY)) {
  2783 + ext4_msg(sb, KERN_INFO, "filesystem is read-only");
  2784 + sb->s_flags |= MS_RDONLY;
  2785 + return 1;
  2786 + }
  2787 +
2782 2788 /* Check that feature set is OK for a read-write mount */
2783 2789 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) {
2784 2790 ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
... ... @@ -3936,9 +3942,8 @@
3936 3942 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3937 3943 spin_lock_init(&sbi->s_next_gen_lock);
3938 3944  
3939   - init_timer(&sbi->s_err_report);
3940   - sbi->s_err_report.function = print_daily_error_info;
3941   - sbi->s_err_report.data = (unsigned long) sb;
  3945 + setup_timer(&sbi->s_err_report, print_daily_error_info,
  3946 + (unsigned long) sb);
3942 3947  
3943 3948 /* Register extent status tree shrinker */
3944 3949 if (ext4_es_register_shrinker(sbi))
... ... @@ -4866,9 +4871,6 @@
4866 4871 if (sbi->s_journal && sbi->s_journal->j_task->io_context)
4867 4872 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
4868 4873  
4869   - /*
4870   - * Allow the "check" option to be passed as a remount option.
4871   - */
4872 4874 if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
4873 4875 err = -EINVAL;
4874 4876 goto restore_opts;
4875 4877  
... ... @@ -4877,19 +4879,10 @@
4877 4879 if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
4878 4880 test_opt(sb, JOURNAL_CHECKSUM)) {
4879 4881 ext4_msg(sb, KERN_ERR, "changing journal_checksum "
4880   - "during remount not supported");
4881   - err = -EINVAL;
4882   - goto restore_opts;
  4882 + "during remount not supported; ignoring");
  4883 + sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
4883 4884 }
4884 4885  
4885   - if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
4886   - test_opt(sb, JOURNAL_CHECKSUM)) {
4887   - ext4_msg(sb, KERN_ERR, "changing journal_checksum "
4888   - "during remount not supported");
4889   - err = -EINVAL;
4890   - goto restore_opts;
4891   - }
4892   -
4893 4886 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4894 4887 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4895 4888 ext4_msg(sb, KERN_ERR, "can't mount with "
... ... @@ -4963,7 +4956,9 @@
4963 4956 ext4_mark_recovery_complete(sb, es);
4964 4957 } else {
4965 4958 /* Make sure we can mount this feature set readwrite */
4966   - if (!ext4_feature_set_ok(sb, 0)) {
  4959 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
  4960 + EXT4_FEATURE_RO_COMPAT_READONLY) ||
  4961 + !ext4_feature_set_ok(sb, 0)) {
4967 4962 err = -EROFS;
4968 4963 goto restore_opts;
4969 4964 }
... ... @@ -524,6 +524,9 @@
524 524 if (descr_csum_size > 0 &&
525 525 !jbd2_descr_block_csum_verify(journal,
526 526 bh->b_data)) {
  527 + printk(KERN_ERR "JBD2: Invalid checksum "
  528 + "recovering block %lu in log\n",
  529 + next_log_block);
527 530 err = -EIO;
528 531 brelse(bh);
529 532 goto failed;