Commit feaf222925cdfbc841a695fd30df8c6d0a694146
Exists in
ti-lsk-linux-4.1.y
and in
10 other branches
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 fixes from Ted Ts'o:
 "Ext4 bug fixes.

  We also reserved code points for encryption and read-only images (for
  which the implementation is mostly just the reserved code point for a
  read-only feature :-)"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix indirect punch hole corruption
  ext4: ignore journal checksum on remount; don't fail
  ext4: remove duplicate remount check for JOURNAL_CHECKSUM change
  ext4: fix mmap data corruption in nodelalloc mode when blocksize < pagesize
  ext4: support read-only images
  ext4: change to use setup_timer() instead of init_timer()
  ext4: reserve codepoints used by the ext4 encryption feature
  jbd2: complain about descriptor block checksum errors
Showing 5 changed files Side-by-side Diff
fs/ext4/ext4.h
... | ... | @@ -364,7 +364,8 @@ |
364 | 364 | #define EXT4_DIRTY_FL 0x00000100 |
365 | 365 | #define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ |
366 | 366 | #define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */ |
367 | -#define EXT4_ECOMPR_FL 0x00000800 /* Compression error */ | |
367 | + /* nb: was previously EXT2_ECOMPR_FL */ | |
368 | +#define EXT4_ENCRYPT_FL 0x00000800 /* encrypted file */ | |
368 | 369 | /* End compression flags --- maybe not all used */ |
369 | 370 | #define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */ |
370 | 371 | #define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */ |
... | ... | @@ -421,7 +422,7 @@ |
421 | 422 | EXT4_INODE_DIRTY = 8, |
422 | 423 | EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */ |
423 | 424 | EXT4_INODE_NOCOMPR = 10, /* Don't compress */ |
424 | - EXT4_INODE_ECOMPR = 11, /* Compression error */ | |
425 | + EXT4_INODE_ENCRYPT = 11, /* Encrypted file */ | |
425 | 426 | /* End compression flags --- maybe not all used */ |
426 | 427 | EXT4_INODE_INDEX = 12, /* hash-indexed directory */ |
427 | 428 | EXT4_INODE_IMAGIC = 13, /* AFS directory */ |
... | ... | @@ -466,7 +467,7 @@ |
466 | 467 | CHECK_FLAG_VALUE(DIRTY); |
467 | 468 | CHECK_FLAG_VALUE(COMPRBLK); |
468 | 469 | CHECK_FLAG_VALUE(NOCOMPR); |
469 | - CHECK_FLAG_VALUE(ECOMPR); | |
470 | + CHECK_FLAG_VALUE(ENCRYPT); | |
470 | 471 | CHECK_FLAG_VALUE(INDEX); |
471 | 472 | CHECK_FLAG_VALUE(IMAGIC); |
472 | 473 | CHECK_FLAG_VALUE(JOURNAL_DATA); |
... | ... | @@ -1048,6 +1049,12 @@ |
1048 | 1049 | /* Metadata checksum algorithm codes */ |
1049 | 1050 | #define EXT4_CRC32C_CHKSUM 1 |
1050 | 1051 | |
1052 | +/* Encryption algorithms */ | |
1053 | +#define EXT4_ENCRYPTION_MODE_INVALID 0 | |
1054 | +#define EXT4_ENCRYPTION_MODE_AES_256_XTS 1 | |
1055 | +#define EXT4_ENCRYPTION_MODE_AES_256_GCM 2 | |
1056 | +#define EXT4_ENCRYPTION_MODE_AES_256_CBC 3 | |
1057 | + | |
1051 | 1058 | /* |
1052 | 1059 | * Structure of the super block |
1053 | 1060 | */ |
... | ... | @@ -1161,7 +1168,8 @@ |
1161 | 1168 | __le32 s_grp_quota_inum; /* inode for tracking group quota */ |
1162 | 1169 | __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */ |
1163 | 1170 | __le32 s_backup_bgs[2]; /* groups with sparse_super2 SBs */ |
1164 | - __le32 s_reserved[106]; /* Padding to the end of the block */ | |
1171 | + __u8 s_encrypt_algos[4]; /* Encryption algorithms in use */ | |
1172 | + __le32 s_reserved[105]; /* Padding to the end of the block */ | |
1165 | 1173 | __le32 s_checksum; /* crc32c(superblock) */ |
1166 | 1174 | }; |
1167 | 1175 | |
... | ... | @@ -1527,6 +1535,7 @@ |
1527 | 1535 | * GDT_CSUM bits are mutually exclusive. |
1528 | 1536 | */ |
1529 | 1537 | #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 |
1538 | +#define EXT4_FEATURE_RO_COMPAT_READONLY 0x1000 | |
1530 | 1539 | |
1531 | 1540 | #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 |
1532 | 1541 | #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 |
... | ... | @@ -1542,6 +1551,7 @@ |
1542 | 1551 | #define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */ |
1543 | 1552 | #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */ |
1544 | 1553 | #define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */ |
1554 | +#define EXT4_FEATURE_INCOMPAT_ENCRYPT 0x10000 | |
1545 | 1555 | |
1546 | 1556 | #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR |
1547 | 1557 | #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ |
fs/ext4/indirect.c
... | ... | @@ -1401,10 +1401,7 @@ |
1401 | 1401 | * to free. Everything was covered by the start |
1402 | 1402 | * of the range. |
1403 | 1403 | */ |
1404 | - return 0; | |
1405 | - } else { | |
1406 | - /* Shared branch grows from an indirect block */ | |
1407 | - partial2--; | |
1404 | + goto do_indirects; | |
1408 | 1405 | } |
1409 | 1406 | } else { |
1410 | 1407 | /* |
1411 | 1408 | |
1412 | 1409 | |
1413 | 1410 | |
1414 | 1411 | |
1415 | 1412 | |
1416 | 1413 | |
1417 | 1414 | |
1418 | 1415 | |
1419 | 1416 | |
1420 | 1417 | |
1421 | 1418 | |
1422 | 1419 | |
1423 | 1420 | |
... | ... | @@ -1435,56 +1432,96 @@ |
1435 | 1432 | /* Punch happened within the same level (n == n2) */ |
1436 | 1433 | partial = ext4_find_shared(inode, n, offsets, chain, &nr); |
1437 | 1434 | partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); |
1438 | - /* | |
1439 | - * ext4_find_shared returns Indirect structure which | |
1440 | - * points to the last element which should not be | |
1441 | - * removed by truncate. But this is end of the range | |
1442 | - * in punch_hole so we need to point to the next element | |
1443 | - */ | |
1444 | - partial2->p++; | |
1445 | - while ((partial > chain) || (partial2 > chain2)) { | |
1446 | - /* We're at the same block, so we're almost finished */ | |
1447 | - if ((partial->bh && partial2->bh) && | |
1448 | - (partial->bh->b_blocknr == partial2->bh->b_blocknr)) { | |
1449 | - if ((partial > chain) && (partial2 > chain2)) { | |
1435 | + | |
1436 | + /* Free top, but only if partial2 isn't its subtree. */ | |
1437 | + if (nr) { | |
1438 | + int level = min(partial - chain, partial2 - chain2); | |
1439 | + int i; | |
1440 | + int subtree = 1; | |
1441 | + | |
1442 | + for (i = 0; i <= level; i++) { | |
1443 | + if (offsets[i] != offsets2[i]) { | |
1444 | + subtree = 0; | |
1445 | + break; | |
1446 | + } | |
1447 | + } | |
1448 | + | |
1449 | + if (!subtree) { | |
1450 | + if (partial == chain) { | |
1451 | + /* Shared branch grows from the inode */ | |
1452 | + ext4_free_branches(handle, inode, NULL, | |
1453 | + &nr, &nr+1, | |
1454 | + (chain+n-1) - partial); | |
1455 | + *partial->p = 0; | |
1456 | + } else { | |
1457 | + /* Shared branch grows from an indirect block */ | |
1458 | + BUFFER_TRACE(partial->bh, "get_write_access"); | |
1450 | 1459 | ext4_free_branches(handle, inode, partial->bh, |
1451 | - partial->p + 1, | |
1452 | - partial2->p, | |
1460 | + partial->p, | |
1461 | + partial->p+1, | |
1453 | 1462 | (chain+n-1) - partial); |
1454 | - BUFFER_TRACE(partial->bh, "call brelse"); | |
1455 | - brelse(partial->bh); | |
1456 | - BUFFER_TRACE(partial2->bh, "call brelse"); | |
1457 | - brelse(partial2->bh); | |
1458 | 1463 | } |
1459 | - return 0; | |
1460 | 1464 | } |
1465 | + } | |
1466 | + | |
1467 | + if (!nr2) { | |
1461 | 1468 | /* |
1462 | - * Clear the ends of indirect blocks on the shared branch | |
1463 | - * at the start of the range | |
1469 | + * ext4_find_shared returns Indirect structure which | |
1470 | + * points to the last element which should not be | |
1471 | + * removed by truncate. But this is end of the range | |
1472 | + * in punch_hole so we need to point to the next element | |
1464 | 1473 | */ |
1465 | - if (partial > chain) { | |
1474 | + partial2->p++; | |
1475 | + } | |
1476 | + | |
1477 | + while (partial > chain || partial2 > chain2) { | |
1478 | + int depth = (chain+n-1) - partial; | |
1479 | + int depth2 = (chain2+n2-1) - partial2; | |
1480 | + | |
1481 | + if (partial > chain && partial2 > chain2 && | |
1482 | + partial->bh->b_blocknr == partial2->bh->b_blocknr) { | |
1483 | + /* | |
1484 | + * We've converged on the same block. Clear the range, | |
1485 | + * then we're done. | |
1486 | + */ | |
1466 | 1487 | ext4_free_branches(handle, inode, partial->bh, |
1467 | - partial->p + 1, | |
1468 | - (__le32 *)partial->bh->b_data+addr_per_block, | |
1469 | - (chain+n-1) - partial); | |
1488 | + partial->p + 1, | |
1489 | + partial2->p, | |
1490 | + (chain+n-1) - partial); | |
1470 | 1491 | BUFFER_TRACE(partial->bh, "call brelse"); |
1471 | 1492 | brelse(partial->bh); |
1472 | - partial--; | |
1493 | + BUFFER_TRACE(partial2->bh, "call brelse"); | |
1494 | + brelse(partial2->bh); | |
1495 | + return 0; | |
1473 | 1496 | } |
1497 | + | |
1474 | 1498 | /* |
1475 | - * Clear the ends of indirect blocks on the shared branch | |
1476 | - * at the end of the range | |
1499 | + * The start and end partial branches may not be at the same | |
1500 | + * level even though the punch happened within one level. So, we | |
1501 | + * give them a chance to arrive at the same level, then walk | |
1502 | + * them in step with each other until we converge on the same | |
1503 | + * block. | |
1477 | 1504 | */ |
1478 | - if (partial2 > chain2) { | |
1505 | + if (partial > chain && depth <= depth2) { | |
1506 | + ext4_free_branches(handle, inode, partial->bh, | |
1507 | + partial->p + 1, | |
1508 | + (__le32 *)partial->bh->b_data+addr_per_block, | |
1509 | + (chain+n-1) - partial); | |
1510 | + BUFFER_TRACE(partial->bh, "call brelse"); | |
1511 | + brelse(partial->bh); | |
1512 | + partial--; | |
1513 | + } | |
1514 | + if (partial2 > chain2 && depth2 <= depth) { | |
1479 | 1515 | ext4_free_branches(handle, inode, partial2->bh, |
1480 | 1516 | (__le32 *)partial2->bh->b_data, |
1481 | 1517 | partial2->p, |
1482 | - (chain2+n-1) - partial2); | |
1518 | + (chain2+n2-1) - partial2); | |
1483 | 1519 | BUFFER_TRACE(partial2->bh, "call brelse"); |
1484 | 1520 | brelse(partial2->bh); |
1485 | 1521 | partial2--; |
1486 | 1522 | } |
1487 | 1523 | } |
1524 | + return 0; | |
1488 | 1525 | |
1489 | 1526 | do_indirects: |
1490 | 1527 | /* Kill the remaining (whole) subtrees */ |
fs/ext4/inode.c
... | ... | @@ -1024,6 +1024,7 @@ |
1024 | 1024 | { |
1025 | 1025 | handle_t *handle = ext4_journal_current_handle(); |
1026 | 1026 | struct inode *inode = mapping->host; |
1027 | + loff_t old_size = inode->i_size; | |
1027 | 1028 | int ret = 0, ret2; |
1028 | 1029 | int i_size_changed = 0; |
1029 | 1030 | |
... | ... | @@ -1054,6 +1055,8 @@ |
1054 | 1055 | unlock_page(page); |
1055 | 1056 | page_cache_release(page); |
1056 | 1057 | |
1058 | + if (old_size < pos) | |
1059 | + pagecache_isize_extended(inode, old_size, pos); | |
1057 | 1060 | /* |
1058 | 1061 | * Don't mark the inode dirty under page lock. First, it unnecessarily |
1059 | 1062 | * makes the holding time of page lock longer. Second, it forces lock |
... | ... | @@ -1095,6 +1098,7 @@ |
1095 | 1098 | { |
1096 | 1099 | handle_t *handle = ext4_journal_current_handle(); |
1097 | 1100 | struct inode *inode = mapping->host; |
1101 | + loff_t old_size = inode->i_size; | |
1098 | 1102 | int ret = 0, ret2; |
1099 | 1103 | int partial = 0; |
1100 | 1104 | unsigned from, to; |
... | ... | @@ -1126,6 +1130,9 @@ |
1126 | 1130 | EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; |
1127 | 1131 | unlock_page(page); |
1128 | 1132 | page_cache_release(page); |
1133 | + | |
1134 | + if (old_size < pos) | |
1135 | + pagecache_isize_extended(inode, old_size, pos); | |
1129 | 1136 | |
1130 | 1137 | if (size_changed) { |
1131 | 1138 | ret2 = ext4_mark_inode_dirty(handle, inode); |
fs/ext4/super.c
... | ... | @@ -2779,6 +2779,12 @@ |
2779 | 2779 | if (readonly) |
2780 | 2780 | return 1; |
2781 | 2781 | |
2782 | + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_READONLY)) { | |
2783 | + ext4_msg(sb, KERN_INFO, "filesystem is read-only"); | |
2784 | + sb->s_flags |= MS_RDONLY; | |
2785 | + return 1; | |
2786 | + } | |
2787 | + | |
2782 | 2788 | /* Check that feature set is OK for a read-write mount */ |
2783 | 2789 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { |
2784 | 2790 | ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " |
... | ... | @@ -3936,9 +3942,8 @@ |
3936 | 3942 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); |
3937 | 3943 | spin_lock_init(&sbi->s_next_gen_lock); |
3938 | 3944 | |
3939 | - init_timer(&sbi->s_err_report); | |
3940 | - sbi->s_err_report.function = print_daily_error_info; | |
3941 | - sbi->s_err_report.data = (unsigned long) sb; | |
3945 | + setup_timer(&sbi->s_err_report, print_daily_error_info, | |
3946 | + (unsigned long) sb); | |
3942 | 3947 | |
3943 | 3948 | /* Register extent status tree shrinker */ |
3944 | 3949 | if (ext4_es_register_shrinker(sbi)) |
... | ... | @@ -4866,9 +4871,6 @@ |
4866 | 4871 | if (sbi->s_journal && sbi->s_journal->j_task->io_context) |
4867 | 4872 | journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; |
4868 | 4873 | |
4869 | - /* | |
4870 | - * Allow the "check" option to be passed as a remount option. | |
4871 | - */ | |
4872 | 4874 | if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) { |
4873 | 4875 | err = -EINVAL; |
4874 | 4876 | goto restore_opts; |
4875 | 4877 | |
... | ... | @@ -4877,19 +4879,10 @@ |
4877 | 4879 | if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ |
4878 | 4880 | test_opt(sb, JOURNAL_CHECKSUM)) { |
4879 | 4881 | ext4_msg(sb, KERN_ERR, "changing journal_checksum " |
4880 | - "during remount not supported"); | |
4881 | - err = -EINVAL; | |
4882 | - goto restore_opts; | |
4882 | + "during remount not supported; ignoring"); | |
4883 | + sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM; | |
4883 | 4884 | } |
4884 | 4885 | |
4885 | - if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ | |
4886 | - test_opt(sb, JOURNAL_CHECKSUM)) { | |
4887 | - ext4_msg(sb, KERN_ERR, "changing journal_checksum " | |
4888 | - "during remount not supported"); | |
4889 | - err = -EINVAL; | |
4890 | - goto restore_opts; | |
4891 | - } | |
4892 | - | |
4893 | 4886 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { |
4894 | 4887 | if (test_opt2(sb, EXPLICIT_DELALLOC)) { |
4895 | 4888 | ext4_msg(sb, KERN_ERR, "can't mount with " |
... | ... | @@ -4963,7 +4956,9 @@ |
4963 | 4956 | ext4_mark_recovery_complete(sb, es); |
4964 | 4957 | } else { |
4965 | 4958 | /* Make sure we can mount this feature set readwrite */ |
4966 | - if (!ext4_feature_set_ok(sb, 0)) { | |
4959 | + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | |
4960 | + EXT4_FEATURE_RO_COMPAT_READONLY) || | |
4961 | + !ext4_feature_set_ok(sb, 0)) { | |
4967 | 4962 | err = -EROFS; |
4968 | 4963 | goto restore_opts; |
4969 | 4964 | } |
fs/jbd2/recovery.c
... | ... | @@ -524,6 +524,9 @@ |
524 | 524 | if (descr_csum_size > 0 && |
525 | 525 | !jbd2_descr_block_csum_verify(journal, |
526 | 526 | bh->b_data)) { |
527 | + printk(KERN_ERR "JBD2: Invalid checksum " | |
528 | + "recovering block %lu in log\n", | |
529 | + next_log_block); | |
527 | 530 | err = -EIO; |
528 | 531 | brelse(bh); |
529 | 532 | goto failed; |