Commit 0e446be44806240c779666591bb9e8cb0e86a50d

Authored by Christoph Hellwig
Committed by Ben Myers
1 parent bc02e8693d

xfs: add CRC checks to the log

Implement CRCs for the log buffers.  We re-use a field in
struct xlog_rec_header that was used for a weak checksum of the
log buffer payload in debug builds before.

The new checksumming uses the crc32c checksum we will use elsewhere
in XFS, and also protects the record header and additional cycle data.

Due to this there are some interesting changes in xlog_sync, as we
need to do the cycle wrapping for the split buffer case much earlier,
as we would touch the buffer after generating the checksum otherwise.

The CRC calculation is always enabled, even for non-CRC filesystems,
as adding this CRC does not change the log format. On non-CRC
filesystems, only issue an alert if a CRC mismatch is found and
allow recovery to continue - this will act as an indicator that
log recovery problems are a result of log corruption. On CRC enabled
filesystems, however, log recovery will fail.

Note that existing debug kernels will write a simple checksum value
to the log, so the first time this is run on a filesystem that was
last used on a debug kernel it will throw CRC mismatch warnings.
These can be ignored.

Initially based on a patch from Dave Chinner, then modified
significantly by Christoph Hellwig.  Modified again by Dave Chinner
to get to this version.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

Showing 3 changed files with 176 additions and 99 deletions Side-by-side Diff

... ... @@ -35,6 +35,7 @@
35 35 #include "xfs_inode.h"
36 36 #include "xfs_trace.h"
37 37 #include "xfs_fsops.h"
  38 +#include "xfs_cksum.h"
38 39  
39 40 kmem_zone_t *xfs_log_ticket_zone;
40 41  
... ... @@ -1490,6 +1491,84 @@
1490 1491 }
1491 1492  
1492 1493 /*
  1494 + * Stamp cycle number in every block
  1495 + */
  1496 +STATIC void
  1497 +xlog_pack_data(
  1498 + struct xlog *log,
  1499 + struct xlog_in_core *iclog,
  1500 + int roundoff)
  1501 +{
  1502 + int i, j, k;
  1503 + int size = iclog->ic_offset + roundoff;
  1504 + __be32 cycle_lsn;
  1505 + xfs_caddr_t dp;
  1506 +
  1507 + cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn);
  1508 +
  1509 + dp = iclog->ic_datap;
  1510 + for (i = 0; i < BTOBB(size); i++) {
  1511 + if (i >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE))
  1512 + break;
  1513 + iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp;
  1514 + *(__be32 *)dp = cycle_lsn;
  1515 + dp += BBSIZE;
  1516 + }
  1517 +
  1518 + if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
  1519 + xlog_in_core_2_t *xhdr = iclog->ic_data;
  1520 +
  1521 + for ( ; i < BTOBB(size); i++) {
  1522 + j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
  1523 + k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
  1524 + xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp;
  1525 + *(__be32 *)dp = cycle_lsn;
  1526 + dp += BBSIZE;
  1527 + }
  1528 +
  1529 + for (i = 1; i < log->l_iclog_heads; i++)
  1530 + xhdr[i].hic_xheader.xh_cycle = cycle_lsn;
  1531 + }
  1532 +}
  1533 +
  1534 +/*
  1535 + * Calculate the checksum for a log buffer.
  1536 + *
  1537 + * This is a little more complicated than it should be because the various
  1538 + * headers and the actual data are non-contiguous.
  1539 + */
  1540 +__be32
  1541 +xlog_cksum(
  1542 + struct xlog *log,
  1543 + struct xlog_rec_header *rhead,
  1544 + char *dp,
  1545 + int size)
  1546 +{
  1547 + __uint32_t crc;
  1548 +
  1549 + /* first generate the crc for the record header ... */
  1550 + crc = xfs_start_cksum((char *)rhead,
  1551 + sizeof(struct xlog_rec_header),
  1552 + offsetof(struct xlog_rec_header, h_crc));
  1553 +
  1554 + /* ... then for additional cycle data for v2 logs ... */
  1555 + if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
  1556 + union xlog_in_core2 *xhdr = (union xlog_in_core2 *)rhead;
  1557 + int i;
  1558 +
  1559 + for (i = 1; i < log->l_iclog_heads; i++) {
  1560 + crc = crc32c(crc, &xhdr[i].hic_xheader,
  1561 + sizeof(struct xlog_rec_ext_header));
  1562 + }
  1563 + }
  1564 +
  1565 + /* ... and finally for the payload */
  1566 + crc = crc32c(crc, dp, size);
  1567 +
  1568 + return xfs_end_cksum(crc);
  1569 +}
  1570 +
  1571 +/*
1493 1572 * The bdstrat callback function for log bufs. This gives us a central
1494 1573 * place to trap bufs in case we get hit by a log I/O error and need to
1495 1574 * shutdown. Actually, in practice, even when we didn't get a log error,
... ... @@ -1549,7 +1628,6 @@
1549 1628 struct xlog *log,
1550 1629 struct xlog_in_core *iclog)
1551 1630 {
1552   - xfs_caddr_t dptr; /* pointer to byte sized element */
1553 1631 xfs_buf_t *bp;
1554 1632 int i;
1555 1633 uint count; /* byte count of bwrite */
... ... @@ -1558,6 +1636,7 @@
1558 1636 int split = 0; /* split write into two regions */
1559 1637 int error;
1560 1638 int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
  1639 + int size;
1561 1640  
1562 1641 XFS_STATS_INC(xs_log_writes);
1563 1642 ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
... ... @@ -1588,13 +1667,10 @@
1588 1667 xlog_pack_data(log, iclog, roundoff);
1589 1668  
1590 1669 /* real byte length */
1591   - if (v2) {
1592   - iclog->ic_header.h_len =
1593   - cpu_to_be32(iclog->ic_offset + roundoff);
1594   - } else {
1595   - iclog->ic_header.h_len =
1596   - cpu_to_be32(iclog->ic_offset);
1597   - }
  1670 + size = iclog->ic_offset;
  1671 + if (v2)
  1672 + size += roundoff;
  1673 + iclog->ic_header.h_len = cpu_to_be32(size);
1598 1674  
1599 1675 bp = iclog->ic_bp;
1600 1676 XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
1601 1677  
1602 1678  
... ... @@ -1603,12 +1679,36 @@
1603 1679  
1604 1680 /* Do we need to split this write into 2 parts? */
1605 1681 if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) {
  1682 + char *dptr;
  1683 +
1606 1684 split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)));
1607 1685 count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp));
1608   - iclog->ic_bwritecnt = 2; /* split into 2 writes */
  1686 + iclog->ic_bwritecnt = 2;
  1687 +
  1688 + /*
  1689 + * Bump the cycle numbers at the start of each block in the
  1690 + * part of the iclog that ends up in the buffer that gets
  1691 + * written to the start of the log.
  1692 + *
  1693 + * Watch out for the header magic number case, though.
  1694 + */
  1695 + dptr = (char *)&iclog->ic_header + count;
  1696 + for (i = 0; i < split; i += BBSIZE) {
  1697 + __uint32_t cycle = be32_to_cpu(*(__be32 *)dptr);
  1698 + if (++cycle == XLOG_HEADER_MAGIC_NUM)
  1699 + cycle++;
  1700 + *(__be32 *)dptr = cpu_to_be32(cycle);
  1701 +
  1702 + dptr += BBSIZE;
  1703 + }
1609 1704 } else {
1610 1705 iclog->ic_bwritecnt = 1;
1611 1706 }
  1707 +
  1708 + /* calculate the checksum */
  1709 + iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
  1710 + iclog->ic_datap, size);
  1711 +
1612 1712 bp->b_io_length = BTOBB(count);
1613 1713 bp->b_fspriv = iclog;
1614 1714 XFS_BUF_ZEROFLAGS(bp);
... ... @@ -1662,19 +1762,6 @@
1662 1762 bp->b_flags |= XBF_SYNCIO;
1663 1763 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
1664 1764 bp->b_flags |= XBF_FUA;
1665   - dptr = bp->b_addr;
1666   - /*
1667   - * Bump the cycle numbers at the start of each block
1668   - * since this part of the buffer is at the start of
1669   - * a new cycle. Watch out for the header magic number
1670   - * case, though.
1671   - */
1672   - for (i = 0; i < split; i += BBSIZE) {
1673   - be32_add_cpu((__be32 *)dptr, 1);
1674   - if (be32_to_cpu(*(__be32 *)dptr) == XLOG_HEADER_MAGIC_NUM)
1675   - be32_add_cpu((__be32 *)dptr, 1);
1676   - dptr += BBSIZE;
1677   - }
1678 1765  
1679 1766 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
1680 1767 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
... ... @@ -1690,7 +1777,6 @@
1690 1777 }
1691 1778 return 0;
1692 1779 } /* xlog_sync */
1693   -
1694 1780  
1695 1781 /*
1696 1782 * Deallocate a log structure
fs/xfs/xfs_log_priv.h
... ... @@ -139,7 +139,6 @@
139 139 /*
140 140 * Flags for log structure
141 141 */
142   -#define XLOG_CHKSUM_MISMATCH 0x1 /* used only during recovery */
143 142 #define XLOG_ACTIVE_RECOVERY 0x2 /* in the middle of recovery */
144 143 #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */
145 144 #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being
... ... @@ -291,7 +290,7 @@
291 290 __be32 h_len; /* len in bytes; should be 64-bit aligned: 4 */
292 291 __be64 h_lsn; /* lsn of this LR : 8 */
293 292 __be64 h_tail_lsn; /* lsn of 1st LR w/ buffers not committed: 8 */
294   - __be32 h_chksum; /* may not be used; non-zero if used : 4 */
  293 + __le32 h_crc; /* crc of log record : 4 */
295 294 __be32 h_prev_block; /* block number to previous LR : 4 */
296 295 __be32 h_num_logops; /* number of log operations in this LR : 4 */
297 296 __be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE];
... ... @@ -555,11 +554,9 @@
555 554 extern int
556 555 xlog_recover_finish(
557 556 struct xlog *log);
558   -extern void
559   -xlog_pack_data(
560   - struct xlog *log,
561   - struct xlog_in_core *iclog,
562   - int);
  557 +
  558 +extern __be32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
  559 + char *dp, int size);
563 560  
564 561 extern kmem_zone_t *xfs_log_ticket_zone;
565 562 struct xlog_ticket *
fs/xfs/xfs_log_recover.c
... ... @@ -41,6 +41,7 @@
41 41 #include "xfs_trans_priv.h"
42 42 #include "xfs_quota.h"
43 43 #include "xfs_utils.h"
  44 +#include "xfs_cksum.h"
44 45 #include "xfs_trace.h"
45 46 #include "xfs_icache.h"
46 47  
47 48  
48 49  
49 50  
50 51  
51 52  
52 53  
53 54  
54 55  
55 56  
... ... @@ -3216,81 +3217,59 @@
3216 3217 mp->m_dmevmask = mp_dmevmask;
3217 3218 }
3218 3219  
3219   -
3220   -#ifdef DEBUG
3221   -STATIC void
3222   -xlog_pack_data_checksum(
3223   - struct xlog *log,
3224   - struct xlog_in_core *iclog,
3225   - int size)
3226   -{
3227   - int i;
3228   - __be32 *up;
3229   - uint chksum = 0;
3230   -
3231   - up = (__be32 *)iclog->ic_datap;
3232   - /* divide length by 4 to get # words */
3233   - for (i = 0; i < (size >> 2); i++) {
3234   - chksum ^= be32_to_cpu(*up);
3235   - up++;
3236   - }
3237   - iclog->ic_header.h_chksum = cpu_to_be32(chksum);
3238   -}
3239   -#else
3240   -#define xlog_pack_data_checksum(log, iclog, size)
3241   -#endif
3242   -
3243 3220 /*
3244   - * Stamp cycle number in every block
  3221 + * Unpack the log buffer data and crc check it. If the check fails, issue a
  3222 + * warning if and only if the CRC in the header is non-zero. This makes the
  3223 + * check an advisory warning, and the zero CRC check will prevent failure
  3224 + * warnings from being emitted when upgrading the kernel from one that does not
  3225 + * add CRCs by default.
  3226 + *
  3227 + * When filesystems are CRC enabled, this CRC mismatch becomes a fatal log
  3228 + * corruption failure
3245 3229 */
3246   -void
3247   -xlog_pack_data(
3248   - struct xlog *log,
3249   - struct xlog_in_core *iclog,
3250   - int roundoff)
  3230 +STATIC int
  3231 +xlog_unpack_data_crc(
  3232 + struct xlog_rec_header *rhead,
  3233 + xfs_caddr_t dp,
  3234 + struct xlog *log)
3251 3235 {
3252   - int i, j, k;
3253   - int size = iclog->ic_offset + roundoff;
3254   - __be32 cycle_lsn;
3255   - xfs_caddr_t dp;
  3236 + __be32 crc;
3256 3237  
3257   - xlog_pack_data_checksum(log, iclog, size);
3258   -
3259   - cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn);
3260   -
3261   - dp = iclog->ic_datap;
3262   - for (i = 0; i < BTOBB(size) &&
3263   - i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
3264   - iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp;
3265   - *(__be32 *)dp = cycle_lsn;
3266   - dp += BBSIZE;
3267   - }
3268   -
3269   - if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
3270   - xlog_in_core_2_t *xhdr = iclog->ic_data;
3271   -
3272   - for ( ; i < BTOBB(size); i++) {
3273   - j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
3274   - k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
3275   - xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp;
3276   - *(__be32 *)dp = cycle_lsn;
3277   - dp += BBSIZE;
  3238 + crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
  3239 + if (crc != rhead->h_crc) {
  3240 + if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
  3241 + xfs_alert(log->l_mp,
  3242 + "log record CRC mismatch: found 0x%x, expected 0x%x.\n",
  3243 + be32_to_cpu(rhead->h_crc),
  3244 + be32_to_cpu(crc));
  3245 + xfs_hex_dump(dp, 32);
3278 3246 }
3279 3247  
3280   - for (i = 1; i < log->l_iclog_heads; i++) {
3281   - xhdr[i].hic_xheader.xh_cycle = cycle_lsn;
3282   - }
  3248 + /*
  3249 + * If we've detected a log record corruption, then we can't
  3250 + * recover past this point. Abort recovery if we are enforcing
  3251 + * CRC protection by punting an error back up the stack.
  3252 + */
  3253 + if (xfs_sb_version_hascrc(&log->l_mp->m_sb))
  3254 + return EFSCORRUPTED;
3283 3255 }
  3256 +
  3257 + return 0;
3284 3258 }
3285 3259  
3286   -STATIC void
  3260 +STATIC int
3287 3261 xlog_unpack_data(
3288 3262 struct xlog_rec_header *rhead,
3289 3263 xfs_caddr_t dp,
3290 3264 struct xlog *log)
3291 3265 {
3292 3266 int i, j, k;
  3267 + int error;
3293 3268  
  3269 + error = xlog_unpack_data_crc(rhead, dp, log);
  3270 + if (error)
  3271 + return error;
  3272 +
3294 3273 for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
3295 3274 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
3296 3275 *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i];
... ... @@ -3306,6 +3285,8 @@
3306 3285 dp += BBSIZE;
3307 3286 }
3308 3287 }
  3288 +
  3289 + return 0;
3309 3290 }
3310 3291  
3311 3292 STATIC int
3312 3293  
... ... @@ -3437,10 +3418,14 @@
3437 3418 if (error)
3438 3419 goto bread_err2;
3439 3420  
3440   - xlog_unpack_data(rhead, offset, log);
3441   - if ((error = xlog_recover_process_data(log,
3442   - rhash, rhead, offset, pass)))
  3421 + error = xlog_unpack_data(rhead, offset, log);
  3422 + if (error)
3443 3423 goto bread_err2;
  3424 +
  3425 + error = xlog_recover_process_data(log,
  3426 + rhash, rhead, offset, pass);
  3427 + if (error)
  3428 + goto bread_err2;
3444 3429 blk_no += bblks + hblks;
3445 3430 }
3446 3431 } else {
3447 3432  
... ... @@ -3549,10 +3534,15 @@
3549 3534 if (error)
3550 3535 goto bread_err2;
3551 3536 }
3552   - xlog_unpack_data(rhead, offset, log);
3553   - if ((error = xlog_recover_process_data(log, rhash,
3554   - rhead, offset, pass)))
  3537 +
  3538 + error = xlog_unpack_data(rhead, offset, log);
  3539 + if (error)
3555 3540 goto bread_err2;
  3541 +
  3542 + error = xlog_recover_process_data(log, rhash,
  3543 + rhead, offset, pass);
  3544 + if (error)
  3545 + goto bread_err2;
3556 3546 blk_no += bblks;
3557 3547 }
3558 3548  
... ... @@ -3576,9 +3566,13 @@
3576 3566 if (error)
3577 3567 goto bread_err2;
3578 3568  
3579   - xlog_unpack_data(rhead, offset, log);
3580   - if ((error = xlog_recover_process_data(log, rhash,
3581   - rhead, offset, pass)))
  3569 + error = xlog_unpack_data(rhead, offset, log);
  3570 + if (error)
  3571 + goto bread_err2;
  3572 +
  3573 + error = xlog_recover_process_data(log, rhash,
  3574 + rhead, offset, pass);
  3575 + if (error)
3582 3576 goto bread_err2;
3583 3577 blk_no += bblks + hblks;
3584 3578 }