Commit b394e43e995d08821588a22561c6a71a63b4ff27

Authored by Lachlan McIlroy
Committed by Tim Shimmin
Parent: 776a75fa5c

[XFS] Avoid replaying inode buffer initialisation log items if on-disk version is newer.

SGI-PV: 969656
SGI-Modid: xfs-linux-melb:xfs-kern:29676a

Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
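
The problem this commit addresses: an inode-allocation transaction logs the buffer that initialises a chunk of on-disk inodes, and if those inodes were written back with newer contents before a crash, replaying the initialisation item would clobber the newer on-disk state. The sketch below illustrates the shape of that recovery-time decision only; it is not the committed code. blf_flags and XFS_BLI_INODE_NEW_BUF come from the xfs_buf_item.h diff below, while on_disk_inode_is_newer() is a hypothetical stand-in for whatever version comparison the real patch performs.

/*
 * Sketch only -- not the committed change.  Assumes a hypothetical
 * helper, on_disk_inode_is_newer(), standing in for whatever
 * comparison the real recovery code makes between the logged
 * initialisation item and the current on-disk inodes.
 */
STATIC int
xlog_recover_should_replay_inode_buf(
	xfs_buf_log_format_t	*buf_f,	/* logged buf format item */
	xfs_buf_t		*bp)	/* buffer as it is on disk now */
{
	/* Only buffers tagged as newly allocated inode chunks qualify. */
	if (!(buf_f->blf_flags & XFS_BLI_INODE_NEW_BUF))
		return 1;		/* replay as usual */

	/*
	 * If the on-disk inodes are already newer than the logged
	 * initialisation, replaying would destroy that newer state,
	 * so the item must be skipped.
	 */
	if (on_disk_inode_is_newer(bp, buf_f))
		return 0;

	return 1;
}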

Showing 3 changed files with 54 additions and 3 deletions

fs/xfs/xfs_buf_item.h
/*
 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#ifndef	__XFS_BUF_ITEM_H__
#define	__XFS_BUF_ITEM_H__

/*
 * This is the structure used to lay out a buf log item in the
 * log.  The data map describes which 128 byte chunks of the buffer
 * have been logged.
 * For 6.2 and beyond, this is XFS_LI_BUF.  We use this to log everything.
 */
typedef struct xfs_buf_log_format_t {
	unsigned short	blf_type;	/* buf log item type indicator */
	unsigned short	blf_size;	/* size of this item */
	ushort		blf_flags;	/* misc state */
	ushort		blf_len;	/* number of blocks in this buf */
	__int64_t	blf_blkno;	/* starting blkno of this buf */
	unsigned int	blf_map_size;	/* size of data bitmap in words */
	unsigned int	blf_data_map[1];/* variable size bitmap of */
					/* regions of buffer in this item */
} xfs_buf_log_format_t;

/*
 * This flag indicates that the buffer contains on disk inodes
 * and requires special recovery handling.
 */
#define	XFS_BLI_INODE_BUF	0x1
/*
 * This flag indicates that the buffer should not be replayed
 * during recovery because its blocks are being freed.
 */
#define	XFS_BLI_CANCEL		0x2
/*
 * This flag indicates that the buffer contains on disk
 * user or group dquots and may require special recovery handling.
 */
#define	XFS_BLI_UDQUOT_BUF	0x4
#define	XFS_BLI_PDQUOT_BUF	0x8
#define	XFS_BLI_GDQUOT_BUF	0x10
+/*
+ * This flag indicates that the buffer contains newly allocated
+ * inodes.
+ */
+#define	XFS_BLI_INODE_NEW_BUF	0x20

#define	XFS_BLI_CHUNK		128
#define	XFS_BLI_SHIFT		7
#define	BIT_TO_WORD_SHIFT	5
#define	NBWORD			(NBBY * sizeof(unsigned int))

/*
 * buf log item flags
 */
#define	XFS_BLI_HOLD		0x01
#define	XFS_BLI_DIRTY		0x02
#define	XFS_BLI_STALE		0x04
#define	XFS_BLI_LOGGED		0x08
#define	XFS_BLI_INODE_ALLOC_BUF	0x10
#define	XFS_BLI_STALE_INODE	0x20


#ifdef __KERNEL__

struct xfs_buf;
struct ktrace;
struct xfs_mount;
struct xfs_buf_log_item;

#if defined(XFS_BLI_TRACE)
#define	XFS_BLI_TRACE_SIZE	32

void	xfs_buf_item_trace(char *, struct xfs_buf_log_item *);
#else
#define	xfs_buf_item_trace(id, bip)
#endif

/*
 * This is the in core log item structure used to track information
 * needed to log buffers.  It tracks how many times the lock has been
 * locked, and which 128 byte chunks of the buffer are dirty.
 */
typedef struct xfs_buf_log_item {
	xfs_log_item_t		bli_item;	/* common item structure */
	struct xfs_buf		*bli_buf;	/* real buffer pointer */
	unsigned int		bli_flags;	/* misc flags */
	unsigned int		bli_recur;	/* lock recursion count */
	atomic_t		bli_refcount;	/* cnt of tp refs */
#ifdef XFS_BLI_TRACE
	struct ktrace		*bli_trace;	/* event trace buf */
#endif
#ifdef XFS_TRANS_DEBUG
	char			*bli_orig;	/* original buffer copy */
	char			*bli_logged;	/* bytes logged (bitmap) */
#endif
	xfs_buf_log_format_t	bli_format;	/* in-log header */
} xfs_buf_log_item_t;

/*
 * This structure is used during recovery to record the buf log
 * items which have been canceled and should not be replayed.
 */
typedef struct xfs_buf_cancel {
	xfs_daddr_t		bc_blkno;
	uint			bc_len;
	int			bc_refcount;
	struct xfs_buf_cancel	*bc_next;
} xfs_buf_cancel_t;

void	xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
void	xfs_buf_item_relse(struct xfs_buf *);
void	xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint);
uint	xfs_buf_item_dirty(xfs_buf_log_item_t *);
void	xfs_buf_attach_iodone(struct xfs_buf *,
			      void(*)(struct xfs_buf *, xfs_log_item_t *),
			      xfs_log_item_t *);
void	xfs_buf_iodone_callbacks(struct xfs_buf *);
void	xfs_buf_iodone(struct xfs_buf *, xfs_buf_log_item_t *);

#ifdef XFS_TRANS_DEBUG
void
xfs_buf_item_flush_log_debug(
	struct xfs_buf *bp,
	uint	first,
	uint	last);
#else
#define	xfs_buf_item_flush_log_debug(bp, first, last)
#endif

#endif	/* __KERNEL__ */

#endif	/* __XFS_BUF_ITEM_H__ */
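
Two things are worth noting in the header above. First, xfs_buf_item.h carries two distinct flag namespaces under the same XFS_BLI_ prefix: the on-disk format flags stored in blf_flags (where the new XFS_BLI_INODE_NEW_BUF takes 0x20) and the in-core bli_flags (where 0x20 is already XFS_BLI_STALE_INODE); the values overlap but never mix. Second, recovery can only honour the new flag if the transaction side sets it when an inode chunk is initialised. That happens in the third changed file, which is not part of this excerpt; the sketch below shows one plausible shape for it, assuming the in-core XFS_BLI_INODE_ALLOC_BUF marker has already been set by the allocation path.

/*
 * Sketch under the assumptions stated above -- the real hunk lives in
 * the third changed file, which this excerpt does not show.
 */
void
xfs_trans_tag_inode_new_buf(		/* hypothetical helper */
	xfs_buf_log_item_t	*bip)
{
	/* in-core marker set when the inode chunk buffer was allocated */
	if (bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF)
		/* propagate a marker into the on-disk log format item */
		bip->bli_format.blf_flags |= XFS_BLI_INODE_NEW_BUF;
}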
fs/xfs/xfs_log_recover.c
1 /* 1 /*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #include "xfs.h" 18 #include "xfs.h"
19 #include "xfs_fs.h" 19 #include "xfs_fs.h"
20 #include "xfs_types.h" 20 #include "xfs_types.h"
21 #include "xfs_bit.h" 21 #include "xfs_bit.h"
22 #include "xfs_log.h" 22 #include "xfs_log.h"
23 #include "xfs_inum.h" 23 #include "xfs_inum.h"
24 #include "xfs_trans.h" 24 #include "xfs_trans.h"
25 #include "xfs_sb.h" 25 #include "xfs_sb.h"
26 #include "xfs_ag.h" 26 #include "xfs_ag.h"
27 #include "xfs_dir2.h" 27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h" 28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h" 29 #include "xfs_mount.h"
30 #include "xfs_error.h" 30 #include "xfs_error.h"
31 #include "xfs_bmap_btree.h" 31 #include "xfs_bmap_btree.h"
32 #include "xfs_alloc_btree.h" 32 #include "xfs_alloc_btree.h"
33 #include "xfs_ialloc_btree.h" 33 #include "xfs_ialloc_btree.h"
34 #include "xfs_dir2_sf.h" 34 #include "xfs_dir2_sf.h"
35 #include "xfs_attr_sf.h" 35 #include "xfs_attr_sf.h"
36 #include "xfs_dinode.h" 36 #include "xfs_dinode.h"
37 #include "xfs_inode.h" 37 #include "xfs_inode.h"
38 #include "xfs_inode_item.h" 38 #include "xfs_inode_item.h"
39 #include "xfs_imap.h" 39 #include "xfs_imap.h"
40 #include "xfs_alloc.h" 40 #include "xfs_alloc.h"
41 #include "xfs_ialloc.h" 41 #include "xfs_ialloc.h"
42 #include "xfs_log_priv.h" 42 #include "xfs_log_priv.h"
43 #include "xfs_buf_item.h" 43 #include "xfs_buf_item.h"
44 #include "xfs_log_recover.h" 44 #include "xfs_log_recover.h"
45 #include "xfs_extfree_item.h" 45 #include "xfs_extfree_item.h"
46 #include "xfs_trans_priv.h" 46 #include "xfs_trans_priv.h"
47 #include "xfs_quota.h" 47 #include "xfs_quota.h"
48 #include "xfs_rw.h" 48 #include "xfs_rw.h"
49 49
50 STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); 50 STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *);
51 STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); 51 STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t);
52 STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q, 52 STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q,
53 xlog_recover_item_t *item); 53 xlog_recover_item_t *item);
54 #if defined(DEBUG) 54 #if defined(DEBUG)
55 STATIC void xlog_recover_check_summary(xlog_t *); 55 STATIC void xlog_recover_check_summary(xlog_t *);
56 STATIC void xlog_recover_check_ail(xfs_mount_t *, xfs_log_item_t *, int); 56 STATIC void xlog_recover_check_ail(xfs_mount_t *, xfs_log_item_t *, int);
57 #else 57 #else
58 #define xlog_recover_check_summary(log) 58 #define xlog_recover_check_summary(log)
59 #define xlog_recover_check_ail(mp, lip, gen) 59 #define xlog_recover_check_ail(mp, lip, gen)
60 #endif 60 #endif
61 61
62 62
63 /* 63 /*
64 * Sector aligned buffer routines for buffer create/read/write/access 64 * Sector aligned buffer routines for buffer create/read/write/access
65 */ 65 */
66 66
67 #define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs) \ 67 #define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs) \
68 ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? \ 68 ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? \
69 ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) ) 69 ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) )
70 #define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask) 70 #define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask)
71 71
72 xfs_buf_t * 72 xfs_buf_t *
73 xlog_get_bp( 73 xlog_get_bp(
74 xlog_t *log, 74 xlog_t *log,
75 int num_bblks) 75 int num_bblks)
76 { 76 {
77 ASSERT(num_bblks > 0); 77 ASSERT(num_bblks > 0);
78 78
79 if (log->l_sectbb_log) { 79 if (log->l_sectbb_log) {
80 if (num_bblks > 1) 80 if (num_bblks > 1)
81 num_bblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); 81 num_bblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1);
82 num_bblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, num_bblks); 82 num_bblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, num_bblks);
83 } 83 }
84 return xfs_buf_get_noaddr(BBTOB(num_bblks), log->l_mp->m_logdev_targp); 84 return xfs_buf_get_noaddr(BBTOB(num_bblks), log->l_mp->m_logdev_targp);
85 } 85 }
86 86
87 void 87 void
88 xlog_put_bp( 88 xlog_put_bp(
89 xfs_buf_t *bp) 89 xfs_buf_t *bp)
90 { 90 {
91 xfs_buf_free(bp); 91 xfs_buf_free(bp);
92 } 92 }
93 93
94 94
95 /* 95 /*
96 * nbblks should be uint, but oh well. Just want to catch that 32-bit length. 96 * nbblks should be uint, but oh well. Just want to catch that 32-bit length.
97 */ 97 */
98 int 98 int
99 xlog_bread( 99 xlog_bread(
100 xlog_t *log, 100 xlog_t *log,
101 xfs_daddr_t blk_no, 101 xfs_daddr_t blk_no,
102 int nbblks, 102 int nbblks,
103 xfs_buf_t *bp) 103 xfs_buf_t *bp)
104 { 104 {
105 int error; 105 int error;
106 106
107 if (log->l_sectbb_log) { 107 if (log->l_sectbb_log) {
108 blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); 108 blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no);
109 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); 109 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
110 } 110 }
111 111
112 ASSERT(nbblks > 0); 112 ASSERT(nbblks > 0);
113 ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); 113 ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
114 ASSERT(bp); 114 ASSERT(bp);
115 115
116 XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); 116 XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
117 XFS_BUF_READ(bp); 117 XFS_BUF_READ(bp);
118 XFS_BUF_BUSY(bp); 118 XFS_BUF_BUSY(bp);
119 XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); 119 XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
120 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); 120 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
121 121
122 xfsbdstrat(log->l_mp, bp); 122 xfsbdstrat(log->l_mp, bp);
123 if ((error = xfs_iowait(bp))) 123 if ((error = xfs_iowait(bp)))
124 xfs_ioerror_alert("xlog_bread", log->l_mp, 124 xfs_ioerror_alert("xlog_bread", log->l_mp,
125 bp, XFS_BUF_ADDR(bp)); 125 bp, XFS_BUF_ADDR(bp));
126 return error; 126 return error;
127 } 127 }
128 128
129 /* 129 /*
130 * Write out the buffer at the given block for the given number of blocks. 130 * Write out the buffer at the given block for the given number of blocks.
131 * The buffer is kept locked across the write and is returned locked. 131 * The buffer is kept locked across the write and is returned locked.
132 * This can only be used for synchronous log writes. 132 * This can only be used for synchronous log writes.
133 */ 133 */
134 STATIC int 134 STATIC int
135 xlog_bwrite( 135 xlog_bwrite(
136 xlog_t *log, 136 xlog_t *log,
137 xfs_daddr_t blk_no, 137 xfs_daddr_t blk_no,
138 int nbblks, 138 int nbblks,
139 xfs_buf_t *bp) 139 xfs_buf_t *bp)
140 { 140 {
141 int error; 141 int error;
142 142
143 if (log->l_sectbb_log) { 143 if (log->l_sectbb_log) {
144 blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); 144 blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no);
145 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); 145 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
146 } 146 }
147 147
148 ASSERT(nbblks > 0); 148 ASSERT(nbblks > 0);
149 ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); 149 ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
150 150
151 XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); 151 XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
152 XFS_BUF_ZEROFLAGS(bp); 152 XFS_BUF_ZEROFLAGS(bp);
153 XFS_BUF_BUSY(bp); 153 XFS_BUF_BUSY(bp);
154 XFS_BUF_HOLD(bp); 154 XFS_BUF_HOLD(bp);
155 XFS_BUF_PSEMA(bp, PRIBIO); 155 XFS_BUF_PSEMA(bp, PRIBIO);
156 XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); 156 XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
157 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); 157 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
158 158
159 if ((error = xfs_bwrite(log->l_mp, bp))) 159 if ((error = xfs_bwrite(log->l_mp, bp)))
160 xfs_ioerror_alert("xlog_bwrite", log->l_mp, 160 xfs_ioerror_alert("xlog_bwrite", log->l_mp,
161 bp, XFS_BUF_ADDR(bp)); 161 bp, XFS_BUF_ADDR(bp));
162 return error; 162 return error;
163 } 163 }
164 164
165 STATIC xfs_caddr_t 165 STATIC xfs_caddr_t
166 xlog_align( 166 xlog_align(
167 xlog_t *log, 167 xlog_t *log,
168 xfs_daddr_t blk_no, 168 xfs_daddr_t blk_no,
169 int nbblks, 169 int nbblks,
170 xfs_buf_t *bp) 170 xfs_buf_t *bp)
171 { 171 {
172 xfs_caddr_t ptr; 172 xfs_caddr_t ptr;
173 173
174 if (!log->l_sectbb_log) 174 if (!log->l_sectbb_log)
175 return XFS_BUF_PTR(bp); 175 return XFS_BUF_PTR(bp);
176 176
177 ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask); 177 ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask);
178 ASSERT(XFS_BUF_SIZE(bp) >= 178 ASSERT(XFS_BUF_SIZE(bp) >=
179 BBTOB(nbblks + (blk_no & log->l_sectbb_mask))); 179 BBTOB(nbblks + (blk_no & log->l_sectbb_mask)));
180 return ptr; 180 return ptr;
181 } 181 }
182 182
183 #ifdef DEBUG 183 #ifdef DEBUG
184 /* 184 /*
185 * dump debug superblock and log record information 185 * dump debug superblock and log record information
186 */ 186 */
187 STATIC void 187 STATIC void
188 xlog_header_check_dump( 188 xlog_header_check_dump(
189 xfs_mount_t *mp, 189 xfs_mount_t *mp,
190 xlog_rec_header_t *head) 190 xlog_rec_header_t *head)
191 { 191 {
192 int b; 192 int b;
193 193
194 cmn_err(CE_DEBUG, "%s: SB : uuid = ", __FUNCTION__); 194 cmn_err(CE_DEBUG, "%s: SB : uuid = ", __FUNCTION__);
195 for (b = 0; b < 16; b++) 195 for (b = 0; b < 16; b++)
196 cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]); 196 cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]);
197 cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT); 197 cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT);
198 cmn_err(CE_DEBUG, " log : uuid = "); 198 cmn_err(CE_DEBUG, " log : uuid = ");
199 for (b = 0; b < 16; b++) 199 for (b = 0; b < 16; b++)
200 cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]); 200 cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]);
201 cmn_err(CE_DEBUG, ", fmt = %d\n", INT_GET(head->h_fmt, ARCH_CONVERT)); 201 cmn_err(CE_DEBUG, ", fmt = %d\n", INT_GET(head->h_fmt, ARCH_CONVERT));
202 } 202 }
203 #else 203 #else
204 #define xlog_header_check_dump(mp, head) 204 #define xlog_header_check_dump(mp, head)
205 #endif 205 #endif
206 206
207 /* 207 /*
208 * check log record header for recovery 208 * check log record header for recovery
209 */ 209 */
210 STATIC int 210 STATIC int
211 xlog_header_check_recover( 211 xlog_header_check_recover(
212 xfs_mount_t *mp, 212 xfs_mount_t *mp,
213 xlog_rec_header_t *head) 213 xlog_rec_header_t *head)
214 { 214 {
215 ASSERT(INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM); 215 ASSERT(INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
216 216
217 /* 217 /*
218 * IRIX doesn't write the h_fmt field and leaves it zeroed 218 * IRIX doesn't write the h_fmt field and leaves it zeroed
219 * (XLOG_FMT_UNKNOWN). This stops us from trying to recover 219 * (XLOG_FMT_UNKNOWN). This stops us from trying to recover
220 * a dirty log created in IRIX. 220 * a dirty log created in IRIX.
221 */ 221 */
222 if (unlikely(INT_GET(head->h_fmt, ARCH_CONVERT) != XLOG_FMT)) { 222 if (unlikely(INT_GET(head->h_fmt, ARCH_CONVERT) != XLOG_FMT)) {
223 xlog_warn( 223 xlog_warn(
224 "XFS: dirty log written in incompatible format - can't recover"); 224 "XFS: dirty log written in incompatible format - can't recover");
225 xlog_header_check_dump(mp, head); 225 xlog_header_check_dump(mp, head);
226 XFS_ERROR_REPORT("xlog_header_check_recover(1)", 226 XFS_ERROR_REPORT("xlog_header_check_recover(1)",
227 XFS_ERRLEVEL_HIGH, mp); 227 XFS_ERRLEVEL_HIGH, mp);
228 return XFS_ERROR(EFSCORRUPTED); 228 return XFS_ERROR(EFSCORRUPTED);
229 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { 229 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
230 xlog_warn( 230 xlog_warn(
231 "XFS: dirty log entry has mismatched uuid - can't recover"); 231 "XFS: dirty log entry has mismatched uuid - can't recover");
232 xlog_header_check_dump(mp, head); 232 xlog_header_check_dump(mp, head);
233 XFS_ERROR_REPORT("xlog_header_check_recover(2)", 233 XFS_ERROR_REPORT("xlog_header_check_recover(2)",
234 XFS_ERRLEVEL_HIGH, mp); 234 XFS_ERRLEVEL_HIGH, mp);
235 return XFS_ERROR(EFSCORRUPTED); 235 return XFS_ERROR(EFSCORRUPTED);
236 } 236 }
237 return 0; 237 return 0;
238 } 238 }
239 239
240 /* 240 /*
241 * read the head block of the log and check the header 241 * read the head block of the log and check the header
242 */ 242 */
243 STATIC int 243 STATIC int
244 xlog_header_check_mount( 244 xlog_header_check_mount(
245 xfs_mount_t *mp, 245 xfs_mount_t *mp,
246 xlog_rec_header_t *head) 246 xlog_rec_header_t *head)
247 { 247 {
248 ASSERT(INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM); 248 ASSERT(INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
249 249
250 if (uuid_is_nil(&head->h_fs_uuid)) { 250 if (uuid_is_nil(&head->h_fs_uuid)) {
251 /* 251 /*
252 * IRIX doesn't write the h_fs_uuid or h_fmt fields. If 252 * IRIX doesn't write the h_fs_uuid or h_fmt fields. If
253 * h_fs_uuid is nil, we assume this log was last mounted 253 * h_fs_uuid is nil, we assume this log was last mounted
254 * by IRIX and continue. 254 * by IRIX and continue.
255 */ 255 */
256 xlog_warn("XFS: nil uuid in log - IRIX style log"); 256 xlog_warn("XFS: nil uuid in log - IRIX style log");
257 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { 257 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
258 xlog_warn("XFS: log has mismatched uuid - can't recover"); 258 xlog_warn("XFS: log has mismatched uuid - can't recover");
259 xlog_header_check_dump(mp, head); 259 xlog_header_check_dump(mp, head);
260 XFS_ERROR_REPORT("xlog_header_check_mount", 260 XFS_ERROR_REPORT("xlog_header_check_mount",
261 XFS_ERRLEVEL_HIGH, mp); 261 XFS_ERRLEVEL_HIGH, mp);
262 return XFS_ERROR(EFSCORRUPTED); 262 return XFS_ERROR(EFSCORRUPTED);
263 } 263 }
264 return 0; 264 return 0;
265 } 265 }
266 266
267 STATIC void 267 STATIC void
268 xlog_recover_iodone( 268 xlog_recover_iodone(
269 struct xfs_buf *bp) 269 struct xfs_buf *bp)
270 { 270 {
271 xfs_mount_t *mp; 271 xfs_mount_t *mp;
272 272
273 ASSERT(XFS_BUF_FSPRIVATE(bp, void *)); 273 ASSERT(XFS_BUF_FSPRIVATE(bp, void *));
274 274
275 if (XFS_BUF_GETERROR(bp)) { 275 if (XFS_BUF_GETERROR(bp)) {
276 /* 276 /*
277 * We're not going to bother about retrying 277 * We're not going to bother about retrying
278 * this during recovery. One strike! 278 * this during recovery. One strike!
279 */ 279 */
280 mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *); 280 mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *);
281 xfs_ioerror_alert("xlog_recover_iodone", 281 xfs_ioerror_alert("xlog_recover_iodone",
282 mp, bp, XFS_BUF_ADDR(bp)); 282 mp, bp, XFS_BUF_ADDR(bp));
283 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 283 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
284 } 284 }
285 XFS_BUF_SET_FSPRIVATE(bp, NULL); 285 XFS_BUF_SET_FSPRIVATE(bp, NULL);
286 XFS_BUF_CLR_IODONE_FUNC(bp); 286 XFS_BUF_CLR_IODONE_FUNC(bp);
287 xfs_biodone(bp); 287 xfs_biodone(bp);
288 } 288 }
289 289
290 /* 290 /*
291 * This routine finds (to an approximation) the first block in the physical 291 * This routine finds (to an approximation) the first block in the physical
292 * log which contains the given cycle. It uses a binary search algorithm. 292 * log which contains the given cycle. It uses a binary search algorithm.
293 * Note that the algorithm can not be perfect because the disk will not 293 * Note that the algorithm can not be perfect because the disk will not
294 * necessarily be perfect. 294 * necessarily be perfect.
295 */ 295 */
296 int 296 int
297 xlog_find_cycle_start( 297 xlog_find_cycle_start(
298 xlog_t *log, 298 xlog_t *log,
299 xfs_buf_t *bp, 299 xfs_buf_t *bp,
300 xfs_daddr_t first_blk, 300 xfs_daddr_t first_blk,
301 xfs_daddr_t *last_blk, 301 xfs_daddr_t *last_blk,
302 uint cycle) 302 uint cycle)
303 { 303 {
304 xfs_caddr_t offset; 304 xfs_caddr_t offset;
305 xfs_daddr_t mid_blk; 305 xfs_daddr_t mid_blk;
306 uint mid_cycle; 306 uint mid_cycle;
307 int error; 307 int error;
308 308
309 mid_blk = BLK_AVG(first_blk, *last_blk); 309 mid_blk = BLK_AVG(first_blk, *last_blk);
310 while (mid_blk != first_blk && mid_blk != *last_blk) { 310 while (mid_blk != first_blk && mid_blk != *last_blk) {
311 if ((error = xlog_bread(log, mid_blk, 1, bp))) 311 if ((error = xlog_bread(log, mid_blk, 1, bp)))
312 return error; 312 return error;
313 offset = xlog_align(log, mid_blk, 1, bp); 313 offset = xlog_align(log, mid_blk, 1, bp);
314 mid_cycle = GET_CYCLE(offset, ARCH_CONVERT); 314 mid_cycle = GET_CYCLE(offset, ARCH_CONVERT);
315 if (mid_cycle == cycle) { 315 if (mid_cycle == cycle) {
316 *last_blk = mid_blk; 316 *last_blk = mid_blk;
317 /* last_half_cycle == mid_cycle */ 317 /* last_half_cycle == mid_cycle */
318 } else { 318 } else {
319 first_blk = mid_blk; 319 first_blk = mid_blk;
320 /* first_half_cycle == mid_cycle */ 320 /* first_half_cycle == mid_cycle */
321 } 321 }
322 mid_blk = BLK_AVG(first_blk, *last_blk); 322 mid_blk = BLK_AVG(first_blk, *last_blk);
323 } 323 }
324 ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) || 324 ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) ||
325 (mid_blk == *last_blk && mid_blk-1 == first_blk)); 325 (mid_blk == *last_blk && mid_blk-1 == first_blk));
326 326
327 return 0; 327 return 0;
328 } 328 }
329 329
330 /* 330 /*
331 * Check that the range of blocks does not contain the cycle number 331 * Check that the range of blocks does not contain the cycle number
332 * given. The scan needs to occur from front to back and the ptr into the 332 * given. The scan needs to occur from front to back and the ptr into the
333 * region must be updated since a later routine will need to perform another 333 * region must be updated since a later routine will need to perform another
334 * test. If the region is completely good, we end up returning the same 334 * test. If the region is completely good, we end up returning the same
335 * last block number. 335 * last block number.
336 * 336 *
337 * Set blkno to -1 if we encounter no errors. This is an invalid block number 337 * Set blkno to -1 if we encounter no errors. This is an invalid block number
338 * since we don't ever expect logs to get this large. 338 * since we don't ever expect logs to get this large.
339 */ 339 */
340 STATIC int 340 STATIC int
341 xlog_find_verify_cycle( 341 xlog_find_verify_cycle(
342 xlog_t *log, 342 xlog_t *log,
343 xfs_daddr_t start_blk, 343 xfs_daddr_t start_blk,
344 int nbblks, 344 int nbblks,
345 uint stop_on_cycle_no, 345 uint stop_on_cycle_no,
346 xfs_daddr_t *new_blk) 346 xfs_daddr_t *new_blk)
347 { 347 {
348 xfs_daddr_t i, j; 348 xfs_daddr_t i, j;
349 uint cycle; 349 uint cycle;
350 xfs_buf_t *bp; 350 xfs_buf_t *bp;
351 xfs_daddr_t bufblks; 351 xfs_daddr_t bufblks;
352 xfs_caddr_t buf = NULL; 352 xfs_caddr_t buf = NULL;
353 int error = 0; 353 int error = 0;
354 354
355 bufblks = 1 << ffs(nbblks); 355 bufblks = 1 << ffs(nbblks);
356 356
357 while (!(bp = xlog_get_bp(log, bufblks))) { 357 while (!(bp = xlog_get_bp(log, bufblks))) {
358 /* can't get enough memory to do everything in one big buffer */ 358 /* can't get enough memory to do everything in one big buffer */
359 bufblks >>= 1; 359 bufblks >>= 1;
360 if (bufblks <= log->l_sectbb_log) 360 if (bufblks <= log->l_sectbb_log)
361 return ENOMEM; 361 return ENOMEM;
362 } 362 }
363 363
364 for (i = start_blk; i < start_blk + nbblks; i += bufblks) { 364 for (i = start_blk; i < start_blk + nbblks; i += bufblks) {
365 int bcount; 365 int bcount;
366 366
367 bcount = min(bufblks, (start_blk + nbblks - i)); 367 bcount = min(bufblks, (start_blk + nbblks - i));
368 368
369 if ((error = xlog_bread(log, i, bcount, bp))) 369 if ((error = xlog_bread(log, i, bcount, bp)))
370 goto out; 370 goto out;
371 371
372 buf = xlog_align(log, i, bcount, bp); 372 buf = xlog_align(log, i, bcount, bp);
373 for (j = 0; j < bcount; j++) { 373 for (j = 0; j < bcount; j++) {
374 cycle = GET_CYCLE(buf, ARCH_CONVERT); 374 cycle = GET_CYCLE(buf, ARCH_CONVERT);
375 if (cycle == stop_on_cycle_no) { 375 if (cycle == stop_on_cycle_no) {
376 *new_blk = i+j; 376 *new_blk = i+j;
377 goto out; 377 goto out;
378 } 378 }
379 379
380 buf += BBSIZE; 380 buf += BBSIZE;
381 } 381 }
382 } 382 }
383 383
384 *new_blk = -1; 384 *new_blk = -1;
385 385
386 out: 386 out:
387 xlog_put_bp(bp); 387 xlog_put_bp(bp);
388 return error; 388 return error;
389 } 389 }
390 390
391 /* 391 /*
392 * Potentially backup over partial log record write. 392 * Potentially backup over partial log record write.
393 * 393 *
394 * In the typical case, last_blk is the number of the block directly after 394 * In the typical case, last_blk is the number of the block directly after
395 * a good log record. Therefore, we subtract one to get the block number 395 * a good log record. Therefore, we subtract one to get the block number
396 * of the last block in the given buffer. extra_bblks contains the number 396 * of the last block in the given buffer. extra_bblks contains the number
397 * of blocks we would have read on a previous read. This happens when the 397 * of blocks we would have read on a previous read. This happens when the
398 * last log record is split over the end of the physical log. 398 * last log record is split over the end of the physical log.
399 * 399 *
400 * extra_bblks is the number of blocks potentially verified on a previous 400 * extra_bblks is the number of blocks potentially verified on a previous
401 * call to this routine. 401 * call to this routine.
402 */ 402 */
403 STATIC int 403 STATIC int
404 xlog_find_verify_log_record( 404 xlog_find_verify_log_record(
405 xlog_t *log, 405 xlog_t *log,
406 xfs_daddr_t start_blk, 406 xfs_daddr_t start_blk,
407 xfs_daddr_t *last_blk, 407 xfs_daddr_t *last_blk,
408 int extra_bblks) 408 int extra_bblks)
409 { 409 {
410 xfs_daddr_t i; 410 xfs_daddr_t i;
411 xfs_buf_t *bp; 411 xfs_buf_t *bp;
412 xfs_caddr_t offset = NULL; 412 xfs_caddr_t offset = NULL;
413 xlog_rec_header_t *head = NULL; 413 xlog_rec_header_t *head = NULL;
414 int error = 0; 414 int error = 0;
415 int smallmem = 0; 415 int smallmem = 0;
416 int num_blks = *last_blk - start_blk; 416 int num_blks = *last_blk - start_blk;
417 int xhdrs; 417 int xhdrs;
418 418
419 ASSERT(start_blk != 0 || *last_blk != start_blk); 419 ASSERT(start_blk != 0 || *last_blk != start_blk);
420 420
421 if (!(bp = xlog_get_bp(log, num_blks))) { 421 if (!(bp = xlog_get_bp(log, num_blks))) {
422 if (!(bp = xlog_get_bp(log, 1))) 422 if (!(bp = xlog_get_bp(log, 1)))
423 return ENOMEM; 423 return ENOMEM;
424 smallmem = 1; 424 smallmem = 1;
425 } else { 425 } else {
426 if ((error = xlog_bread(log, start_blk, num_blks, bp))) 426 if ((error = xlog_bread(log, start_blk, num_blks, bp)))
427 goto out; 427 goto out;
428 offset = xlog_align(log, start_blk, num_blks, bp); 428 offset = xlog_align(log, start_blk, num_blks, bp);
429 offset += ((num_blks - 1) << BBSHIFT); 429 offset += ((num_blks - 1) << BBSHIFT);
430 } 430 }
431 431
432 for (i = (*last_blk) - 1; i >= 0; i--) { 432 for (i = (*last_blk) - 1; i >= 0; i--) {
433 if (i < start_blk) { 433 if (i < start_blk) {
434 /* valid log record not found */ 434 /* valid log record not found */
435 xlog_warn( 435 xlog_warn(
436 "XFS: Log inconsistent (didn't find previous header)"); 436 "XFS: Log inconsistent (didn't find previous header)");
437 ASSERT(0); 437 ASSERT(0);
438 error = XFS_ERROR(EIO); 438 error = XFS_ERROR(EIO);
439 goto out; 439 goto out;
440 } 440 }
441 441
442 if (smallmem) { 442 if (smallmem) {
443 if ((error = xlog_bread(log, i, 1, bp))) 443 if ((error = xlog_bread(log, i, 1, bp)))
444 goto out; 444 goto out;
445 offset = xlog_align(log, i, 1, bp); 445 offset = xlog_align(log, i, 1, bp);
446 } 446 }
447 447
448 head = (xlog_rec_header_t *)offset; 448 head = (xlog_rec_header_t *)offset;
449 449
450 if (XLOG_HEADER_MAGIC_NUM == 450 if (XLOG_HEADER_MAGIC_NUM ==
451 INT_GET(head->h_magicno, ARCH_CONVERT)) 451 INT_GET(head->h_magicno, ARCH_CONVERT))
452 break; 452 break;
453 453
454 if (!smallmem) 454 if (!smallmem)
455 offset -= BBSIZE; 455 offset -= BBSIZE;
456 } 456 }
457 457
458 /* 458 /*
459 * We hit the beginning of the physical log & still no header. Return 459 * We hit the beginning of the physical log & still no header. Return
460 * to caller. If caller can handle a return of -1, then this routine 460 * to caller. If caller can handle a return of -1, then this routine
461 * will be called again for the end of the physical log. 461 * will be called again for the end of the physical log.
462 */ 462 */
463 if (i == -1) { 463 if (i == -1) {
464 error = -1; 464 error = -1;
465 goto out; 465 goto out;
466 } 466 }
467 467
468 /* 468 /*
469 * We have the final block of the good log (the first block 469 * We have the final block of the good log (the first block
470 * of the log record _before_ the head. So we check the uuid. 470 * of the log record _before_ the head. So we check the uuid.
471 */ 471 */
472 if ((error = xlog_header_check_mount(log->l_mp, head))) 472 if ((error = xlog_header_check_mount(log->l_mp, head)))
473 goto out; 473 goto out;
474 474
475 /* 475 /*
476 * We may have found a log record header before we expected one. 476 * We may have found a log record header before we expected one.
477 * last_blk will be the 1st block # with a given cycle #. We may end 477 * last_blk will be the 1st block # with a given cycle #. We may end
478 * up reading an entire log record. In this case, we don't want to 478 * up reading an entire log record. In this case, we don't want to
479 * reset last_blk. Only when last_blk points in the middle of a log 479 * reset last_blk. Only when last_blk points in the middle of a log
480 * record do we update last_blk. 480 * record do we update last_blk.
481 */ 481 */
482 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { 482 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
483 uint h_size = INT_GET(head->h_size, ARCH_CONVERT); 483 uint h_size = INT_GET(head->h_size, ARCH_CONVERT);
484 484
485 xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE; 485 xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
486 if (h_size % XLOG_HEADER_CYCLE_SIZE) 486 if (h_size % XLOG_HEADER_CYCLE_SIZE)
487 xhdrs++; 487 xhdrs++;
488 } else { 488 } else {
489 xhdrs = 1; 489 xhdrs = 1;
490 } 490 }
491 491
492 if (*last_blk - i + extra_bblks 492 if (*last_blk - i + extra_bblks
493 != BTOBB(INT_GET(head->h_len, ARCH_CONVERT)) + xhdrs) 493 != BTOBB(INT_GET(head->h_len, ARCH_CONVERT)) + xhdrs)
494 *last_blk = i; 494 *last_blk = i;
495 495
496 out: 496 out:
497 xlog_put_bp(bp); 497 xlog_put_bp(bp);
498 return error; 498 return error;
499 } 499 }
500 500
501 /* 501 /*
502 * Head is defined to be the point of the log where the next log write 502 * Head is defined to be the point of the log where the next log write
503 * write could go. This means that incomplete LR writes at the end are 503 * write could go. This means that incomplete LR writes at the end are
504 * eliminated when calculating the head. We aren't guaranteed that previous 504 * eliminated when calculating the head. We aren't guaranteed that previous
505 * LR have complete transactions. We only know that a cycle number of 505 * LR have complete transactions. We only know that a cycle number of
506 * current cycle number -1 won't be present in the log if we start writing 506 * current cycle number -1 won't be present in the log if we start writing
507 * from our current block number. 507 * from our current block number.
508 * 508 *
509 * last_blk contains the block number of the first block with a given 509 * last_blk contains the block number of the first block with a given
510 * cycle number. 510 * cycle number.
511 * 511 *
512 * Return: zero if normal, non-zero if error. 512 * Return: zero if normal, non-zero if error.
513 */ 513 */
514 STATIC int 514 STATIC int
515 xlog_find_head( 515 xlog_find_head(
516 xlog_t *log, 516 xlog_t *log,
517 xfs_daddr_t *return_head_blk) 517 xfs_daddr_t *return_head_blk)
518 { 518 {
519 xfs_buf_t *bp; 519 xfs_buf_t *bp;
520 xfs_caddr_t offset; 520 xfs_caddr_t offset;
521 xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; 521 xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
522 int num_scan_bblks; 522 int num_scan_bblks;
523 uint first_half_cycle, last_half_cycle; 523 uint first_half_cycle, last_half_cycle;
524 uint stop_on_cycle; 524 uint stop_on_cycle;
525 int error, log_bbnum = log->l_logBBsize; 525 int error, log_bbnum = log->l_logBBsize;
526 526
527 /* Is the end of the log device zeroed? */ 527 /* Is the end of the log device zeroed? */
528 if ((error = xlog_find_zeroed(log, &first_blk)) == -1) { 528 if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
529 *return_head_blk = first_blk; 529 *return_head_blk = first_blk;
530 530
531 /* Is the whole lot zeroed? */ 531 /* Is the whole lot zeroed? */
532 if (!first_blk) { 532 if (!first_blk) {
533 /* Linux XFS shouldn't generate totally zeroed logs - 533 /* Linux XFS shouldn't generate totally zeroed logs -
534 * mkfs etc write a dummy unmount record to a fresh 534 * mkfs etc write a dummy unmount record to a fresh
535 * log so we can store the uuid in there 535 * log so we can store the uuid in there
536 */ 536 */
537 xlog_warn("XFS: totally zeroed log"); 537 xlog_warn("XFS: totally zeroed log");
538 } 538 }
539 539
540 return 0; 540 return 0;
541 } else if (error) { 541 } else if (error) {
542 xlog_warn("XFS: empty log check failed"); 542 xlog_warn("XFS: empty log check failed");
543 return error; 543 return error;
544 } 544 }
545 545
546 first_blk = 0; /* get cycle # of 1st block */ 546 first_blk = 0; /* get cycle # of 1st block */
547 bp = xlog_get_bp(log, 1); 547 bp = xlog_get_bp(log, 1);
548 if (!bp) 548 if (!bp)
549 return ENOMEM; 549 return ENOMEM;
550 if ((error = xlog_bread(log, 0, 1, bp))) 550 if ((error = xlog_bread(log, 0, 1, bp)))
551 goto bp_err; 551 goto bp_err;
552 offset = xlog_align(log, 0, 1, bp); 552 offset = xlog_align(log, 0, 1, bp);
553 first_half_cycle = GET_CYCLE(offset, ARCH_CONVERT); 553 first_half_cycle = GET_CYCLE(offset, ARCH_CONVERT);
554 554
555 last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ 555 last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */
556 if ((error = xlog_bread(log, last_blk, 1, bp))) 556 if ((error = xlog_bread(log, last_blk, 1, bp)))
557 goto bp_err; 557 goto bp_err;
558 offset = xlog_align(log, last_blk, 1, bp); 558 offset = xlog_align(log, last_blk, 1, bp);
559 last_half_cycle = GET_CYCLE(offset, ARCH_CONVERT); 559 last_half_cycle = GET_CYCLE(offset, ARCH_CONVERT);
560 ASSERT(last_half_cycle != 0); 560 ASSERT(last_half_cycle != 0);
561 561
562 /* 562 /*
563 * If the 1st half cycle number is equal to the last half cycle number, 563 * If the 1st half cycle number is equal to the last half cycle number,
564 * then the entire log is stamped with the same cycle number. In this 564 * then the entire log is stamped with the same cycle number. In this
565 * case, head_blk can't be set to zero (which makes sense). The below 565 * case, head_blk can't be set to zero (which makes sense). The below
566 * math doesn't work out properly with head_blk equal to zero. Instead, 566 * math doesn't work out properly with head_blk equal to zero. Instead,
567 * we set it to log_bbnum which is an invalid block number, but this 567 * we set it to log_bbnum which is an invalid block number, but this
568 * value makes the math correct. If head_blk doesn't changed through 568 * value makes the math correct. If head_blk doesn't changed through
569 * all the tests below, *head_blk is set to zero at the very end rather 569 * all the tests below, *head_blk is set to zero at the very end rather
570 * than log_bbnum. In a sense, log_bbnum and zero are the same block 570 * than log_bbnum. In a sense, log_bbnum and zero are the same block
571 * in a circular file. 571 * in a circular file.
572 */ 572 */
573 if (first_half_cycle == last_half_cycle) { 573 if (first_half_cycle == last_half_cycle) {
574 /* 574 /*
575 * In this case we believe that the entire log should have 575 * In this case we believe that the entire log should have
576 * cycle number last_half_cycle. We need to scan backwards 576 * cycle number last_half_cycle. We need to scan backwards
577 * from the end verifying that there are no holes still 577 * from the end verifying that there are no holes still
578 * containing last_half_cycle - 1. If we find such a hole, 578 * containing last_half_cycle - 1. If we find such a hole,
579 * then the start of that hole will be the new head. The 579 * then the start of that hole will be the new head. The
580 * simple case looks like 580 * simple case looks like
581 * x | x ... | x - 1 | x 581 * x | x ... | x - 1 | x
582 * Another case that fits this picture would be 582 * Another case that fits this picture would be
583 * x | x + 1 | x ... | x 583 * x | x + 1 | x ... | x
584 * In this case the head really is somewhere at the end of the 584 * In this case the head really is somewhere at the end of the
585 * log, as one of the latest writes at the beginning was 585 * log, as one of the latest writes at the beginning was
586 * incomplete. 586 * incomplete.
587 * One more case is 587 * One more case is
588 * x | x + 1 | x ... | x - 1 | x 588 * x | x + 1 | x ... | x - 1 | x
589 * This is really the combination of the above two cases, and 589 * This is really the combination of the above two cases, and
590 * the head has to end up at the start of the x-1 hole at the 590 * the head has to end up at the start of the x-1 hole at the
591 * end of the log. 591 * end of the log.
592 * 592 *
593 * In the 256k log case, we will read from the beginning to the 593 * In the 256k log case, we will read from the beginning to the
594 * end of the log and search for cycle numbers equal to x-1. 594 * end of the log and search for cycle numbers equal to x-1.
595 * We don't worry about the x+1 blocks that we encounter, 595 * We don't worry about the x+1 blocks that we encounter,
596 * because we know that they cannot be the head since the log 596 * because we know that they cannot be the head since the log
597 * started with x. 597 * started with x.
598 */ 598 */
599 head_blk = log_bbnum; 599 head_blk = log_bbnum;
600 stop_on_cycle = last_half_cycle - 1; 600 stop_on_cycle = last_half_cycle - 1;
601 } else { 601 } else {
602 /* 602 /*
603 * In this case we want to find the first block with cycle 603 * In this case we want to find the first block with cycle
604 * number matching last_half_cycle. We expect the log to be 604 * number matching last_half_cycle. We expect the log to be
605 * some variation on 605 * some variation on
606 * x + 1 ... | x ... 606 * x + 1 ... | x ...
607 * The first block with cycle number x (last_half_cycle) will 607 * The first block with cycle number x (last_half_cycle) will
608 * be where the new head belongs. First we do a binary search 608 * be where the new head belongs. First we do a binary search
609 * for the first occurrence of last_half_cycle. The binary 609 * for the first occurrence of last_half_cycle. The binary
610 * search may not be totally accurate, so then we scan back 610 * search may not be totally accurate, so then we scan back
611 * from there looking for occurrences of last_half_cycle before 611 * from there looking for occurrences of last_half_cycle before
612 * us. If that backwards scan wraps around the beginning of 612 * us. If that backwards scan wraps around the beginning of
613 * the log, then we look for occurrences of last_half_cycle - 1 613 * the log, then we look for occurrences of last_half_cycle - 1
614 * at the end of the log. The cases we're looking for look 614 * at the end of the log. The cases we're looking for look
615 * like 615 * like
616 * x + 1 ... | x | x + 1 | x ... 616 * x + 1 ... | x | x + 1 | x ...
617 * ^ binary search stopped here 617 * ^ binary search stopped here
618 * or 618 * or
619 * x + 1 ... | x ... | x - 1 | x 619 * x + 1 ... | x ... | x - 1 | x
620 * <---------> less than scan distance 620 * <---------> less than scan distance
621 */ 621 */
622 stop_on_cycle = last_half_cycle; 622 stop_on_cycle = last_half_cycle;
623 if ((error = xlog_find_cycle_start(log, bp, first_blk, 623 if ((error = xlog_find_cycle_start(log, bp, first_blk,
624 &head_blk, last_half_cycle))) 624 &head_blk, last_half_cycle)))
625 goto bp_err; 625 goto bp_err;
626 } 626 }
627 627
628 /* 628 /*
629 * Now validate the answer. Scan back some number of maximum possible 629 * Now validate the answer. Scan back some number of maximum possible
630 * blocks and make sure each one has the expected cycle number. The 630 * blocks and make sure each one has the expected cycle number. The
631 * maximum is determined by the total possible amount of buffering 631 * maximum is determined by the total possible amount of buffering
632 * in the in-core log. The following number can be made tighter if 632 * in the in-core log. The following number can be made tighter if
633 * we actually look at the block size of the filesystem. 633 * we actually look at the block size of the filesystem.
634 */ 634 */
635 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); 635 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
636 if (head_blk >= num_scan_bblks) { 636 if (head_blk >= num_scan_bblks) {
637 /* 637 /*
638 * We are guaranteed that the entire check can be performed 638 * We are guaranteed that the entire check can be performed
639 * in one buffer. 639 * in one buffer.
640 */ 640 */
641 start_blk = head_blk - num_scan_bblks; 641 start_blk = head_blk - num_scan_bblks;
642 if ((error = xlog_find_verify_cycle(log, 642 if ((error = xlog_find_verify_cycle(log,
643 start_blk, num_scan_bblks, 643 start_blk, num_scan_bblks,
644 stop_on_cycle, &new_blk))) 644 stop_on_cycle, &new_blk)))
645 goto bp_err; 645 goto bp_err;
646 if (new_blk != -1) 646 if (new_blk != -1)
647 head_blk = new_blk; 647 head_blk = new_blk;
648 } else { /* need to read 2 parts of log */ 648 } else { /* need to read 2 parts of log */
649 /* 649 /*
650 * We are going to scan backwards in the log in two parts. 650 * We are going to scan backwards in the log in two parts.
651 * First we scan the physical end of the log. In this part 651 * First we scan the physical end of the log. In this part
652 * of the log, we are looking for blocks with cycle number 652 * of the log, we are looking for blocks with cycle number
653 * last_half_cycle - 1. 653 * last_half_cycle - 1.
654 * If we find one, then we know that the log starts there, as 654 * If we find one, then we know that the log starts there, as
655 * we've found a hole that didn't get written in going around 655 * we've found a hole that didn't get written in going around
656 * the end of the physical log. The simple case for this is 656 * the end of the physical log. The simple case for this is
657 * x + 1 ... | x ... | x - 1 | x 657 * x + 1 ... | x ... | x - 1 | x
658 * <---------> less than scan distance 658 * <---------> less than scan distance
659 * If all of the blocks at the end of the log have cycle number 659 * If all of the blocks at the end of the log have cycle number
660 * last_half_cycle, then we check the blocks at the start of 660 * last_half_cycle, then we check the blocks at the start of
661 * the log looking for occurrences of last_half_cycle. If we 661 * the log looking for occurrences of last_half_cycle. If we
662 * find one, then our current estimate for the location of the 662 * find one, then our current estimate for the location of the
663 * first occurrence of last_half_cycle is wrong and we move 663 * first occurrence of last_half_cycle is wrong and we move
664 * back to the hole we've found. This case looks like 664 * back to the hole we've found. This case looks like
665 * x + 1 ... | x | x + 1 | x ... 665 * x + 1 ... | x | x + 1 | x ...
666 * ^ binary search stopped here 666 * ^ binary search stopped here
667 * Another case we need to handle that only occurs in 256k 667 * Another case we need to handle that only occurs in 256k
668 * logs is 668 * logs is
669 * x + 1 ... | x ... | x+1 | x ... 669 * x + 1 ... | x ... | x+1 | x ...
670 * ^ binary search stops here 670 * ^ binary search stops here
671 * In a 256k log, the scan at the end of the log will see the 671 * In a 256k log, the scan at the end of the log will see the
672 * x + 1 blocks. We need to skip past those since that is 672 * x + 1 blocks. We need to skip past those since that is
673 * certainly not the head of the log. By searching for 673 * certainly not the head of the log. By searching for
674 * last_half_cycle-1 we accomplish that. 674 * last_half_cycle-1 we accomplish that.
675 */ 675 */
676 start_blk = log_bbnum - num_scan_bblks + head_blk; 676 start_blk = log_bbnum - num_scan_bblks + head_blk;
677 ASSERT(head_blk <= INT_MAX && 677 ASSERT(head_blk <= INT_MAX &&
678 (xfs_daddr_t) num_scan_bblks - head_blk >= 0); 678 (xfs_daddr_t) num_scan_bblks - head_blk >= 0);
679 if ((error = xlog_find_verify_cycle(log, start_blk, 679 if ((error = xlog_find_verify_cycle(log, start_blk,
680 num_scan_bblks - (int)head_blk, 680 num_scan_bblks - (int)head_blk,
681 (stop_on_cycle - 1), &new_blk))) 681 (stop_on_cycle - 1), &new_blk)))
682 goto bp_err; 682 goto bp_err;
683 if (new_blk != -1) { 683 if (new_blk != -1) {
684 head_blk = new_blk; 684 head_blk = new_blk;
685 goto bad_blk; 685 goto bad_blk;
686 } 686 }
687 687
688 /* 688 /*
689 * Scan beginning of log now. The last part of the physical 689 * Scan beginning of log now. The last part of the physical
690 * log is good. This scan needs to verify that it doesn't find 690 * log is good. This scan needs to verify that it doesn't find
691 * the last_half_cycle. 691 * the last_half_cycle.
692 */ 692 */
693 start_blk = 0; 693 start_blk = 0;
694 ASSERT(head_blk <= INT_MAX); 694 ASSERT(head_blk <= INT_MAX);
695 if ((error = xlog_find_verify_cycle(log, 695 if ((error = xlog_find_verify_cycle(log,
696 start_blk, (int)head_blk, 696 start_blk, (int)head_blk,
697 stop_on_cycle, &new_blk))) 697 stop_on_cycle, &new_blk)))
698 goto bp_err; 698 goto bp_err;
699 if (new_blk != -1) 699 if (new_blk != -1)
700 head_blk = new_blk; 700 head_blk = new_blk;
701 } 701 }
702 702
703 bad_blk: 703 bad_blk:
704 /* 704 /*
705 * Now we need to make sure head_blk is not pointing to a block in 705 * Now we need to make sure head_blk is not pointing to a block in
706 * the middle of a log record. 706 * the middle of a log record.
707 */ 707 */
708 num_scan_bblks = XLOG_REC_SHIFT(log); 708 num_scan_bblks = XLOG_REC_SHIFT(log);
709 if (head_blk >= num_scan_bblks) { 709 if (head_blk >= num_scan_bblks) {
710 start_blk = head_blk - num_scan_bblks; /* don't read head_blk */ 710 start_blk = head_blk - num_scan_bblks; /* don't read head_blk */
711 711
712 /* start ptr at last block ptr before head_blk */ 712 /* start ptr at last block ptr before head_blk */
713 if ((error = xlog_find_verify_log_record(log, start_blk, 713 if ((error = xlog_find_verify_log_record(log, start_blk,
714 &head_blk, 0)) == -1) { 714 &head_blk, 0)) == -1) {
715 error = XFS_ERROR(EIO); 715 error = XFS_ERROR(EIO);
716 goto bp_err; 716 goto bp_err;
717 } else if (error) 717 } else if (error)
718 goto bp_err; 718 goto bp_err;
719 } else { 719 } else {
720 start_blk = 0; 720 start_blk = 0;
721 ASSERT(head_blk <= INT_MAX); 721 ASSERT(head_blk <= INT_MAX);
722 if ((error = xlog_find_verify_log_record(log, start_blk, 722 if ((error = xlog_find_verify_log_record(log, start_blk,
723 &head_blk, 0)) == -1) { 723 &head_blk, 0)) == -1) {
724 /* We hit the beginning of the log during our search */ 724 /* We hit the beginning of the log during our search */
725 start_blk = log_bbnum - num_scan_bblks + head_blk; 725 start_blk = log_bbnum - num_scan_bblks + head_blk;
726 new_blk = log_bbnum; 726 new_blk = log_bbnum;
727 ASSERT(start_blk <= INT_MAX && 727 ASSERT(start_blk <= INT_MAX &&
728 (xfs_daddr_t) log_bbnum-start_blk >= 0); 728 (xfs_daddr_t) log_bbnum-start_blk >= 0);
729 ASSERT(head_blk <= INT_MAX); 729 ASSERT(head_blk <= INT_MAX);
730 if ((error = xlog_find_verify_log_record(log, 730 if ((error = xlog_find_verify_log_record(log,
731 start_blk, &new_blk, 731 start_blk, &new_blk,
732 (int)head_blk)) == -1) { 732 (int)head_blk)) == -1) {
733 error = XFS_ERROR(EIO); 733 error = XFS_ERROR(EIO);
734 goto bp_err; 734 goto bp_err;
735 } else if (error) 735 } else if (error)
736 goto bp_err; 736 goto bp_err;
737 if (new_blk != log_bbnum) 737 if (new_blk != log_bbnum)
738 head_blk = new_blk; 738 head_blk = new_blk;
739 } else if (error) 739 } else if (error)
740 goto bp_err; 740 goto bp_err;
741 } 741 }
742 742
743 xlog_put_bp(bp); 743 xlog_put_bp(bp);
744 if (head_blk == log_bbnum) 744 if (head_blk == log_bbnum)
745 *return_head_blk = 0; 745 *return_head_blk = 0;
746 else 746 else
747 *return_head_blk = head_blk; 747 *return_head_blk = head_blk;
748 /* 748 /*
749 * When returning here, we have a good block number. Bad block 749 * When returning here, we have a good block number. Bad block
750 * means that during a previous crash, we didn't have a clean break 750 * means that during a previous crash, we didn't have a clean break
751 * from cycle number N to cycle number N-1. In this case, we need 751 * from cycle number N to cycle number N-1. In this case, we need
752 * to find the first block with cycle number N-1. 752 * to find the first block with cycle number N-1.
753 */ 753 */
754 return 0; 754 return 0;
755 755
756 bp_err: 756 bp_err:
757 xlog_put_bp(bp); 757 xlog_put_bp(bp);
758 758
759 if (error) 759 if (error)
760 xlog_warn("XFS: failed to find log head"); 760 xlog_warn("XFS: failed to find log head");
761 return error; 761 return error;
762 } 762 }
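
Editor's note: the wrap-around window arithmetic in the verification step above is easier to see in isolation. A minimal sketch under the same conventions (the helper name is hypothetical, not part of this file):

	/*
	 * Start of an n-block scan window ending just before head_blk in a
	 * log of log_bbnum blocks, wrapping past block 0 if necessary.
	 * E.g. log_bbnum = 1000, n = 32, head_blk = 10 gives 978: the
	 * window is blocks 978..999 followed by blocks 0..9.
	 */
	static xfs_daddr_t
	scan_window_start(xfs_daddr_t head_blk, xfs_daddr_t n, xfs_daddr_t log_bbnum)
	{
		return (head_blk >= n) ? head_blk - n : log_bbnum - n + head_blk;
	}
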
763 763
764 /* 764 /*
765 * Find the sync block number or the tail of the log. 765 * Find the sync block number or the tail of the log.
766 * 766 *
767 * This will be the block number of the last record to have its 767 * This will be the block number of the last record to have its
768 * associated buffers synced to disk. Every log record header has 768 * associated buffers synced to disk. Every log record header has
769 * a sync lsn embedded in it. LSNs hold block numbers, so it is easy 769 * a sync lsn embedded in it. LSNs hold block numbers, so it is easy
770 * to get a sync block number. The only concern is to figure out which 770 * to get a sync block number. The only concern is to figure out which
771 * log record header to believe. 771 * log record header to believe.
772 * 772 *
773 * The following algorithm uses the log record header with the largest 773 * The following algorithm uses the log record header with the largest
774 * lsn. The entire log record does not need to be valid. We only care 774 * lsn. The entire log record does not need to be valid. We only care
775 * that the header is valid. 775 * that the header is valid.
776 * 776 *
777 * We could speed up the search by using the current head_blk buffer, but 777 * We could speed up the search by using the current head_blk buffer, but
778 * it is not available. 778 * it is not available.
779 */ 779 */
780 int 780 int
781 xlog_find_tail( 781 xlog_find_tail(
782 xlog_t *log, 782 xlog_t *log,
783 xfs_daddr_t *head_blk, 783 xfs_daddr_t *head_blk,
784 xfs_daddr_t *tail_blk) 784 xfs_daddr_t *tail_blk)
785 { 785 {
786 xlog_rec_header_t *rhead; 786 xlog_rec_header_t *rhead;
787 xlog_op_header_t *op_head; 787 xlog_op_header_t *op_head;
788 xfs_caddr_t offset = NULL; 788 xfs_caddr_t offset = NULL;
789 xfs_buf_t *bp; 789 xfs_buf_t *bp;
790 int error, i, found; 790 int error, i, found;
791 xfs_daddr_t umount_data_blk; 791 xfs_daddr_t umount_data_blk;
792 xfs_daddr_t after_umount_blk; 792 xfs_daddr_t after_umount_blk;
793 xfs_lsn_t tail_lsn; 793 xfs_lsn_t tail_lsn;
794 int hblks; 794 int hblks;
795 795
796 found = 0; 796 found = 0;
797 797
798 /* 798 /*
799 * Find previous log record 799 * Find previous log record
800 */ 800 */
801 if ((error = xlog_find_head(log, head_blk))) 801 if ((error = xlog_find_head(log, head_blk)))
802 return error; 802 return error;
803 803
804 bp = xlog_get_bp(log, 1); 804 bp = xlog_get_bp(log, 1);
805 if (!bp) 805 if (!bp)
806 return ENOMEM; 806 return ENOMEM;
807 if (*head_blk == 0) { /* special case */ 807 if (*head_blk == 0) { /* special case */
808 if ((error = xlog_bread(log, 0, 1, bp))) 808 if ((error = xlog_bread(log, 0, 1, bp)))
809 goto bread_err; 809 goto bread_err;
810 offset = xlog_align(log, 0, 1, bp); 810 offset = xlog_align(log, 0, 1, bp);
811 if (GET_CYCLE(offset, ARCH_CONVERT) == 0) { 811 if (GET_CYCLE(offset, ARCH_CONVERT) == 0) {
812 *tail_blk = 0; 812 *tail_blk = 0;
813 /* leave all other log inited values alone */ 813 /* leave all other log inited values alone */
814 goto exit; 814 goto exit;
815 } 815 }
816 } 816 }
817 817
818 /* 818 /*
819 * Search backwards looking for log record header block 819 * Search backwards looking for log record header block
820 */ 820 */
821 ASSERT(*head_blk < INT_MAX); 821 ASSERT(*head_blk < INT_MAX);
822 for (i = (int)(*head_blk) - 1; i >= 0; i--) { 822 for (i = (int)(*head_blk) - 1; i >= 0; i--) {
823 if ((error = xlog_bread(log, i, 1, bp))) 823 if ((error = xlog_bread(log, i, 1, bp)))
824 goto bread_err; 824 goto bread_err;
825 offset = xlog_align(log, i, 1, bp); 825 offset = xlog_align(log, i, 1, bp);
826 if (XLOG_HEADER_MAGIC_NUM == 826 if (XLOG_HEADER_MAGIC_NUM ==
827 INT_GET(*(uint *)offset, ARCH_CONVERT)) { 827 INT_GET(*(uint *)offset, ARCH_CONVERT)) {
828 found = 1; 828 found = 1;
829 break; 829 break;
830 } 830 }
831 } 831 }
832 /* 832 /*
833 * If we haven't found the log record header block, start looking 833 * If we haven't found the log record header block, start looking
834 * again from the end of the physical log. XXXmiken: There should be 834 * again from the end of the physical log. XXXmiken: There should be
835 * a check here to make sure we didn't search more than N blocks in 835 * a check here to make sure we didn't search more than N blocks in
836 * the previous code. 836 * the previous code.
837 */ 837 */
838 if (!found) { 838 if (!found) {
839 for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { 839 for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
840 if ((error = xlog_bread(log, i, 1, bp))) 840 if ((error = xlog_bread(log, i, 1, bp)))
841 goto bread_err; 841 goto bread_err;
842 offset = xlog_align(log, i, 1, bp); 842 offset = xlog_align(log, i, 1, bp);
843 if (XLOG_HEADER_MAGIC_NUM == 843 if (XLOG_HEADER_MAGIC_NUM ==
844 INT_GET(*(uint*)offset, ARCH_CONVERT)) { 844 INT_GET(*(uint*)offset, ARCH_CONVERT)) {
845 found = 2; 845 found = 2;
846 break; 846 break;
847 } 847 }
848 } 848 }
849 } 849 }
850 if (!found) { 850 if (!found) {
851 xlog_warn("XFS: xlog_find_tail: couldn't find sync record"); 851 xlog_warn("XFS: xlog_find_tail: couldn't find sync record");
852 ASSERT(0); 852 ASSERT(0);
853 error = XFS_ERROR(EIO); goto bread_err; 853 error = XFS_ERROR(EIO); goto bread_err;
854 } 854 }
855 855
856 /* find blk_no of tail of log */ 856 /* find blk_no of tail of log */
857 rhead = (xlog_rec_header_t *)offset; 857 rhead = (xlog_rec_header_t *)offset;
858 *tail_blk = BLOCK_LSN(INT_GET(rhead->h_tail_lsn, ARCH_CONVERT)); 858 *tail_blk = BLOCK_LSN(INT_GET(rhead->h_tail_lsn, ARCH_CONVERT));
859 859
860 /* 860 /*
861 * Reset log values according to the state of the log when we 861 * Reset log values according to the state of the log when we
862 * crashed. In the case where head_blk == 0, we bump curr_cycle 862 * crashed. In the case where head_blk == 0, we bump curr_cycle
863 * one because the next write starts a new cycle rather than 863 * one because the next write starts a new cycle rather than
864 * continuing the cycle of the last good log record. At this 864 * continuing the cycle of the last good log record. At this
865 * point we have guaranteed that all partial log records have been 865 * point we have guaranteed that all partial log records have been
866 * accounted for. Therefore, we know that the last good log record 866 * accounted for. Therefore, we know that the last good log record
867 * written was complete and ended exactly on the end boundary 867 * written was complete and ended exactly on the end boundary
868 * of the physical log. 868 * of the physical log.
869 */ 869 */
870 log->l_prev_block = i; 870 log->l_prev_block = i;
871 log->l_curr_block = (int)*head_blk; 871 log->l_curr_block = (int)*head_blk;
872 log->l_curr_cycle = INT_GET(rhead->h_cycle, ARCH_CONVERT); 872 log->l_curr_cycle = INT_GET(rhead->h_cycle, ARCH_CONVERT);
873 if (found == 2) 873 if (found == 2)
874 log->l_curr_cycle++; 874 log->l_curr_cycle++;
875 log->l_tail_lsn = INT_GET(rhead->h_tail_lsn, ARCH_CONVERT); 875 log->l_tail_lsn = INT_GET(rhead->h_tail_lsn, ARCH_CONVERT);
876 log->l_last_sync_lsn = INT_GET(rhead->h_lsn, ARCH_CONVERT); 876 log->l_last_sync_lsn = INT_GET(rhead->h_lsn, ARCH_CONVERT);
877 log->l_grant_reserve_cycle = log->l_curr_cycle; 877 log->l_grant_reserve_cycle = log->l_curr_cycle;
878 log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); 878 log->l_grant_reserve_bytes = BBTOB(log->l_curr_block);
879 log->l_grant_write_cycle = log->l_curr_cycle; 879 log->l_grant_write_cycle = log->l_curr_cycle;
880 log->l_grant_write_bytes = BBTOB(log->l_curr_block); 880 log->l_grant_write_bytes = BBTOB(log->l_curr_block);
881 881
882 /* 882 /*
883 * Look for unmount record. If we find it, then we know there 883 * Look for unmount record. If we find it, then we know there
884 * was a clean unmount. Since 'i' could be the last block in 884 * was a clean unmount. Since 'i' could be the last block in
885 * the physical log, we convert to a log block before comparing 885 * the physical log, we convert to a log block before comparing
886 * to the head_blk. 886 * to the head_blk.
887 * 887 *
888 * Save the current tail lsn to use to pass to 888 * Save the current tail lsn to use to pass to
889 * xlog_clear_stale_blocks() below. We won't want to clear the 889 * xlog_clear_stale_blocks() below. We won't want to clear the
890 * unmount record if there is one, so we pass the lsn of the 890 * unmount record if there is one, so we pass the lsn of the
891 * unmount record rather than the block after it. 891 * unmount record rather than the block after it.
892 */ 892 */
893 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { 893 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
894 int h_size = INT_GET(rhead->h_size, ARCH_CONVERT); 894 int h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
895 int h_version = INT_GET(rhead->h_version, ARCH_CONVERT); 895 int h_version = INT_GET(rhead->h_version, ARCH_CONVERT);
896 896
897 if ((h_version & XLOG_VERSION_2) && 897 if ((h_version & XLOG_VERSION_2) &&
898 (h_size > XLOG_HEADER_CYCLE_SIZE)) { 898 (h_size > XLOG_HEADER_CYCLE_SIZE)) {
899 hblks = h_size / XLOG_HEADER_CYCLE_SIZE; 899 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
900 if (h_size % XLOG_HEADER_CYCLE_SIZE) 900 if (h_size % XLOG_HEADER_CYCLE_SIZE)
901 hblks++; 901 hblks++;
902 } else { 902 } else {
903 hblks = 1; 903 hblks = 1;
904 } 904 }
905 } else { 905 } else {
906 hblks = 1; 906 hblks = 1;
907 } 907 }
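	/*
	 * Editor's note: the branch above is just a round-up division,
	 * hblks = (h_size + XLOG_HEADER_CYCLE_SIZE - 1) / XLOG_HEADER_CYCLE_SIZE.
	 * With the usual 32k cycle size, a v2 log writing 64k iclogs
	 * (h_size = 65536) needs hblks = 2 header blocks, while
	 * h_size <= 32768 keeps hblks = 1.
	 */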
908 after_umount_blk = (i + hblks + (int) 908 after_umount_blk = (i + hblks + (int)
909 BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT))) % log->l_logBBsize; 909 BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT))) % log->l_logBBsize;
910 tail_lsn = log->l_tail_lsn; 910 tail_lsn = log->l_tail_lsn;
911 if (*head_blk == after_umount_blk && 911 if (*head_blk == after_umount_blk &&
912 INT_GET(rhead->h_num_logops, ARCH_CONVERT) == 1) { 912 INT_GET(rhead->h_num_logops, ARCH_CONVERT) == 1) {
913 umount_data_blk = (i + hblks) % log->l_logBBsize; 913 umount_data_blk = (i + hblks) % log->l_logBBsize;
914 if ((error = xlog_bread(log, umount_data_blk, 1, bp))) { 914 if ((error = xlog_bread(log, umount_data_blk, 1, bp))) {
915 goto bread_err; 915 goto bread_err;
916 } 916 }
917 offset = xlog_align(log, umount_data_blk, 1, bp); 917 offset = xlog_align(log, umount_data_blk, 1, bp);
918 op_head = (xlog_op_header_t *)offset; 918 op_head = (xlog_op_header_t *)offset;
919 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { 919 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
920 /* 920 /*
921 * Set tail and last sync so that newly written 921 * Set tail and last sync so that newly written
922 * log records will point recovery to after the 922 * log records will point recovery to after the
923 * current unmount record. 923 * current unmount record.
924 */ 924 */
925 ASSIGN_ANY_LSN_HOST(log->l_tail_lsn, log->l_curr_cycle, 925 ASSIGN_ANY_LSN_HOST(log->l_tail_lsn, log->l_curr_cycle,
926 after_umount_blk); 926 after_umount_blk);
927 ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle, 927 ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle,
928 after_umount_blk); 928 after_umount_blk);
929 *tail_blk = after_umount_blk; 929 *tail_blk = after_umount_blk;
930 930
931 /* 931 /*
932 * Note that the unmount was clean. If the unmount 932 * Note that the unmount was clean. If the unmount
933 * was not clean, we need to know this to rebuild the 933 * was not clean, we need to know this to rebuild the
934 * superblock counters from the perag headers if we 934 * superblock counters from the perag headers if we
935 * have a filesystem using non-persistent counters. 935 * have a filesystem using non-persistent counters.
936 */ 936 */
937 log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; 937 log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
938 } 938 }
939 } 939 }
940 940
941 /* 941 /*
942 * Make sure that there are no blocks in front of the head 942 * Make sure that there are no blocks in front of the head
943 * with the same cycle number as the head. This can happen 943 * with the same cycle number as the head. This can happen
944 * because we allow multiple outstanding log writes concurrently, 944 * because we allow multiple outstanding log writes concurrently,
945 * and the later writes might make it out before earlier ones. 945 * and the later writes might make it out before earlier ones.
946 * 946 *
947 * We use the lsn from before modifying it so that we'll never 947 * We use the lsn from before modifying it so that we'll never
948 * overwrite the unmount record after a clean unmount. 948 * overwrite the unmount record after a clean unmount.
949 * 949 *
950 * Do this only if we are going to recover the filesystem 950 * Do this only if we are going to recover the filesystem
951 * 951 *
952 * NOTE: This used to say "if (!readonly)" 952 * NOTE: This used to say "if (!readonly)"
953 * However on Linux, we can & do recover a read-only filesystem. 953 * However on Linux, we can & do recover a read-only filesystem.
954 * We only skip recovery if NORECOVERY is specified on mount, 954 * We only skip recovery if NORECOVERY is specified on mount,
955 * in which case we would not be here. 955 * in which case we would not be here.
956 * 956 *
957 * But... if the -device- itself is readonly, just skip this. 957 * But... if the -device- itself is readonly, just skip this.
958 * We can't recover this device anyway, so it won't matter. 958 * We can't recover this device anyway, so it won't matter.
959 */ 959 */
960 if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { 960 if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) {
961 error = xlog_clear_stale_blocks(log, tail_lsn); 961 error = xlog_clear_stale_blocks(log, tail_lsn);
962 } 962 }
963 963
964 bread_err: 964 bread_err:
965 exit: 965 exit:
966 xlog_put_bp(bp); 966 xlog_put_bp(bp);
967 967
968 if (error) 968 if (error)
969 xlog_warn("XFS: failed to locate log tail"); 969 xlog_warn("XFS: failed to locate log tail");
970 return error; 970 return error;
971 } 971 }
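
Editor's note: the unmount-record test above reduces to a little block arithmetic: the unmount record can only be the single log operation of the very last record, so the head must sit exactly one record past the header found at block i. A hedged sketch (the helper name is hypothetical; BTOBB is the real bytes-to-basic-blocks macro):

	static int
	looks_like_clean_unmount(int i, int hblks, int h_len_bytes,
				 int log_bbnum, xfs_daddr_t head_blk, int num_logops)
	{
		/* first block past the header and body of the record at block i */
		xfs_daddr_t after_blk = (i + hblks + BTOBB(h_len_bytes)) % log_bbnum;

		return head_blk == after_blk && num_logops == 1;
	}
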
972 972
973 /* 973 /*
974 * Is the log zeroed at all? 974 * Is the log zeroed at all?
975 * 975 *
976 * The last binary search should be changed to perform an X block read 976 * The last binary search should be changed to perform an X block read
977 * once X becomes small enough. You can then search linearly through 977 * once X becomes small enough. You can then search linearly through
978 * the X blocks. This will cut down on the number of reads we need to do. 978 * the X blocks. This will cut down on the number of reads we need to do.
979 * 979 *
980 * If the log is partially zeroed, this routine will pass back the blkno 980 * If the log is partially zeroed, this routine will pass back the blkno
981 * of the first block with cycle number 0. It won't have a complete 981 * of the first block with cycle number 0. It won't have a complete
982 * log record (LR) preceding it. 982 * log record (LR) preceding it.
983 * 983 *
984 * Return: 984 * Return:
985 * 0 => the log is completely written to 985 * 0 => the log is completely written to
986 * -1 => use *blk_no as the first block of the log 986 * -1 => use *blk_no as the first block of the log
987 * >0 => error has occurred 987 * >0 => error has occurred
988 */ 988 */
989 int 989 int
990 xlog_find_zeroed( 990 xlog_find_zeroed(
991 xlog_t *log, 991 xlog_t *log,
992 xfs_daddr_t *blk_no) 992 xfs_daddr_t *blk_no)
993 { 993 {
994 xfs_buf_t *bp; 994 xfs_buf_t *bp;
995 xfs_caddr_t offset; 995 xfs_caddr_t offset;
996 uint first_cycle, last_cycle; 996 uint first_cycle, last_cycle;
997 xfs_daddr_t new_blk, last_blk, start_blk; 997 xfs_daddr_t new_blk, last_blk, start_blk;
998 xfs_daddr_t num_scan_bblks; 998 xfs_daddr_t num_scan_bblks;
999 int error, log_bbnum = log->l_logBBsize; 999 int error, log_bbnum = log->l_logBBsize;
1000 1000
1001 *blk_no = 0; 1001 *blk_no = 0;
1002 1002
1003 /* check totally zeroed log */ 1003 /* check totally zeroed log */
1004 bp = xlog_get_bp(log, 1); 1004 bp = xlog_get_bp(log, 1);
1005 if (!bp) 1005 if (!bp)
1006 return ENOMEM; 1006 return ENOMEM;
1007 if ((error = xlog_bread(log, 0, 1, bp))) 1007 if ((error = xlog_bread(log, 0, 1, bp)))
1008 goto bp_err; 1008 goto bp_err;
1009 offset = xlog_align(log, 0, 1, bp); 1009 offset = xlog_align(log, 0, 1, bp);
1010 first_cycle = GET_CYCLE(offset, ARCH_CONVERT); 1010 first_cycle = GET_CYCLE(offset, ARCH_CONVERT);
1011 if (first_cycle == 0) { /* completely zeroed log */ 1011 if (first_cycle == 0) { /* completely zeroed log */
1012 *blk_no = 0; 1012 *blk_no = 0;
1013 xlog_put_bp(bp); 1013 xlog_put_bp(bp);
1014 return -1; 1014 return -1;
1015 } 1015 }
1016 1016
1017 /* check partially zeroed log */ 1017 /* check partially zeroed log */
1018 if ((error = xlog_bread(log, log_bbnum-1, 1, bp))) 1018 if ((error = xlog_bread(log, log_bbnum-1, 1, bp)))
1019 goto bp_err; 1019 goto bp_err;
1020 offset = xlog_align(log, log_bbnum-1, 1, bp); 1020 offset = xlog_align(log, log_bbnum-1, 1, bp);
1021 last_cycle = GET_CYCLE(offset, ARCH_CONVERT); 1021 last_cycle = GET_CYCLE(offset, ARCH_CONVERT);
1022 if (last_cycle != 0) { /* log completely written to */ 1022 if (last_cycle != 0) { /* log completely written to */
1023 xlog_put_bp(bp); 1023 xlog_put_bp(bp);
1024 return 0; 1024 return 0;
1025 } else if (first_cycle != 1) { 1025 } else if (first_cycle != 1) {
1026 /* 1026 /*
1027 * If the cycle of the last block is zero, the cycle of 1027 * If the cycle of the last block is zero, the cycle of
1028 * the first block must be 1. If it's not, maybe we're 1028 * the first block must be 1. If it's not, maybe we're
1029 * not looking at a log... Bail out. 1029 * not looking at a log... Bail out.
1030 */ 1030 */
1031 xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)"); 1031 xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)");
1032 error = XFS_ERROR(EINVAL); goto bp_err; 1032 error = XFS_ERROR(EINVAL); goto bp_err;
1033 } 1033 }
1034 1034
1035 /* we have a partially zeroed log */ 1035 /* we have a partially zeroed log */
1036 last_blk = log_bbnum-1; 1036 last_blk = log_bbnum-1;
1037 if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) 1037 if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0)))
1038 goto bp_err; 1038 goto bp_err;
1039 1039
1040 /* 1040 /*
1041 * Validate the answer. Because there is no way to guarantee that 1041 * Validate the answer. Because there is no way to guarantee that
1042 * the entire log is made up of log records which are the same size, 1042 * the entire log is made up of log records which are the same size,
1043 * we scan over the defined maximum blocks. At this point, the maximum 1043 * we scan over the defined maximum blocks. At this point, the maximum
1044 * is not chosen to mean anything special. XXXmiken 1044 * is not chosen to mean anything special. XXXmiken
1045 */ 1045 */
1046 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); 1046 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
1047 ASSERT(num_scan_bblks <= INT_MAX); 1047 ASSERT(num_scan_bblks <= INT_MAX);
1048 1048
1049 if (last_blk < num_scan_bblks) 1049 if (last_blk < num_scan_bblks)
1050 num_scan_bblks = last_blk; 1050 num_scan_bblks = last_blk;
1051 start_blk = last_blk - num_scan_bblks; 1051 start_blk = last_blk - num_scan_bblks;
1052 1052
1053 /* 1053 /*
1054 * We search for any instances of cycle number 0 that occur before 1054 * We search for any instances of cycle number 0 that occur before
1055 * our current estimate of the head. What we're trying to detect is 1055 * our current estimate of the head. What we're trying to detect is
1056 * 1 ... | 0 | 1 | 0... 1056 * 1 ... | 0 | 1 | 0...
1057 * ^ binary search ends here 1057 * ^ binary search ends here
1058 */ 1058 */
1059 if ((error = xlog_find_verify_cycle(log, start_blk, 1059 if ((error = xlog_find_verify_cycle(log, start_blk,
1060 (int)num_scan_bblks, 0, &new_blk))) 1060 (int)num_scan_bblks, 0, &new_blk)))
1061 goto bp_err; 1061 goto bp_err;
1062 if (new_blk != -1) 1062 if (new_blk != -1)
1063 last_blk = new_blk; 1063 last_blk = new_blk;
1064 1064
1065 /* 1065 /*
1066 * Potentially backup over partial log record write. We don't need 1066 * Potentially backup over partial log record write. We don't need
1067 * to search the end of the log because we know it is zero. 1067 * to search the end of the log because we know it is zero.
1068 */ 1068 */
1069 if ((error = xlog_find_verify_log_record(log, start_blk, 1069 if ((error = xlog_find_verify_log_record(log, start_blk,
1070 &last_blk, 0)) == -1) { 1070 &last_blk, 0)) == -1) {
1071 error = XFS_ERROR(EIO); 1071 error = XFS_ERROR(EIO);
1072 goto bp_err; 1072 goto bp_err;
1073 } else if (error) 1073 } else if (error)
1074 goto bp_err; 1074 goto bp_err;
1075 1075
1076 *blk_no = last_blk; 1076 *blk_no = last_blk;
1077 bp_err: 1077 bp_err:
1078 xlog_put_bp(bp); 1078 xlog_put_bp(bp);
1079 if (error) 1079 if (error)
1080 return error; 1080 return error;
1081 return -1; 1081 return -1;
1082 } 1082 }
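
Editor's note: the 0 / -1 / >0 convention above inverts the usual kernel style, so a sketch of the expected caller may help; xlog_find_head() consumes it essentially like this (paraphrased, not a verbatim quote):

	if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
		*return_head_blk = first_blk;	/* log is (partially) zeroed */
		return 0;
	} else if (error) {
		return error;			/* genuine I/O or validation error */
	}
	/* error == 0: log fully written, go on to search for the head */
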
1083 1083
1084 /* 1084 /*
1085 * These are simple subroutines used by xlog_clear_stale_blocks() below 1085 * These are simple subroutines used by xlog_clear_stale_blocks() below
1086 * to initialize a buffer full of empty log record headers and write 1086 * to initialize a buffer full of empty log record headers and write
1087 * them into the log. 1087 * them into the log.
1088 */ 1088 */
1089 STATIC void 1089 STATIC void
1090 xlog_add_record( 1090 xlog_add_record(
1091 xlog_t *log, 1091 xlog_t *log,
1092 xfs_caddr_t buf, 1092 xfs_caddr_t buf,
1093 int cycle, 1093 int cycle,
1094 int block, 1094 int block,
1095 int tail_cycle, 1095 int tail_cycle,
1096 int tail_block) 1096 int tail_block)
1097 { 1097 {
1098 xlog_rec_header_t *recp = (xlog_rec_header_t *)buf; 1098 xlog_rec_header_t *recp = (xlog_rec_header_t *)buf;
1099 1099
1100 memset(buf, 0, BBSIZE); 1100 memset(buf, 0, BBSIZE);
1101 INT_SET(recp->h_magicno, ARCH_CONVERT, XLOG_HEADER_MAGIC_NUM); 1101 INT_SET(recp->h_magicno, ARCH_CONVERT, XLOG_HEADER_MAGIC_NUM);
1102 INT_SET(recp->h_cycle, ARCH_CONVERT, cycle); 1102 INT_SET(recp->h_cycle, ARCH_CONVERT, cycle);
1103 INT_SET(recp->h_version, ARCH_CONVERT, 1103 INT_SET(recp->h_version, ARCH_CONVERT,
1104 XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 2 : 1); 1104 XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 2 : 1);
1105 ASSIGN_ANY_LSN_DISK(recp->h_lsn, cycle, block); 1105 ASSIGN_ANY_LSN_DISK(recp->h_lsn, cycle, block);
1106 ASSIGN_ANY_LSN_DISK(recp->h_tail_lsn, tail_cycle, tail_block); 1106 ASSIGN_ANY_LSN_DISK(recp->h_tail_lsn, tail_cycle, tail_block);
1107 INT_SET(recp->h_fmt, ARCH_CONVERT, XLOG_FMT); 1107 INT_SET(recp->h_fmt, ARCH_CONVERT, XLOG_FMT);
1108 memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t)); 1108 memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t));
1109 } 1109 }
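
Editor's note: both ASSIGN_ANY_LSN_* uses above pack a (cycle, block) pair into one 64-bit LSN, cycle in the high word. A minimal stand-in, assuming that layout (the real macro internals live elsewhere):

	/* hypothetical equivalent of the LSN assignment macros */
	static inline xfs_lsn_t
	pack_lsn(int cycle, int block)
	{
		return ((xfs_lsn_t)cycle << 32) | (xfs_lsn_t)block;
	}
	/* CYCLE_LSN() and BLOCK_LSN() recover the two halves */
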
1110 1110
1111 STATIC int 1111 STATIC int
1112 xlog_write_log_records( 1112 xlog_write_log_records(
1113 xlog_t *log, 1113 xlog_t *log,
1114 int cycle, 1114 int cycle,
1115 int start_block, 1115 int start_block,
1116 int blocks, 1116 int blocks,
1117 int tail_cycle, 1117 int tail_cycle,
1118 int tail_block) 1118 int tail_block)
1119 { 1119 {
1120 xfs_caddr_t offset; 1120 xfs_caddr_t offset;
1121 xfs_buf_t *bp; 1121 xfs_buf_t *bp;
1122 int balign, ealign; 1122 int balign, ealign;
1123 int sectbb = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); 1123 int sectbb = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1);
1124 int end_block = start_block + blocks; 1124 int end_block = start_block + blocks;
1125 int bufblks; 1125 int bufblks;
1126 int error = 0; 1126 int error = 0;
1127 int i, j = 0; 1127 int i, j = 0;
1128 1128
1129 bufblks = 1 << ffs(blocks); 1129 bufblks = 1 << ffs(blocks);
1130 while (!(bp = xlog_get_bp(log, bufblks))) { 1130 while (!(bp = xlog_get_bp(log, bufblks))) {
1131 bufblks >>= 1; 1131 bufblks >>= 1;
1132 if (bufblks <= log->l_sectbb_log) 1132 if (bufblks <= log->l_sectbb_log)
1133 return ENOMEM; 1133 return ENOMEM;
1134 } 1134 }
1135 1135
1136 /* We may need to do a read at the start to fill in part of 1136 /* We may need to do a read at the start to fill in part of
1137 * the buffer in the starting sector not covered by the first 1137 * the buffer in the starting sector not covered by the first
1138 * write below. 1138 * write below.
1139 */ 1139 */
1140 balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block); 1140 balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block);
1141 if (balign != start_block) { 1141 if (balign != start_block) {
1142 if ((error = xlog_bread(log, start_block, 1, bp))) { 1142 if ((error = xlog_bread(log, start_block, 1, bp))) {
1143 xlog_put_bp(bp); 1143 xlog_put_bp(bp);
1144 return error; 1144 return error;
1145 } 1145 }
1146 j = start_block - balign; 1146 j = start_block - balign;
1147 } 1147 }
1148 1148
1149 for (i = start_block; i < end_block; i += bufblks) { 1149 for (i = start_block; i < end_block; i += bufblks) {
1150 int bcount, endcount; 1150 int bcount, endcount;
1151 1151
1152 bcount = min(bufblks, end_block - start_block); 1152 bcount = min(bufblks, end_block - start_block);
1153 endcount = bcount - j; 1153 endcount = bcount - j;
1154 1154
1155 /* We may need to do a read at the end to fill in part of 1155 /* We may need to do a read at the end to fill in part of
1156 * the buffer in the final sector not covered by the write. 1156 * the buffer in the final sector not covered by the write.
1157 * If this is the same sector as the above read, skip it. 1157 * If this is the same sector as the above read, skip it.
1158 */ 1158 */
1159 ealign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, end_block); 1159 ealign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, end_block);
1160 if (j == 0 && (start_block + endcount > ealign)) { 1160 if (j == 0 && (start_block + endcount > ealign)) {
1161 offset = XFS_BUF_PTR(bp); 1161 offset = XFS_BUF_PTR(bp);
1162 balign = BBTOB(ealign - start_block); 1162 balign = BBTOB(ealign - start_block);
1163 XFS_BUF_SET_PTR(bp, offset + balign, BBTOB(sectbb)); 1163 XFS_BUF_SET_PTR(bp, offset + balign, BBTOB(sectbb));
1164 if ((error = xlog_bread(log, ealign, sectbb, bp))) 1164 if ((error = xlog_bread(log, ealign, sectbb, bp)))
1165 break; 1165 break;
1166 XFS_BUF_SET_PTR(bp, offset, BBTOB(bufblks)); 1166 XFS_BUF_SET_PTR(bp, offset, BBTOB(bufblks));
1167 } 1167 }
1168 1168
1169 offset = xlog_align(log, start_block, endcount, bp); 1169 offset = xlog_align(log, start_block, endcount, bp);
1170 for (; j < endcount; j++) { 1170 for (; j < endcount; j++) {
1171 xlog_add_record(log, offset, cycle, i+j, 1171 xlog_add_record(log, offset, cycle, i+j,
1172 tail_cycle, tail_block); 1172 tail_cycle, tail_block);
1173 offset += BBSIZE; 1173 offset += BBSIZE;
1174 } 1174 }
1175 error = xlog_bwrite(log, start_block, endcount, bp); 1175 error = xlog_bwrite(log, start_block, endcount, bp);
1176 if (error) 1176 if (error)
1177 break; 1177 break;
1178 start_block += endcount; 1178 start_block += endcount;
1179 j = 0; 1179 j = 0;
1180 } 1180 }
1181 xlog_put_bp(bp); 1181 xlog_put_bp(bp);
1182 return error; 1182 return error;
1183 } 1183 }
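
Editor's note: the two xlog_bread() calls in the routine above exist only to preserve bytes that share a device sector with the range being rewritten. A worked example with assumed numbers:

	/*
	 * Assume a sector of 4 basic blocks (sectbb = 4), start_block = 5,
	 * blocks = 10, so end_block = 15.
	 *
	 *   balign = rounddown(5, 4) = 4, j = 5 - 4 = 1
	 *	-> read the sector at 4 first so block 4 survives the write
	 *   ealign = rounddown(15, 4) = 12, final write covers 12..14
	 *	-> read the sector at 12 so block 15's old contents survive
	 *
	 * Every block strictly inside 5..14 is simply overwritten with
	 * empty log record headers.
	 */
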
1184 1184
1185 /* 1185 /*
1186 * This routine is called to blow away any incomplete log writes out 1186 * This routine is called to blow away any incomplete log writes out
1187 * in front of the log head. We do this so that we won't become confused 1187 * in front of the log head. We do this so that we won't become confused
1188 * if we come up, write only a little bit more, and then crash again. 1188 * if we come up, write only a little bit more, and then crash again.
1189 * If we leave the partial log records out there, this situation could 1189 * If we leave the partial log records out there, this situation could
1190 * cause us to think those partial writes are valid blocks since they 1190 * cause us to think those partial writes are valid blocks since they
1191 * have the current cycle number. We get rid of them by overwriting them 1191 * have the current cycle number. We get rid of them by overwriting them
1192 * with empty log records with the old cycle number rather than the 1192 * with empty log records with the old cycle number rather than the
1193 * current one. 1193 * current one.
1194 * 1194 *
1195 * The tail lsn is passed in rather than taken from 1195 * The tail lsn is passed in rather than taken from
1196 * the log so that we will not write over the unmount record after a 1196 * the log so that we will not write over the unmount record after a
1197 * clean unmount in a 512 block log. Doing so would leave the log without 1197 * clean unmount in a 512 block log. Doing so would leave the log without
1198 * any valid log records in it until a new one was written. If we crashed 1198 * any valid log records in it until a new one was written. If we crashed
1199 * during that time we would not be able to recover. 1199 * during that time we would not be able to recover.
1200 */ 1200 */
1201 STATIC int 1201 STATIC int
1202 xlog_clear_stale_blocks( 1202 xlog_clear_stale_blocks(
1203 xlog_t *log, 1203 xlog_t *log,
1204 xfs_lsn_t tail_lsn) 1204 xfs_lsn_t tail_lsn)
1205 { 1205 {
1206 int tail_cycle, head_cycle; 1206 int tail_cycle, head_cycle;
1207 int tail_block, head_block; 1207 int tail_block, head_block;
1208 int tail_distance, max_distance; 1208 int tail_distance, max_distance;
1209 int distance; 1209 int distance;
1210 int error; 1210 int error;
1211 1211
1212 tail_cycle = CYCLE_LSN(tail_lsn); 1212 tail_cycle = CYCLE_LSN(tail_lsn);
1213 tail_block = BLOCK_LSN(tail_lsn); 1213 tail_block = BLOCK_LSN(tail_lsn);
1214 head_cycle = log->l_curr_cycle; 1214 head_cycle = log->l_curr_cycle;
1215 head_block = log->l_curr_block; 1215 head_block = log->l_curr_block;
1216 1216
1217 /* 1217 /*
1218 * Figure out the distance between the new head of the log 1218 * Figure out the distance between the new head of the log
1219 * and the tail. We want to write over any blocks beyond the 1219 * and the tail. We want to write over any blocks beyond the
1220 * head that we may have written just before the crash, but 1220 * head that we may have written just before the crash, but
1221 * we don't want to overwrite the tail of the log. 1221 * we don't want to overwrite the tail of the log.
1222 */ 1222 */
1223 if (head_cycle == tail_cycle) { 1223 if (head_cycle == tail_cycle) {
1224 /* 1224 /*
1225 * The tail is behind the head in the physical log, 1225 * The tail is behind the head in the physical log,
1226 * so the distance from the head to the tail is the 1226 * so the distance from the head to the tail is the
1227 * distance from the head to the end of the log plus 1227 * distance from the head to the end of the log plus
1228 * the distance from the beginning of the log to the 1228 * the distance from the beginning of the log to the
1229 * tail. 1229 * tail.
1230 */ 1230 */
1231 if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) { 1231 if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) {
1232 XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)", 1232 XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)",
1233 XFS_ERRLEVEL_LOW, log->l_mp); 1233 XFS_ERRLEVEL_LOW, log->l_mp);
1234 return XFS_ERROR(EFSCORRUPTED); 1234 return XFS_ERROR(EFSCORRUPTED);
1235 } 1235 }
1236 tail_distance = tail_block + (log->l_logBBsize - head_block); 1236 tail_distance = tail_block + (log->l_logBBsize - head_block);
1237 } else { 1237 } else {
1238 /* 1238 /*
1239 * The head is behind the tail in the physical log, 1239 * The head is behind the tail in the physical log,
1240 * so the distance from the head to the tail is just 1240 * so the distance from the head to the tail is just
1241 * the tail block minus the head block. 1241 * the tail block minus the head block.
1242 */ 1242 */
1243 if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){ 1243 if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){
1244 XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)", 1244 XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)",
1245 XFS_ERRLEVEL_LOW, log->l_mp); 1245 XFS_ERRLEVEL_LOW, log->l_mp);
1246 return XFS_ERROR(EFSCORRUPTED); 1246 return XFS_ERROR(EFSCORRUPTED);
1247 } 1247 }
1248 tail_distance = tail_block - head_block; 1248 tail_distance = tail_block - head_block;
1249 } 1249 }
1250 1250
1251 /* 1251 /*
1252 * If the head is right up against the tail, we can't clear 1252 * If the head is right up against the tail, we can't clear
1253 * anything. 1253 * anything.
1254 */ 1254 */
1255 if (tail_distance <= 0) { 1255 if (tail_distance <= 0) {
1256 ASSERT(tail_distance == 0); 1256 ASSERT(tail_distance == 0);
1257 return 0; 1257 return 0;
1258 } 1258 }
1259 1259
1260 max_distance = XLOG_TOTAL_REC_SHIFT(log); 1260 max_distance = XLOG_TOTAL_REC_SHIFT(log);
1261 /* 1261 /*
1262 * Take the smaller of the maximum amount of outstanding I/O 1262 * Take the smaller of the maximum amount of outstanding I/O
1263 * we could have and the distance to the tail to clear out. 1263 * we could have and the distance to the tail to clear out.
1264 * We take the smaller so that we don't overwrite the tail and 1264 * We take the smaller so that we don't overwrite the tail and
1265 * we don't waste all day writing from the head to the tail 1265 * we don't waste all day writing from the head to the tail
1266 * for no reason. 1266 * for no reason.
1267 */ 1267 */
1268 max_distance = MIN(max_distance, tail_distance); 1268 max_distance = MIN(max_distance, tail_distance);
1269 1269
1270 if ((head_block + max_distance) <= log->l_logBBsize) { 1270 if ((head_block + max_distance) <= log->l_logBBsize) {
1271 /* 1271 /*
1272 * We can stomp all the blocks we need to without 1272 * We can stomp all the blocks we need to without
1273 * wrapping around the end of the log. Just do it 1273 * wrapping around the end of the log. Just do it
1274 * in a single write. Use the cycle number of the 1274 * in a single write. Use the cycle number of the
1275 * current cycle minus one so that the log will look like: 1275 * current cycle minus one so that the log will look like:
1276 * n ... | n - 1 ... 1276 * n ... | n - 1 ...
1277 */ 1277 */
1278 error = xlog_write_log_records(log, (head_cycle - 1), 1278 error = xlog_write_log_records(log, (head_cycle - 1),
1279 head_block, max_distance, tail_cycle, 1279 head_block, max_distance, tail_cycle,
1280 tail_block); 1280 tail_block);
1281 if (error) 1281 if (error)
1282 return error; 1282 return error;
1283 } else { 1283 } else {
1284 /* 1284 /*
1285 * We need to wrap around the end of the physical log in 1285 * We need to wrap around the end of the physical log in
1286 * order to clear all the blocks. Do it in two separate 1286 * order to clear all the blocks. Do it in two separate
1287 * I/Os. The first write should be from the head to the 1287 * I/Os. The first write should be from the head to the
1288 * end of the physical log, and it should use the current 1288 * end of the physical log, and it should use the current
1289 * cycle number minus one just like above. 1289 * cycle number minus one just like above.
1290 */ 1290 */
1291 distance = log->l_logBBsize - head_block; 1291 distance = log->l_logBBsize - head_block;
1292 error = xlog_write_log_records(log, (head_cycle - 1), 1292 error = xlog_write_log_records(log, (head_cycle - 1),
1293 head_block, distance, tail_cycle, 1293 head_block, distance, tail_cycle,
1294 tail_block); 1294 tail_block);
1295 1295
1296 if (error) 1296 if (error)
1297 return error; 1297 return error;
1298 1298
1299 /* 1299 /*
1300 * Now write the blocks at the start of the physical log. 1300 * Now write the blocks at the start of the physical log.
1301 * This writes the remainder of the blocks we want to clear. 1301 * This writes the remainder of the blocks we want to clear.
1302 * It uses the current cycle number since we're now on the 1302 * It uses the current cycle number since we're now on the
1303 * same cycle as the head so that we get: 1303 * same cycle as the head so that we get:
1304 * n ... n ... | n - 1 ... 1304 * n ... n ... | n - 1 ...
1305 * ^^^^^ blocks we're writing 1305 * ^^^^^ blocks we're writing
1306 */ 1306 */
1307 distance = max_distance - (log->l_logBBsize - head_block); 1307 distance = max_distance - (log->l_logBBsize - head_block);
1308 error = xlog_write_log_records(log, head_cycle, 0, distance, 1308 error = xlog_write_log_records(log, head_cycle, 0, distance,
1309 tail_cycle, tail_block); 1309 tail_cycle, tail_block);
1310 if (error) 1310 if (error)
1311 return error; 1311 return error;
1312 } 1312 }
1313 1313
1314 return 0; 1314 return 0;
1315 } 1315 }
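
Editor's note: a worked example of the wrap-around branch above, with assumed numbers:

	/*
	 * l_logBBsize = 1000, head_block = 990, head_cycle = n,
	 * max_distance = 30:
	 *
	 *   write 1: blocks 990..999 (10 blocks) stamped with cycle n - 1
	 *   write 2: blocks   0..19  (20 blocks) stamped with cycle n
	 *
	 * leaving the log as  n ... n | n - 1 ...  exactly as the comments
	 * above describe, so stale blocks beyond the head can never be
	 * mistaken for current ones.
	 */
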
1316 1316
1317 /****************************************************************************** 1317 /******************************************************************************
1318 * 1318 *
1319 * Log recover routines 1319 * Log recover routines
1320 * 1320 *
1321 ****************************************************************************** 1321 ******************************************************************************
1322 */ 1322 */
1323 1323
1324 STATIC xlog_recover_t * 1324 STATIC xlog_recover_t *
1325 xlog_recover_find_tid( 1325 xlog_recover_find_tid(
1326 xlog_recover_t *q, 1326 xlog_recover_t *q,
1327 xlog_tid_t tid) 1327 xlog_tid_t tid)
1328 { 1328 {
1329 xlog_recover_t *p = q; 1329 xlog_recover_t *p = q;
1330 1330
1331 while (p != NULL) { 1331 while (p != NULL) {
1332 if (p->r_log_tid == tid) 1332 if (p->r_log_tid == tid)
1333 break; 1333 break;
1334 p = p->r_next; 1334 p = p->r_next;
1335 } 1335 }
1336 return p; 1336 return p;
1337 } 1337 }
1338 1338
1339 STATIC void 1339 STATIC void
1340 xlog_recover_put_hashq( 1340 xlog_recover_put_hashq(
1341 xlog_recover_t **q, 1341 xlog_recover_t **q,
1342 xlog_recover_t *trans) 1342 xlog_recover_t *trans)
1343 { 1343 {
1344 trans->r_next = *q; 1344 trans->r_next = *q;
1345 *q = trans; 1345 *q = trans;
1346 } 1346 }
1347 1347
1348 STATIC void 1348 STATIC void
1349 xlog_recover_add_item( 1349 xlog_recover_add_item(
1350 xlog_recover_item_t **itemq) 1350 xlog_recover_item_t **itemq)
1351 { 1351 {
1352 xlog_recover_item_t *item; 1352 xlog_recover_item_t *item;
1353 1353
1354 item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); 1354 item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
1355 xlog_recover_insert_item_backq(itemq, item); 1355 xlog_recover_insert_item_backq(itemq, item);
1356 } 1356 }
1357 1357
1358 STATIC int 1358 STATIC int
1359 xlog_recover_add_to_cont_trans( 1359 xlog_recover_add_to_cont_trans(
1360 xlog_recover_t *trans, 1360 xlog_recover_t *trans,
1361 xfs_caddr_t dp, 1361 xfs_caddr_t dp,
1362 int len) 1362 int len)
1363 { 1363 {
1364 xlog_recover_item_t *item; 1364 xlog_recover_item_t *item;
1365 xfs_caddr_t ptr, old_ptr; 1365 xfs_caddr_t ptr, old_ptr;
1366 int old_len; 1366 int old_len;
1367 1367
1368 item = trans->r_itemq; 1368 item = trans->r_itemq;
1369 if (item == NULL) { 1369 if (item == NULL) {
1370 /* finish copying rest of trans header */ 1370 /* finish copying rest of trans header */
1371 xlog_recover_add_item(&trans->r_itemq); 1371 xlog_recover_add_item(&trans->r_itemq);
1372 ptr = (xfs_caddr_t) &trans->r_theader + 1372 ptr = (xfs_caddr_t) &trans->r_theader +
1373 sizeof(xfs_trans_header_t) - len; 1373 sizeof(xfs_trans_header_t) - len;
1374 memcpy(ptr, dp, len); /* d, s, l */ 1374 memcpy(ptr, dp, len); /* d, s, l */
1375 return 0; 1375 return 0;
1376 } 1376 }
1377 item = item->ri_prev; 1377 item = item->ri_prev;
1378 1378
1379 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; 1379 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
1380 old_len = item->ri_buf[item->ri_cnt-1].i_len; 1380 old_len = item->ri_buf[item->ri_cnt-1].i_len;
1381 1381
1382 ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u); 1382 ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u);
1383 memcpy(&ptr[old_len], dp, len); /* d, s, l */ 1383 memcpy(&ptr[old_len], dp, len); /* d, s, l */
1384 item->ri_buf[item->ri_cnt-1].i_len += len; 1384 item->ri_buf[item->ri_cnt-1].i_len += len;
1385 item->ri_buf[item->ri_cnt-1].i_addr = ptr; 1385 item->ri_buf[item->ri_cnt-1].i_addr = ptr;
1386 return 0; 1386 return 0;
1387 } 1387 }
1388 1388
1389 /* 1389 /*
1390 * The next region to add is the start of a new region. It could be 1390 * The next region to add is the start of a new region. It could be
1391 * a whole region or just the first part of one. Because 1391 * a whole region or just the first part of one. Because
1392 * of this, the assumption here is that the type and size fields of all 1392 * of this, the assumption here is that the type and size fields of all
1393 * format structures fit into the first 32 bits of the structure. 1393 * format structures fit into the first 32 bits of the structure.
1394 * 1394 *
1395 * This works because all regions must be 32 bit aligned. Therefore, we 1395 * This works because all regions must be 32 bit aligned. Therefore, we
1396 * either have both fields or we have neither. If we have neither 1396 * either have both fields or we have neither. If we have neither
1397 * field, the data part of the region is zero length. We only have 1397 * field, the data part of the region is zero length. We only have
1398 * a log_op_header and can throw away the header since a new one will appear 1398 * a log_op_header and can throw away the header since a new one will appear
1399 * later. If we have at least 4 bytes, then we can determine how many regions 1399 * later. If we have at least 4 bytes, then we can determine how many regions
1400 * will appear in the current log item. 1400 * will appear in the current log item.
1401 */ 1401 */
1402 STATIC int 1402 STATIC int
1403 xlog_recover_add_to_trans( 1403 xlog_recover_add_to_trans(
1404 xlog_recover_t *trans, 1404 xlog_recover_t *trans,
1405 xfs_caddr_t dp, 1405 xfs_caddr_t dp,
1406 int len) 1406 int len)
1407 { 1407 {
1408 xfs_inode_log_format_t *in_f; /* any will do */ 1408 xfs_inode_log_format_t *in_f; /* any will do */
1409 xlog_recover_item_t *item; 1409 xlog_recover_item_t *item;
1410 xfs_caddr_t ptr; 1410 xfs_caddr_t ptr;
1411 1411
1412 if (!len) 1412 if (!len)
1413 return 0; 1413 return 0;
1414 item = trans->r_itemq; 1414 item = trans->r_itemq;
1415 if (item == NULL) { 1415 if (item == NULL) {
1416 ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC); 1416 ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC);
1417 if (len == sizeof(xfs_trans_header_t)) 1417 if (len == sizeof(xfs_trans_header_t))
1418 xlog_recover_add_item(&trans->r_itemq); 1418 xlog_recover_add_item(&trans->r_itemq);
1419 memcpy(&trans->r_theader, dp, len); /* d, s, l */ 1419 memcpy(&trans->r_theader, dp, len); /* d, s, l */
1420 return 0; 1420 return 0;
1421 } 1421 }
1422 1422
1423 ptr = kmem_alloc(len, KM_SLEEP); 1423 ptr = kmem_alloc(len, KM_SLEEP);
1424 memcpy(ptr, dp, len); 1424 memcpy(ptr, dp, len);
1425 in_f = (xfs_inode_log_format_t *)ptr; 1425 in_f = (xfs_inode_log_format_t *)ptr;
1426 1426
1427 if (item->ri_prev->ri_total != 0 && 1427 if (item->ri_prev->ri_total != 0 &&
1428 item->ri_prev->ri_total == item->ri_prev->ri_cnt) { 1428 item->ri_prev->ri_total == item->ri_prev->ri_cnt) {
1429 xlog_recover_add_item(&trans->r_itemq); 1429 xlog_recover_add_item(&trans->r_itemq);
1430 } 1430 }
1431 item = trans->r_itemq; 1431 item = trans->r_itemq;
1432 item = item->ri_prev; 1432 item = item->ri_prev;
1433 1433
1434 if (item->ri_total == 0) { /* first region to be added */ 1434 if (item->ri_total == 0) { /* first region to be added */
1435 item->ri_total = in_f->ilf_size; 1435 item->ri_total = in_f->ilf_size;
1436 ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM); 1436 ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM);
1437 item->ri_buf = kmem_zalloc((item->ri_total * 1437 item->ri_buf = kmem_zalloc((item->ri_total *
1438 sizeof(xfs_log_iovec_t)), KM_SLEEP); 1438 sizeof(xfs_log_iovec_t)), KM_SLEEP);
1439 } 1439 }
1440 ASSERT(item->ri_total > item->ri_cnt); 1440 ASSERT(item->ri_total > item->ri_cnt);
1441 /* Description region is ri_buf[0] */ 1441 /* Description region is ri_buf[0] */
1442 item->ri_buf[item->ri_cnt].i_addr = ptr; 1442 item->ri_buf[item->ri_cnt].i_addr = ptr;
1443 item->ri_buf[item->ri_cnt].i_len = len; 1443 item->ri_buf[item->ri_cnt].i_len = len;
1444 item->ri_cnt++; 1444 item->ri_cnt++;
1445 return 0; 1445 return 0;
1446 } 1446 }
1447 1447
1448 STATIC void 1448 STATIC void
1449 xlog_recover_new_tid( 1449 xlog_recover_new_tid(
1450 xlog_recover_t **q, 1450 xlog_recover_t **q,
1451 xlog_tid_t tid, 1451 xlog_tid_t tid,
1452 xfs_lsn_t lsn) 1452 xfs_lsn_t lsn)
1453 { 1453 {
1454 xlog_recover_t *trans; 1454 xlog_recover_t *trans;
1455 1455
1456 trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP); 1456 trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
1457 trans->r_log_tid = tid; 1457 trans->r_log_tid = tid;
1458 trans->r_lsn = lsn; 1458 trans->r_lsn = lsn;
1459 xlog_recover_put_hashq(q, trans); 1459 xlog_recover_put_hashq(q, trans);
1460 } 1460 }
1461 1461
1462 STATIC int 1462 STATIC int
1463 xlog_recover_unlink_tid( 1463 xlog_recover_unlink_tid(
1464 xlog_recover_t **q, 1464 xlog_recover_t **q,
1465 xlog_recover_t *trans) 1465 xlog_recover_t *trans)
1466 { 1466 {
1467 xlog_recover_t *tp; 1467 xlog_recover_t *tp;
1468 int found = 0; 1468 int found = 0;
1469 1469
1470 ASSERT(trans != NULL); 1470 ASSERT(trans != NULL);
1471 if (trans == *q) { 1471 if (trans == *q) {
1472 *q = (*q)->r_next; 1472 *q = (*q)->r_next;
1473 } else { 1473 } else {
1474 tp = *q; 1474 tp = *q;
1475 while (tp) { 1475 while (tp) {
1476 if (tp->r_next == trans) { 1476 if (tp->r_next == trans) {
1477 found = 1; 1477 found = 1;
1478 break; 1478 break;
1479 } 1479 }
1480 tp = tp->r_next; 1480 tp = tp->r_next;
1481 } 1481 }
1482 if (!found) { 1482 if (!found) {
1483 xlog_warn( 1483 xlog_warn(
1484 "XFS: xlog_recover_unlink_tid: trans not found"); 1484 "XFS: xlog_recover_unlink_tid: trans not found");
1485 ASSERT(0); 1485 ASSERT(0);
1486 return XFS_ERROR(EIO); 1486 return XFS_ERROR(EIO);
1487 } 1487 }
1488 tp->r_next = tp->r_next->r_next; 1488 tp->r_next = tp->r_next->r_next;
1489 } 1489 }
1490 return 0; 1490 return 0;
1491 } 1491 }
1492 1492
1493 STATIC void 1493 STATIC void
1494 xlog_recover_insert_item_backq( 1494 xlog_recover_insert_item_backq(
1495 xlog_recover_item_t **q, 1495 xlog_recover_item_t **q,
1496 xlog_recover_item_t *item) 1496 xlog_recover_item_t *item)
1497 { 1497 {
1498 if (*q == NULL) { 1498 if (*q == NULL) {
1499 item->ri_prev = item->ri_next = item; 1499 item->ri_prev = item->ri_next = item;
1500 *q = item; 1500 *q = item;
1501 } else { 1501 } else {
1502 item->ri_next = *q; 1502 item->ri_next = *q;
1503 item->ri_prev = (*q)->ri_prev; 1503 item->ri_prev = (*q)->ri_prev;
1504 (*q)->ri_prev = item; 1504 (*q)->ri_prev = item;
1505 item->ri_prev->ri_next = item; 1505 item->ri_prev->ri_next = item;
1506 } 1506 }
1507 } 1507 }
1508 1508
1509 STATIC void 1509 STATIC void
1510 xlog_recover_insert_item_frontq( 1510 xlog_recover_insert_item_frontq(
1511 xlog_recover_item_t **q, 1511 xlog_recover_item_t **q,
1512 xlog_recover_item_t *item) 1512 xlog_recover_item_t *item)
1513 { 1513 {
1514 xlog_recover_insert_item_backq(q, item); 1514 xlog_recover_insert_item_backq(q, item);
1515 *q = item; 1515 *q = item;
1516 } 1516 }
1517 1517
1518 STATIC int 1518 STATIC int
1519 xlog_recover_reorder_trans( 1519 xlog_recover_reorder_trans(
1520 xlog_recover_t *trans) 1520 xlog_recover_t *trans)
1521 { 1521 {
1522 xlog_recover_item_t *first_item, *itemq, *itemq_next; 1522 xlog_recover_item_t *first_item, *itemq, *itemq_next;
1523 xfs_buf_log_format_t *buf_f; 1523 xfs_buf_log_format_t *buf_f;
1524 ushort flags = 0; 1524 ushort flags = 0;
1525 1525
1526 first_item = itemq = trans->r_itemq; 1526 first_item = itemq = trans->r_itemq;
1527 trans->r_itemq = NULL; 1527 trans->r_itemq = NULL;
1528 do { 1528 do {
1529 itemq_next = itemq->ri_next; 1529 itemq_next = itemq->ri_next;
1530 buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr; 1530 buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr;
1531 1531
1532 switch (ITEM_TYPE(itemq)) { 1532 switch (ITEM_TYPE(itemq)) {
1533 case XFS_LI_BUF: 1533 case XFS_LI_BUF:
1534 flags = buf_f->blf_flags; 1534 flags = buf_f->blf_flags;
1535 if (!(flags & XFS_BLI_CANCEL)) { 1535 if (!(flags & XFS_BLI_CANCEL)) {
1536 xlog_recover_insert_item_frontq(&trans->r_itemq, 1536 xlog_recover_insert_item_frontq(&trans->r_itemq,
1537 itemq); 1537 itemq);
1538 break; 1538 break;
1539 } 1539 }
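/* cancelled buffers deliberately fall through and are queued at the back
 * along with the item types below */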
1540 case XFS_LI_INODE: 1540 case XFS_LI_INODE:
1541 case XFS_LI_DQUOT: 1541 case XFS_LI_DQUOT:
1542 case XFS_LI_QUOTAOFF: 1542 case XFS_LI_QUOTAOFF:
1543 case XFS_LI_EFD: 1543 case XFS_LI_EFD:
1544 case XFS_LI_EFI: 1544 case XFS_LI_EFI:
1545 xlog_recover_insert_item_backq(&trans->r_itemq, itemq); 1545 xlog_recover_insert_item_backq(&trans->r_itemq, itemq);
1546 break; 1546 break;
1547 default: 1547 default:
1548 xlog_warn( 1548 xlog_warn(
1549 "XFS: xlog_recover_reorder_trans: unrecognized type of log operation"); 1549 "XFS: xlog_recover_reorder_trans: unrecognized type of log operation");
1550 ASSERT(0); 1550 ASSERT(0);
1551 return XFS_ERROR(EIO); 1551 return XFS_ERROR(EIO);
1552 } 1552 }
1553 itemq = itemq_next; 1553 itemq = itemq_next;
1554 } while (first_item != itemq); 1554 } while (first_item != itemq);
1555 return 0; 1555 return 0;
1556 } 1556 }
1557 1557
1558 /* 1558 /*
1559 * Build up the table of buf cancel records so that we don't replay 1559 * Build up the table of buf cancel records so that we don't replay
1560 * cancelled data in the second pass. For buffer records that are 1560 * cancelled data in the second pass. For buffer records that are
1561 * not cancel records, there is nothing to do here so we just return. 1561 * not cancel records, there is nothing to do here so we just return.
1562 * 1562 *
1563 * If we get a cancel record which is already in the table, this indicates 1563 * If we get a cancel record which is already in the table, this indicates
1564 * that the buffer was cancelled multiple times. In order to ensure 1564 * that the buffer was cancelled multiple times. In order to ensure
1565 * that during pass 2 we keep the record in the table until we reach its 1565 * that during pass 2 we keep the record in the table until we reach its
1566 * last occurrence in the log, we keep a reference count in the cancel 1566 * last occurrence in the log, we keep a reference count in the cancel
1567 * record in the table to tell us how many times we expect to see this 1567 * record in the table to tell us how many times we expect to see this
1568 * record during the second pass. 1568 * record during the second pass.
1569 */ 1569 */
1570 STATIC void 1570 STATIC void
1571 xlog_recover_do_buffer_pass1( 1571 xlog_recover_do_buffer_pass1(
1572 xlog_t *log, 1572 xlog_t *log,
1573 xfs_buf_log_format_t *buf_f) 1573 xfs_buf_log_format_t *buf_f)
1574 { 1574 {
1575 xfs_buf_cancel_t *bcp; 1575 xfs_buf_cancel_t *bcp;
1576 xfs_buf_cancel_t *nextp; 1576 xfs_buf_cancel_t *nextp;
1577 xfs_buf_cancel_t *prevp; 1577 xfs_buf_cancel_t *prevp;
1578 xfs_buf_cancel_t **bucket; 1578 xfs_buf_cancel_t **bucket;
1579 xfs_daddr_t blkno = 0; 1579 xfs_daddr_t blkno = 0;
1580 uint len = 0; 1580 uint len = 0;
1581 ushort flags = 0; 1581 ushort flags = 0;
1582 1582
1583 switch (buf_f->blf_type) { 1583 switch (buf_f->blf_type) {
1584 case XFS_LI_BUF: 1584 case XFS_LI_BUF:
1585 blkno = buf_f->blf_blkno; 1585 blkno = buf_f->blf_blkno;
1586 len = buf_f->blf_len; 1586 len = buf_f->blf_len;
1587 flags = buf_f->blf_flags; 1587 flags = buf_f->blf_flags;
1588 break; 1588 break;
1589 } 1589 }
1590 1590
1591 /* 1591 /*
1592 * If this isn't a cancel buffer item, then just return. 1592 * If this isn't a cancel buffer item, then just return.
1593 */ 1593 */
1594 if (!(flags & XFS_BLI_CANCEL)) 1594 if (!(flags & XFS_BLI_CANCEL))
1595 return; 1595 return;
1596 1596
1597 /* 1597 /*
1598 * Insert an xfs_buf_cancel record into the hash table of 1598 * Insert an xfs_buf_cancel record into the hash table of
1599 * them. If there is already an identical record, bump 1599 * them. If there is already an identical record, bump
1600 * its reference count. 1600 * its reference count.
1601 */ 1601 */
1602 bucket = &log->l_buf_cancel_table[(__uint64_t)blkno % 1602 bucket = &log->l_buf_cancel_table[(__uint64_t)blkno %
1603 XLOG_BC_TABLE_SIZE]; 1603 XLOG_BC_TABLE_SIZE];
1604 /* 1604 /*
1605 * If the hash bucket is empty then just insert a new record into 1605 * If the hash bucket is empty then just insert a new record into
1606 * the bucket. 1606 * the bucket.
1607 */ 1607 */
1608 if (*bucket == NULL) { 1608 if (*bucket == NULL) {
1609 bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), 1609 bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t),
1610 KM_SLEEP); 1610 KM_SLEEP);
1611 bcp->bc_blkno = blkno; 1611 bcp->bc_blkno = blkno;
1612 bcp->bc_len = len; 1612 bcp->bc_len = len;
1613 bcp->bc_refcount = 1; 1613 bcp->bc_refcount = 1;
1614 bcp->bc_next = NULL; 1614 bcp->bc_next = NULL;
1615 *bucket = bcp; 1615 *bucket = bcp;
1616 return; 1616 return;
1617 } 1617 }
1618 1618
1619 /* 1619 /*
1620 * The hash bucket is not empty, so search for duplicates of our 1620 * The hash bucket is not empty, so search for duplicates of our
1621 * record. If we find one then just bump its refcount. If not 1621 * record. If we find one then just bump its refcount. If not
1622 * then add us at the end of the list. 1622 * then add us at the end of the list.
1623 */ 1623 */
1624 prevp = NULL; 1624 prevp = NULL;
1625 nextp = *bucket; 1625 nextp = *bucket;
1626 while (nextp != NULL) { 1626 while (nextp != NULL) {
1627 if (nextp->bc_blkno == blkno && nextp->bc_len == len) { 1627 if (nextp->bc_blkno == blkno && nextp->bc_len == len) {
1628 nextp->bc_refcount++; 1628 nextp->bc_refcount++;
1629 return; 1629 return;
1630 } 1630 }
1631 prevp = nextp; 1631 prevp = nextp;
1632 nextp = nextp->bc_next; 1632 nextp = nextp->bc_next;
1633 } 1633 }
1634 ASSERT(prevp != NULL); 1634 ASSERT(prevp != NULL);
1635 bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), 1635 bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t),
1636 KM_SLEEP); 1636 KM_SLEEP);
1637 bcp->bc_blkno = blkno; 1637 bcp->bc_blkno = blkno;
1638 bcp->bc_len = len; 1638 bcp->bc_len = len;
1639 bcp->bc_refcount = 1; 1639 bcp->bc_refcount = 1;
1640 bcp->bc_next = NULL; 1640 bcp->bc_next = NULL;
1641 prevp->bc_next = bcp; 1641 prevp->bc_next = bcp;
1642 } 1642 }
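
Editor's note: pass 1 above and the pass-2 lookup below must hash a block number to the same bucket. A minimal sketch of that shared indexing (the helper name is hypothetical):

	static xfs_buf_cancel_t **
	buf_cancel_bucket(xlog_t *log, xfs_daddr_t blkno)
	{
		return &log->l_buf_cancel_table[(__uint64_t)blkno %
						XLOG_BC_TABLE_SIZE];
	}
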
1643 1643
1644 /* 1644 /*
1645 * Check to see whether the buffer being recovered has a corresponding 1645 * Check to see whether the buffer being recovered has a corresponding
1646 * entry in the buffer cancel record table. If it does then return 1 1646 * entry in the buffer cancel record table. If it does then return 1
1647 * so that it will be cancelled, otherwise return 0. If the buffer is 1647 * so that it will be cancelled, otherwise return 0. If the buffer is
1648 * actually a buffer cancel item (XFS_BLI_CANCEL is set), then decrement 1648 * actually a buffer cancel item (XFS_BLI_CANCEL is set), then decrement
1649 * the refcount on the entry in the table and remove it from the table 1649 * the refcount on the entry in the table and remove it from the table
1650 * if this is the last reference. 1650 * if this is the last reference.
1651 * 1651 *
1652 * We remove the cancel record from the table when we encounter its 1652 * We remove the cancel record from the table when we encounter its
1653 * last occurrence in the log so that if the same buffer is re-used 1653 * last occurrence in the log so that if the same buffer is re-used
1654 * again after its last cancellation we actually replay the changes 1654 * again after its last cancellation we actually replay the changes
1655 * made at that point. 1655 * made at that point.
1656 */ 1656 */
1657 STATIC int 1657 STATIC int
1658 xlog_check_buffer_cancelled( 1658 xlog_check_buffer_cancelled(
1659 xlog_t *log, 1659 xlog_t *log,
1660 xfs_daddr_t blkno, 1660 xfs_daddr_t blkno,
1661 uint len, 1661 uint len,
1662 ushort flags) 1662 ushort flags)
1663 { 1663 {
1664 xfs_buf_cancel_t *bcp; 1664 xfs_buf_cancel_t *bcp;
1665 xfs_buf_cancel_t *prevp; 1665 xfs_buf_cancel_t *prevp;
1666 xfs_buf_cancel_t **bucket; 1666 xfs_buf_cancel_t **bucket;
1667 1667
1668 if (log->l_buf_cancel_table == NULL) { 1668 if (log->l_buf_cancel_table == NULL) {
1669 /* 1669 /*
1670 * There is nothing in the table built in pass one, 1670 * There is nothing in the table built in pass one,
1671 * so this buffer must not be cancelled. 1671 * so this buffer must not be cancelled.
1672 */ 1672 */
1673 ASSERT(!(flags & XFS_BLI_CANCEL)); 1673 ASSERT(!(flags & XFS_BLI_CANCEL));
1674 return 0; 1674 return 0;
1675 } 1675 }
1676 1676
1677 bucket = &log->l_buf_cancel_table[(__uint64_t)blkno % 1677 bucket = &log->l_buf_cancel_table[(__uint64_t)blkno %
1678 XLOG_BC_TABLE_SIZE]; 1678 XLOG_BC_TABLE_SIZE];
1679 bcp = *bucket; 1679 bcp = *bucket;
1680 if (bcp == NULL) { 1680 if (bcp == NULL) {
1681 /* 1681 /*
1682 * There is no corresponding entry in the table built 1682 * There is no corresponding entry in the table built
1683 * in pass one, so this buffer has not been cancelled. 1683 * in pass one, so this buffer has not been cancelled.
1684 */ 1684 */
1685 ASSERT(!(flags & XFS_BLI_CANCEL)); 1685 ASSERT(!(flags & XFS_BLI_CANCEL));
1686 return 0; 1686 return 0;
1687 } 1687 }
1688 1688
1689 /* 1689 /*
1690 * Search for an entry in the buffer cancel table that 1690 * Search for an entry in the buffer cancel table that
1691 * matches our buffer. 1691 * matches our buffer.
1692 */ 1692 */
1693 prevp = NULL; 1693 prevp = NULL;
1694 while (bcp != NULL) { 1694 while (bcp != NULL) {
1695 if (bcp->bc_blkno == blkno && bcp->bc_len == len) { 1695 if (bcp->bc_blkno == blkno && bcp->bc_len == len) {
1696 /* 1696 /*
1697 * We've got a match, so return 1 so that the 1697 * We've got a match, so return 1 so that the
1698 * recovery of this buffer is cancelled. 1698 * recovery of this buffer is cancelled.
1699 * If this buffer is actually a buffer cancel 1699 * If this buffer is actually a buffer cancel
1700 * log item, then decrement the refcount on the 1700 * log item, then decrement the refcount on the
1701 * one in the table and remove it if this is the 1701 * one in the table and remove it if this is the
1702 * last reference. 1702 * last reference.
1703 */ 1703 */
1704 if (flags & XFS_BLI_CANCEL) { 1704 if (flags & XFS_BLI_CANCEL) {
1705 bcp->bc_refcount--; 1705 bcp->bc_refcount--;
1706 if (bcp->bc_refcount == 0) { 1706 if (bcp->bc_refcount == 0) {
1707 if (prevp == NULL) { 1707 if (prevp == NULL) {
1708 *bucket = bcp->bc_next; 1708 *bucket = bcp->bc_next;
1709 } else { 1709 } else {
1710 prevp->bc_next = bcp->bc_next; 1710 prevp->bc_next = bcp->bc_next;
1711 } 1711 }
1712 kmem_free(bcp, 1712 kmem_free(bcp,
1713 sizeof(xfs_buf_cancel_t)); 1713 sizeof(xfs_buf_cancel_t));
1714 } 1714 }
1715 } 1715 }
1716 return 1; 1716 return 1;
1717 } 1717 }
1718 prevp = bcp; 1718 prevp = bcp;
1719 bcp = bcp->bc_next; 1719 bcp = bcp->bc_next;
1720 } 1720 }
1721 /* 1721 /*
1722 * We didn't find a corresponding entry in the table, so 1722 * We didn't find a corresponding entry in the table, so
1723 * return 0 so that the buffer is NOT cancelled. 1723 * return 0 so that the buffer is NOT cancelled.
1724 */ 1724 */
1725 ASSERT(!(flags & XFS_BLI_CANCEL)); 1725 ASSERT(!(flags & XFS_BLI_CANCEL));
1726 return 0; 1726 return 0;
1727 } 1727 }
1728 1728
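A minimal userspace sketch of the cancel-table lookup above, assuming simplified stand-ins (struct buf_cancel, TABLE_SIZE, FLAG_CANCEL) for xfs_buf_cancel_t, XLOG_BC_TABLE_SIZE and XFS_BLI_CANCEL. The table is a hash of singly linked lists keyed by block number modulo the table size; a matching cancel item consumes one reference, and the record is freed on the last one:

#include <stdint.h>
#include <stdlib.h>

#define TABLE_SIZE	64		/* stand-in for XLOG_BC_TABLE_SIZE */
#define FLAG_CANCEL	0x1		/* stand-in for XFS_BLI_CANCEL */

struct buf_cancel {
        int64_t                 bc_blkno;
        unsigned int            bc_len;
        int                     bc_refcount;
        struct buf_cancel       *bc_next;
};

static struct buf_cancel *table[TABLE_SIZE];

/* Return 1 if (blkno, len) has a cancel record, else 0. */
static int check_cancelled(int64_t blkno, unsigned int len, int flags)
{
        struct buf_cancel **bucket = &table[(uint64_t)blkno % TABLE_SIZE];
        struct buf_cancel *bcp, *prevp = NULL;

        for (bcp = *bucket; bcp != NULL; prevp = bcp, bcp = bcp->bc_next) {
                if (bcp->bc_blkno != blkno || bcp->bc_len != len)
                        continue;
                /*
                 * A cancel item consumes one reference; unlink and
                 * free the record on the last reference so a later
                 * reuse of the same blocks is replayed normally.
                 */
                if ((flags & FLAG_CANCEL) && --bcp->bc_refcount == 0) {
                        if (prevp == NULL)
                                *bucket = bcp->bc_next;
                        else
                                prevp->bc_next = bcp->bc_next;
                        free(bcp);
                }
                return 1;
        }
        return 0;
}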
1729 STATIC int 1729 STATIC int
1730 xlog_recover_do_buffer_pass2( 1730 xlog_recover_do_buffer_pass2(
1731 xlog_t *log, 1731 xlog_t *log,
1732 xfs_buf_log_format_t *buf_f) 1732 xfs_buf_log_format_t *buf_f)
1733 { 1733 {
1734 xfs_daddr_t blkno = 0; 1734 xfs_daddr_t blkno = 0;
1735 ushort flags = 0; 1735 ushort flags = 0;
1736 uint len = 0; 1736 uint len = 0;
1737 1737
1738 switch (buf_f->blf_type) { 1738 switch (buf_f->blf_type) {
1739 case XFS_LI_BUF: 1739 case XFS_LI_BUF:
1740 blkno = buf_f->blf_blkno; 1740 blkno = buf_f->blf_blkno;
1741 flags = buf_f->blf_flags; 1741 flags = buf_f->blf_flags;
1742 len = buf_f->blf_len; 1742 len = buf_f->blf_len;
1743 break; 1743 break;
1744 } 1744 }
1745 1745
1746 return xlog_check_buffer_cancelled(log, blkno, len, flags); 1746 return xlog_check_buffer_cancelled(log, blkno, len, flags);
1747 } 1747 }
1748 1748
1749 /* 1749 /*
1750 * Perform recovery for a buffer full of inodes. In these buffers, 1750 * Perform recovery for a buffer full of inodes. In these buffers,
1751 * the only data which should be recovered is that which corresponds 1751 * the only data which should be recovered is that which corresponds
1752 * to the di_next_unlinked pointers in the on disk inode structures. 1752 * to the di_next_unlinked pointers in the on disk inode structures.
1753 * The rest of the data for the inodes is always logged through the 1753 * The rest of the data for the inodes is always logged through the
1754 * inodes themselves rather than the inode buffer and is recovered 1754 * inodes themselves rather than the inode buffer and is recovered
1755 * in xlog_recover_do_inode_trans(). 1755 * in xlog_recover_do_inode_trans().
1756 * 1756 *
1757 * The only time when buffers full of inodes are fully recovered is 1757 * The only time when buffers full of inodes are fully recovered is
1758 * when the buffer is full of newly allocated inodes. In this case 1758 * when the buffer is full of newly allocated inodes. In this case
1759 * the buffer will not be marked as an inode buffer and so will be 1759 * the buffer will not be marked as an inode buffer and so will be
1760 * sent to xlog_recover_do_reg_buffer() below during recovery. 1760 * sent to xlog_recover_do_reg_buffer() below during recovery.
1761 */ 1761 */
1762 STATIC int 1762 STATIC int
1763 xlog_recover_do_inode_buffer( 1763 xlog_recover_do_inode_buffer(
1764 xfs_mount_t *mp, 1764 xfs_mount_t *mp,
1765 xlog_recover_item_t *item, 1765 xlog_recover_item_t *item,
1766 xfs_buf_t *bp, 1766 xfs_buf_t *bp,
1767 xfs_buf_log_format_t *buf_f) 1767 xfs_buf_log_format_t *buf_f)
1768 { 1768 {
1769 int i; 1769 int i;
1770 int item_index; 1770 int item_index;
1771 int bit; 1771 int bit;
1772 int nbits; 1772 int nbits;
1773 int reg_buf_offset; 1773 int reg_buf_offset;
1774 int reg_buf_bytes; 1774 int reg_buf_bytes;
1775 int next_unlinked_offset; 1775 int next_unlinked_offset;
1776 int inodes_per_buf; 1776 int inodes_per_buf;
1777 xfs_agino_t *logged_nextp; 1777 xfs_agino_t *logged_nextp;
1778 xfs_agino_t *buffer_nextp; 1778 xfs_agino_t *buffer_nextp;
1779 unsigned int *data_map = NULL; 1779 unsigned int *data_map = NULL;
1780 unsigned int map_size = 0; 1780 unsigned int map_size = 0;
1781 1781
1782 switch (buf_f->blf_type) { 1782 switch (buf_f->blf_type) {
1783 case XFS_LI_BUF: 1783 case XFS_LI_BUF:
1784 data_map = buf_f->blf_data_map; 1784 data_map = buf_f->blf_data_map;
1785 map_size = buf_f->blf_map_size; 1785 map_size = buf_f->blf_map_size;
1786 break; 1786 break;
1787 } 1787 }
1788 /* 1788 /*
1789 * Set the variables corresponding to the current region to 1789 * Set the variables corresponding to the current region to
1790 * 0 so that we'll initialize them on the first pass through 1790 * 0 so that we'll initialize them on the first pass through
1791 * the loop. 1791 * the loop.
1792 */ 1792 */
1793 reg_buf_offset = 0; 1793 reg_buf_offset = 0;
1794 reg_buf_bytes = 0; 1794 reg_buf_bytes = 0;
1795 bit = 0; 1795 bit = 0;
1796 nbits = 0; 1796 nbits = 0;
1797 item_index = 0; 1797 item_index = 0;
1798 inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog; 1798 inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
1799 for (i = 0; i < inodes_per_buf; i++) { 1799 for (i = 0; i < inodes_per_buf; i++) {
1800 next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + 1800 next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
1801 offsetof(xfs_dinode_t, di_next_unlinked); 1801 offsetof(xfs_dinode_t, di_next_unlinked);
1802 1802
1803 while (next_unlinked_offset >= 1803 while (next_unlinked_offset >=
1804 (reg_buf_offset + reg_buf_bytes)) { 1804 (reg_buf_offset + reg_buf_bytes)) {
1805 /* 1805 /*
1806 * The next di_next_unlinked field is beyond 1806 * The next di_next_unlinked field is beyond
1807 * the current logged region. Find the next 1807 * the current logged region. Find the next
1808 * logged region that contains or is beyond 1808 * logged region that contains or is beyond
1809 * the current di_next_unlinked field. 1809 * the current di_next_unlinked field.
1810 */ 1810 */
1811 bit += nbits; 1811 bit += nbits;
1812 bit = xfs_next_bit(data_map, map_size, bit); 1812 bit = xfs_next_bit(data_map, map_size, bit);
1813 1813
1814 /* 1814 /*
1815 * If there are no more logged regions in the 1815 * If there are no more logged regions in the
1816 * buffer, then we're done. 1816 * buffer, then we're done.
1817 */ 1817 */
1818 if (bit == -1) { 1818 if (bit == -1) {
1819 return 0; 1819 return 0;
1820 } 1820 }
1821 1821
1822 nbits = xfs_contig_bits(data_map, map_size, 1822 nbits = xfs_contig_bits(data_map, map_size,
1823 bit); 1823 bit);
1824 ASSERT(nbits > 0); 1824 ASSERT(nbits > 0);
1825 reg_buf_offset = bit << XFS_BLI_SHIFT; 1825 reg_buf_offset = bit << XFS_BLI_SHIFT;
1826 reg_buf_bytes = nbits << XFS_BLI_SHIFT; 1826 reg_buf_bytes = nbits << XFS_BLI_SHIFT;
1827 item_index++; 1827 item_index++;
1828 } 1828 }
1829 1829
1830 /* 1830 /*
1831 * If the current logged region starts after the current 1831 * If the current logged region starts after the current
1832 * di_next_unlinked field, then move on to the next 1832 * di_next_unlinked field, then move on to the next
1833 * di_next_unlinked field. 1833 * di_next_unlinked field.
1834 */ 1834 */
1835 if (next_unlinked_offset < reg_buf_offset) { 1835 if (next_unlinked_offset < reg_buf_offset) {
1836 continue; 1836 continue;
1837 } 1837 }
1838 1838
1839 ASSERT(item->ri_buf[item_index].i_addr != NULL); 1839 ASSERT(item->ri_buf[item_index].i_addr != NULL);
1840 ASSERT((item->ri_buf[item_index].i_len % XFS_BLI_CHUNK) == 0); 1840 ASSERT((item->ri_buf[item_index].i_len % XFS_BLI_CHUNK) == 0);
1841 ASSERT((reg_buf_offset + reg_buf_bytes) <= XFS_BUF_COUNT(bp)); 1841 ASSERT((reg_buf_offset + reg_buf_bytes) <= XFS_BUF_COUNT(bp));
1842 1842
1843 /* 1843 /*
1844 * The current logged region contains a copy of the 1844 * The current logged region contains a copy of the
1845 * current di_next_unlinked field. Extract its value 1845 * current di_next_unlinked field. Extract its value
1846 * and copy it to the buffer copy. 1846 * and copy it to the buffer copy.
1847 */ 1847 */
1848 logged_nextp = (xfs_agino_t *) 1848 logged_nextp = (xfs_agino_t *)
1849 ((char *)(item->ri_buf[item_index].i_addr) + 1849 ((char *)(item->ri_buf[item_index].i_addr) +
1850 (next_unlinked_offset - reg_buf_offset)); 1850 (next_unlinked_offset - reg_buf_offset));
1851 if (unlikely(*logged_nextp == 0)) { 1851 if (unlikely(*logged_nextp == 0)) {
1852 xfs_fs_cmn_err(CE_ALERT, mp, 1852 xfs_fs_cmn_err(CE_ALERT, mp,
1853 "bad inode buffer log record (ptr = 0x%p, bp = 0x%p). XFS trying to replay bad (0) inode di_next_unlinked field", 1853 "bad inode buffer log record (ptr = 0x%p, bp = 0x%p). XFS trying to replay bad (0) inode di_next_unlinked field",
1854 item, bp); 1854 item, bp);
1855 XFS_ERROR_REPORT("xlog_recover_do_inode_buf", 1855 XFS_ERROR_REPORT("xlog_recover_do_inode_buf",
1856 XFS_ERRLEVEL_LOW, mp); 1856 XFS_ERRLEVEL_LOW, mp);
1857 return XFS_ERROR(EFSCORRUPTED); 1857 return XFS_ERROR(EFSCORRUPTED);
1858 } 1858 }
1859 1859
1860 buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, 1860 buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
1861 next_unlinked_offset); 1861 next_unlinked_offset);
1862 *buffer_nextp = *logged_nextp; 1862 *buffer_nextp = *logged_nextp;
1863 } 1863 }
1864 1864
1865 return 0; 1865 return 0;
1866 } 1866 }
1867 1867
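The region walk above is driven by the buf log item's dirty bitmap: each set bit covers one 128 byte chunk (XFS_BLI_CHUNK), and xfs_next_bit()/xfs_contig_bits() yield the start and length of each contiguous run of logged data. A self-contained sketch of that walk, with simplified helpers standing in for the XFS bit routines:

#include <stdio.h>

#define NBWORD		32	/* bits per bitmap word */
#define CHUNK_SHIFT	7	/* each bit covers a 128 byte chunk */

static int next_bit(const unsigned int *map, int size, int start)
{
        for (int i = start; i < size * NBWORD; i++)
                if (map[i / NBWORD] & (1U << (i % NBWORD)))
                        return i;
        return -1;		/* no more logged regions */
}

static int contig_bits(const unsigned int *map, int size, int start)
{
        int i = start;

        while (i < size * NBWORD && (map[i / NBWORD] & (1U << (i % NBWORD))))
                i++;
        return i - start;
}

int main(void)
{
        unsigned int map[2] = { 0x0000F00F, 0x3 };	/* example bitmap */
        int bit = 0, nbits;

        /* Visit each contiguous logged region, as the recovery loops do. */
        while ((bit = next_bit(map, 2, bit)) != -1) {
                nbits = contig_bits(map, 2, bit);
                printf("region: offset %d, %d bytes\n",
                       bit << CHUNK_SHIFT, nbits << CHUNK_SHIFT);
                bit += nbits;
        }
        return 0;
}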
1868 /* 1868 /*
1869 * Perform a 'normal' buffer recovery. Each logged region of the 1869 * Perform a 'normal' buffer recovery. Each logged region of the
1870 * buffer should be copied over the corresponding region in the 1870 * buffer should be copied over the corresponding region in the
1871 * given buffer. The bitmap in the buf log format structure indicates 1871 * given buffer. The bitmap in the buf log format structure indicates
1872 * where to place the logged data. 1872 * where to place the logged data.
1873 */ 1873 */
1874 /*ARGSUSED*/ 1874 /*ARGSUSED*/
1875 STATIC void 1875 STATIC void
1876 xlog_recover_do_reg_buffer( 1876 xlog_recover_do_reg_buffer(
1877 xfs_mount_t *mp,
1877 xlog_recover_item_t *item, 1878 xlog_recover_item_t *item,
1878 xfs_buf_t *bp, 1879 xfs_buf_t *bp,
1879 xfs_buf_log_format_t *buf_f) 1880 xfs_buf_log_format_t *buf_f)
1880 { 1881 {
1881 int i; 1882 int i;
1882 int bit; 1883 int bit;
1883 int nbits; 1884 int nbits;
1884 unsigned int *data_map = NULL; 1885 unsigned int *data_map = NULL;
1885 unsigned int map_size = 0; 1886 unsigned int map_size = 0;
1886 int error; 1887 int error;
1888 int stale_buf = 1;
1887 1889
1890 /*
1891 * Scan through the on-disk inode buffer and attempt to
1892 * determine if it has been written to since it was logged.
1893 *
1894 * - If any of the magic numbers are incorrect then the buffer is stale
1895 * - If any of the modes are non-zero then the buffer is not stale
1896 * - If all of the modes are zero and at least one of the generation
1897 * counts is non-zero then the buffer is stale
1898 *
1899 * If the end result is a stale buffer then the log buffer is replayed
1900 * otherwise it is skipped.
1901 *
1902 * This heuristic is not perfect. It can be improved by scanning the
1903 * entire inode chunk for evidence that any of the inode clusters have
1904 * been updated. To fix this problem completely we will need a major
1905 * architectural change to the logging system.
1906 */
1907 if (buf_f->blf_flags & XFS_BLI_INODE_NEW_BUF) {
1908 xfs_dinode_t *dip;
1909 int inodes_per_buf;
1910 int mode_count = 0;
1911 int gen_count = 0;
1912
1913 stale_buf = 0;
1914 inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
1915 for (i = 0; i < inodes_per_buf; i++) {
1916 dip = (xfs_dinode_t *)xfs_buf_offset(bp,
1917 i * mp->m_sb.sb_inodesize);
1918 if (be16_to_cpu(dip->di_core.di_magic) !=
1919 XFS_DINODE_MAGIC) {
1920 stale_buf = 1;
1921 break;
1922 }
1923 if (be16_to_cpu(dip->di_core.di_mode))
1924 mode_count++;
1925 if (be16_to_cpu(dip->di_core.di_gen))
1926 gen_count++;
1927 }
1928
1929 if (!mode_count && gen_count)
1930 stale_buf = 1;
1931 }
1932
1888 switch (buf_f->blf_type) { 1933 switch (buf_f->blf_type) {
1889 case XFS_LI_BUF: 1934 case XFS_LI_BUF:
1890 data_map = buf_f->blf_data_map; 1935 data_map = buf_f->blf_data_map;
1891 map_size = buf_f->blf_map_size; 1936 map_size = buf_f->blf_map_size;
1892 break; 1937 break;
1893 } 1938 }
1894 bit = 0; 1939 bit = 0;
1895 i = 1; /* 0 is the buf format structure */ 1940 i = 1; /* 0 is the buf format structure */
1896 while (1) { 1941 while (1) {
1897 bit = xfs_next_bit(data_map, map_size, bit); 1942 bit = xfs_next_bit(data_map, map_size, bit);
1898 if (bit == -1) 1943 if (bit == -1)
1899 break; 1944 break;
1900 nbits = xfs_contig_bits(data_map, map_size, bit); 1945 nbits = xfs_contig_bits(data_map, map_size, bit);
1901 ASSERT(nbits > 0); 1946 ASSERT(nbits > 0);
1902 ASSERT(item->ri_buf[i].i_addr != NULL); 1947 ASSERT(item->ri_buf[i].i_addr != NULL);
1903 ASSERT(item->ri_buf[i].i_len % XFS_BLI_CHUNK == 0); 1948 ASSERT(item->ri_buf[i].i_len % XFS_BLI_CHUNK == 0);
1904 ASSERT(XFS_BUF_COUNT(bp) >= 1949 ASSERT(XFS_BUF_COUNT(bp) >=
1905 ((uint)bit << XFS_BLI_SHIFT)+(nbits<<XFS_BLI_SHIFT)); 1950 ((uint)bit << XFS_BLI_SHIFT)+(nbits<<XFS_BLI_SHIFT));
1906 1951
1907 /* 1952 /*
1908 * Do a sanity check if this is a dquot buffer. Just checking 1953 * Do a sanity check if this is a dquot buffer. Just checking
1909 * the first dquot in the buffer should do. XXX This is 1954 * the first dquot in the buffer should do. XXX This is
1910 * probably a good thing to do for other buf types also. 1955 * probably a good thing to do for other buf types also.
1911 */ 1956 */
1912 error = 0; 1957 error = 0;
1913 if (buf_f->blf_flags & 1958 if (buf_f->blf_flags &
1914 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { 1959 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
1915 error = xfs_qm_dqcheck((xfs_disk_dquot_t *) 1960 error = xfs_qm_dqcheck((xfs_disk_dquot_t *)
1916 item->ri_buf[i].i_addr, 1961 item->ri_buf[i].i_addr,
1917 -1, 0, XFS_QMOPT_DOWARN, 1962 -1, 0, XFS_QMOPT_DOWARN,
1918 "dquot_buf_recover"); 1963 "dquot_buf_recover");
1919 } 1964 }
1920 if (!error) 1965 if (!error && stale_buf)
1921 memcpy(xfs_buf_offset(bp, 1966 memcpy(xfs_buf_offset(bp,
1922 (uint)bit << XFS_BLI_SHIFT), /* dest */ 1967 (uint)bit << XFS_BLI_SHIFT), /* dest */
1923 item->ri_buf[i].i_addr, /* source */ 1968 item->ri_buf[i].i_addr, /* source */
1924 nbits<<XFS_BLI_SHIFT); /* length */ 1969 nbits<<XFS_BLI_SHIFT); /* length */
1925 i++; 1970 i++;
1926 bit += nbits; 1971 bit += nbits;
1927 } 1972 }
1928 1973
1929 /* Shouldn't be any more regions */ 1974 /* Shouldn't be any more regions */
1930 ASSERT(i == item->ri_total); 1975 ASSERT(i == item->ri_total);
1931 } 1976 }
1932 1977
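The staleness heuristic introduced above can be read in isolation as the sketch below. The struct is a simplified stand-in for the on-disk xfs_dinode core fields (already converted to CPU byte order); only the decision logic mirrors the code:

#include <stdint.h>

#define DINODE_MAGIC	0x494e		/* "IN", as XFS_DINODE_MAGIC */

struct dinode_core {			/* stand-in, CPU byte order */
        uint16_t di_magic;
        uint16_t di_mode;
        uint16_t di_gen;
};

/* Return 1 if the on-disk inode buffer is stale and should be replayed. */
static int inode_buf_is_stale(const struct dinode_core *dips, int ninodes)
{
        int mode_count = 0, gen_count = 0;

        for (int i = 0; i < ninodes; i++) {
                if (dips[i].di_magic != DINODE_MAGIC)
                        return 1;	/* bad magic: definitely stale */
                if (dips[i].di_mode)
                        mode_count++;
                if (dips[i].di_gen)
                        gen_count++;
        }
        /*
         * All magics are good.  Any non-zero mode means an inode was
         * written since initialisation, so the buffer is current; all
         * modes zero with at least one non-zero generation means the
         * chunk was initialised before and the on-disk copy is stale.
         */
        return !mode_count && gen_count;
}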
1933 /* 1978 /*
1934 * Do some primitive error checking on ondisk dquot data structures. 1979 * Do some primitive error checking on ondisk dquot data structures.
1935 */ 1980 */
1936 int 1981 int
1937 xfs_qm_dqcheck( 1982 xfs_qm_dqcheck(
1938 xfs_disk_dquot_t *ddq, 1983 xfs_disk_dquot_t *ddq,
1939 xfs_dqid_t id, 1984 xfs_dqid_t id,
1940 uint type, /* used only when IO_dorepair is true */ 1985 uint type, /* used only when IO_dorepair is true */
1941 uint flags, 1986 uint flags,
1942 char *str) 1987 char *str)
1943 { 1988 {
1944 xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; 1989 xfs_dqblk_t *d = (xfs_dqblk_t *)ddq;
1945 int errs = 0; 1990 int errs = 0;
1946 1991
1947 /* 1992 /*
1948 * We can encounter an uninitialized dquot buffer for 2 reasons: 1993 * We can encounter an uninitialized dquot buffer for 2 reasons:
1949 * 1. If we crash while deleting the quotainode(s), and those blks got 1994 * 1. If we crash while deleting the quotainode(s), and those blks got
1950 * used for user data. This is because we take the path of regular 1995 * used for user data. This is because we take the path of regular
1951 * file deletion; however, the size field of quotainodes is never 1996 * file deletion; however, the size field of quotainodes is never
1952 * updated, so all the tricks that we play in itruncate_finish 1997 * updated, so all the tricks that we play in itruncate_finish
1953 * don't quite matter. 1998 * don't quite matter.
1954 * 1999 *
1955 * 2. We don't replay the quota buffers when there's a quotaoff logitem. 2000 * 2. We don't replay the quota buffers when there's a quotaoff logitem.
1956 * But the allocation will be replayed so we'll end up with an 2001 * But the allocation will be replayed so we'll end up with an
1957 * uninitialized quota block. 2002 * uninitialized quota block.
1958 * 2003 *
1959 * This is all fine; things are still consistent, and we haven't lost 2004 * This is all fine; things are still consistent, and we haven't lost
1960 * any quota information. Just don't complain about bad dquot blks. 2005 * any quota information. Just don't complain about bad dquot blks.
1961 */ 2006 */
1962 if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) { 2007 if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) {
1963 if (flags & XFS_QMOPT_DOWARN) 2008 if (flags & XFS_QMOPT_DOWARN)
1964 cmn_err(CE_ALERT, 2009 cmn_err(CE_ALERT,
1965 "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", 2010 "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
1966 str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); 2011 str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC);
1967 errs++; 2012 errs++;
1968 } 2013 }
1969 if (ddq->d_version != XFS_DQUOT_VERSION) { 2014 if (ddq->d_version != XFS_DQUOT_VERSION) {
1970 if (flags & XFS_QMOPT_DOWARN) 2015 if (flags & XFS_QMOPT_DOWARN)
1971 cmn_err(CE_ALERT, 2016 cmn_err(CE_ALERT,
1972 "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", 2017 "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x",
1973 str, id, ddq->d_version, XFS_DQUOT_VERSION); 2018 str, id, ddq->d_version, XFS_DQUOT_VERSION);
1974 errs++; 2019 errs++;
1975 } 2020 }
1976 2021
1977 if (ddq->d_flags != XFS_DQ_USER && 2022 if (ddq->d_flags != XFS_DQ_USER &&
1978 ddq->d_flags != XFS_DQ_PROJ && 2023 ddq->d_flags != XFS_DQ_PROJ &&
1979 ddq->d_flags != XFS_DQ_GROUP) { 2024 ddq->d_flags != XFS_DQ_GROUP) {
1980 if (flags & XFS_QMOPT_DOWARN) 2025 if (flags & XFS_QMOPT_DOWARN)
1981 cmn_err(CE_ALERT, 2026 cmn_err(CE_ALERT,
1982 "%s : XFS dquot ID 0x%x, unknown flags 0x%x", 2027 "%s : XFS dquot ID 0x%x, unknown flags 0x%x",
1983 str, id, ddq->d_flags); 2028 str, id, ddq->d_flags);
1984 errs++; 2029 errs++;
1985 } 2030 }
1986 2031
1987 if (id != -1 && id != be32_to_cpu(ddq->d_id)) { 2032 if (id != -1 && id != be32_to_cpu(ddq->d_id)) {
1988 if (flags & XFS_QMOPT_DOWARN) 2033 if (flags & XFS_QMOPT_DOWARN)
1989 cmn_err(CE_ALERT, 2034 cmn_err(CE_ALERT,
1990 "%s : ondisk-dquot 0x%p, ID mismatch: " 2035 "%s : ondisk-dquot 0x%p, ID mismatch: "
1991 "0x%x expected, found id 0x%x", 2036 "0x%x expected, found id 0x%x",
1992 str, ddq, id, be32_to_cpu(ddq->d_id)); 2037 str, ddq, id, be32_to_cpu(ddq->d_id));
1993 errs++; 2038 errs++;
1994 } 2039 }
1995 2040
1996 if (!errs && ddq->d_id) { 2041 if (!errs && ddq->d_id) {
1997 if (ddq->d_blk_softlimit && 2042 if (ddq->d_blk_softlimit &&
1998 be64_to_cpu(ddq->d_bcount) >= 2043 be64_to_cpu(ddq->d_bcount) >=
1999 be64_to_cpu(ddq->d_blk_softlimit)) { 2044 be64_to_cpu(ddq->d_blk_softlimit)) {
2000 if (!ddq->d_btimer) { 2045 if (!ddq->d_btimer) {
2001 if (flags & XFS_QMOPT_DOWARN) 2046 if (flags & XFS_QMOPT_DOWARN)
2002 cmn_err(CE_ALERT, 2047 cmn_err(CE_ALERT,
2003 "%s : Dquot ID 0x%x (0x%p) " 2048 "%s : Dquot ID 0x%x (0x%p) "
2004 "BLK TIMER NOT STARTED", 2049 "BLK TIMER NOT STARTED",
2005 str, (int)be32_to_cpu(ddq->d_id), ddq); 2050 str, (int)be32_to_cpu(ddq->d_id), ddq);
2006 errs++; 2051 errs++;
2007 } 2052 }
2008 } 2053 }
2009 if (ddq->d_ino_softlimit && 2054 if (ddq->d_ino_softlimit &&
2010 be64_to_cpu(ddq->d_icount) >= 2055 be64_to_cpu(ddq->d_icount) >=
2011 be64_to_cpu(ddq->d_ino_softlimit)) { 2056 be64_to_cpu(ddq->d_ino_softlimit)) {
2012 if (!ddq->d_itimer) { 2057 if (!ddq->d_itimer) {
2013 if (flags & XFS_QMOPT_DOWARN) 2058 if (flags & XFS_QMOPT_DOWARN)
2014 cmn_err(CE_ALERT, 2059 cmn_err(CE_ALERT,
2015 "%s : Dquot ID 0x%x (0x%p) " 2060 "%s : Dquot ID 0x%x (0x%p) "
2016 "INODE TIMER NOT STARTED", 2061 "INODE TIMER NOT STARTED",
2017 str, (int)be32_to_cpu(ddq->d_id), ddq); 2062 str, (int)be32_to_cpu(ddq->d_id), ddq);
2018 errs++; 2063 errs++;
2019 } 2064 }
2020 } 2065 }
2021 if (ddq->d_rtb_softlimit && 2066 if (ddq->d_rtb_softlimit &&
2022 be64_to_cpu(ddq->d_rtbcount) >= 2067 be64_to_cpu(ddq->d_rtbcount) >=
2023 be64_to_cpu(ddq->d_rtb_softlimit)) { 2068 be64_to_cpu(ddq->d_rtb_softlimit)) {
2024 if (!ddq->d_rtbtimer) { 2069 if (!ddq->d_rtbtimer) {
2025 if (flags & XFS_QMOPT_DOWARN) 2070 if (flags & XFS_QMOPT_DOWARN)
2026 cmn_err(CE_ALERT, 2071 cmn_err(CE_ALERT,
2027 "%s : Dquot ID 0x%x (0x%p) " 2072 "%s : Dquot ID 0x%x (0x%p) "
2028 "RTBLK TIMER NOT STARTED", 2073 "RTBLK TIMER NOT STARTED",
2029 str, (int)be32_to_cpu(ddq->d_id), ddq); 2074 str, (int)be32_to_cpu(ddq->d_id), ddq);
2030 errs++; 2075 errs++;
2031 } 2076 }
2032 } 2077 }
2033 } 2078 }
2034 2079
2035 if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) 2080 if (!errs || !(flags & XFS_QMOPT_DQREPAIR))
2036 return errs; 2081 return errs;
2037 2082
2038 if (flags & XFS_QMOPT_DOWARN) 2083 if (flags & XFS_QMOPT_DOWARN)
2039 cmn_err(CE_NOTE, "Re-initializing dquot ID 0x%x", id); 2084 cmn_err(CE_NOTE, "Re-initializing dquot ID 0x%x", id);
2040 2085
2041 /* 2086 /*
2042 * Typically, a repair is only requested by quotacheck. 2087 * Typically, a repair is only requested by quotacheck.
2043 */ 2088 */
2044 ASSERT(id != -1); 2089 ASSERT(id != -1);
2045 ASSERT(flags & XFS_QMOPT_DQREPAIR); 2090 ASSERT(flags & XFS_QMOPT_DQREPAIR);
2046 memset(d, 0, sizeof(xfs_dqblk_t)); 2091 memset(d, 0, sizeof(xfs_dqblk_t));
2047 2092
2048 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); 2093 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
2049 d->dd_diskdq.d_version = XFS_DQUOT_VERSION; 2094 d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
2050 d->dd_diskdq.d_flags = type; 2095 d->dd_diskdq.d_flags = type;
2051 d->dd_diskdq.d_id = cpu_to_be32(id); 2096 d->dd_diskdq.d_id = cpu_to_be32(id);
2052 2097
2053 return errs; 2098 return errs;
2054 } 2099 }
2055 2100
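One of the consistency rules xfs_qm_dqcheck() enforces, shown in isolation: once usage reaches a non-zero soft limit, the corresponding grace-period timer must already be running. A sketch with simplified stand-ins for the xfs_disk_dquot fields:

#include <stdint.h>

struct dquot_limits {		/* stand-ins for xfs_disk_dquot fields */
        uint64_t softlimit;	/* 0 means no soft limit */
        uint64_t count;		/* current usage */
        uint32_t timer;		/* grace timer, 0 if not started */
};

/* Return 1 if usage is at or over a soft limit with no timer running. */
static int soft_limit_timer_missing(const struct dquot_limits *dq)
{
        return dq->softlimit &&
               dq->count >= dq->softlimit &&
               !dq->timer;
}

The function above applies this same rule three times, once each for the block, inode and realtime-block counters.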
2056 /* 2101 /*
2057 * Perform a dquot buffer recovery. 2102 * Perform a dquot buffer recovery.
2058 * Simple algorithm: if we have found a QUOTAOFF logitem of the same type 2103 * Simple algorithm: if we have found a QUOTAOFF logitem of the same type
2059 * (ie. USR or GRP), then just toss this buffer away; don't recover it. 2104 * (ie. USR or GRP), then just toss this buffer away; don't recover it.
2060 * Else, treat it as a regular buffer and do recovery. 2105 * Else, treat it as a regular buffer and do recovery.
2061 */ 2106 */
2062 STATIC void 2107 STATIC void
2063 xlog_recover_do_dquot_buffer( 2108 xlog_recover_do_dquot_buffer(
2064 xfs_mount_t *mp, 2109 xfs_mount_t *mp,
2065 xlog_t *log, 2110 xlog_t *log,
2066 xlog_recover_item_t *item, 2111 xlog_recover_item_t *item,
2067 xfs_buf_t *bp, 2112 xfs_buf_t *bp,
2068 xfs_buf_log_format_t *buf_f) 2113 xfs_buf_log_format_t *buf_f)
2069 { 2114 {
2070 uint type; 2115 uint type;
2071 2116
2072 /* 2117 /*
2073 * Filesystems are required to send in quota flags at mount time. 2118 * Filesystems are required to send in quota flags at mount time.
2074 */ 2119 */
2075 if (mp->m_qflags == 0) { 2120 if (mp->m_qflags == 0) {
2076 return; 2121 return;
2077 } 2122 }
2078 2123
2079 type = 0; 2124 type = 0;
2080 if (buf_f->blf_flags & XFS_BLI_UDQUOT_BUF) 2125 if (buf_f->blf_flags & XFS_BLI_UDQUOT_BUF)
2081 type |= XFS_DQ_USER; 2126 type |= XFS_DQ_USER;
2082 if (buf_f->blf_flags & XFS_BLI_PDQUOT_BUF) 2127 if (buf_f->blf_flags & XFS_BLI_PDQUOT_BUF)
2083 type |= XFS_DQ_PROJ; 2128 type |= XFS_DQ_PROJ;
2084 if (buf_f->blf_flags & XFS_BLI_GDQUOT_BUF) 2129 if (buf_f->blf_flags & XFS_BLI_GDQUOT_BUF)
2085 type |= XFS_DQ_GROUP; 2130 type |= XFS_DQ_GROUP;
2086 /* 2131 /*
2087 * This type of quota was turned off, so ignore this buffer 2132 * This type of quota was turned off, so ignore this buffer
2088 */ 2133 */
2089 if (log->l_quotaoffs_flag & type) 2134 if (log->l_quotaoffs_flag & type)
2090 return; 2135 return;
2091 2136
2092 xlog_recover_do_reg_buffer(item, bp, buf_f); 2137 xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
2093 } 2138 }
2094 2139
2095 /* 2140 /*
2096 * This routine replays a modification made to a buffer at runtime. 2141 * This routine replays a modification made to a buffer at runtime.
2097 * There are actually two types of buffer, regular and inode, which 2142 * There are actually two types of buffer, regular and inode, which
2098 * are handled differently. Inode buffers are handled differently 2143 * are handled differently. Inode buffers are handled differently
2099 * in that we only recover a specific set of data from them, namely 2144 * in that we only recover a specific set of data from them, namely
2100 * the inode di_next_unlinked fields. This is because all other inode 2145 * the inode di_next_unlinked fields. This is because all other inode
2101 * data is actually logged via inode records and any data we replay 2146 * data is actually logged via inode records and any data we replay
2102 * here which overlaps that may be stale. 2147 * here which overlaps that may be stale.
2103 * 2148 *
2104 * When meta-data buffers are freed at run time we log a buffer item 2149 * When meta-data buffers are freed at run time we log a buffer item
2105 * with the XFS_BLI_CANCEL bit set to indicate that previous copies 2150 * with the XFS_BLI_CANCEL bit set to indicate that previous copies
2106 * of the buffer in the log should not be replayed at recovery time. 2151 * of the buffer in the log should not be replayed at recovery time.
2107 * This is so that if the blocks covered by the buffer are reused for 2152 * This is so that if the blocks covered by the buffer are reused for
2108 * file data before we crash we don't end up replaying old, freed 2153 * file data before we crash we don't end up replaying old, freed
2109 * meta-data into a user's file. 2154 * meta-data into a user's file.
2110 * 2155 *
2111 * To handle the cancellation of buffer log items, we make two passes 2156 * To handle the cancellation of buffer log items, we make two passes
2112 * over the log during recovery. During the first we build a table of 2157 * over the log during recovery. During the first we build a table of
2113 * those buffers which have been cancelled, and during the second we 2158 * those buffers which have been cancelled, and during the second we
2114 * only replay those buffers which do not have corresponding cancel 2159 * only replay those buffers which do not have corresponding cancel
2115 * records in the table. See xlog_recover_do_buffer_pass[1,2] above 2160 * records in the table. See xlog_recover_do_buffer_pass[1,2] above
2116 * for more details on the implementation of the table of cancel records. 2161 * for more details on the implementation of the table of cancel records.
2117 */ 2162 */
2118 STATIC int 2163 STATIC int
2119 xlog_recover_do_buffer_trans( 2164 xlog_recover_do_buffer_trans(
2120 xlog_t *log, 2165 xlog_t *log,
2121 xlog_recover_item_t *item, 2166 xlog_recover_item_t *item,
2122 int pass) 2167 int pass)
2123 { 2168 {
2124 xfs_buf_log_format_t *buf_f; 2169 xfs_buf_log_format_t *buf_f;
2125 xfs_mount_t *mp; 2170 xfs_mount_t *mp;
2126 xfs_buf_t *bp; 2171 xfs_buf_t *bp;
2127 int error; 2172 int error;
2128 int cancel; 2173 int cancel;
2129 xfs_daddr_t blkno; 2174 xfs_daddr_t blkno;
2130 int len; 2175 int len;
2131 ushort flags; 2176 ushort flags;
2132 2177
2133 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr; 2178 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
2134 2179
2135 if (pass == XLOG_RECOVER_PASS1) { 2180 if (pass == XLOG_RECOVER_PASS1) {
2136 /* 2181 /*
2137 * In this pass we're only looking for buf items 2182 * In this pass we're only looking for buf items
2138 * with the XFS_BLI_CANCEL bit set. 2183 * with the XFS_BLI_CANCEL bit set.
2139 */ 2184 */
2140 xlog_recover_do_buffer_pass1(log, buf_f); 2185 xlog_recover_do_buffer_pass1(log, buf_f);
2141 return 0; 2186 return 0;
2142 } else { 2187 } else {
2143 /* 2188 /*
2144 * In this pass we want to recover all the buffers 2189 * In this pass we want to recover all the buffers
2145 * which have not been cancelled and are not 2190 * which have not been cancelled and are not
2146 * cancellation buffers themselves. The routine 2191 * cancellation buffers themselves. The routine
2147 * we call here will tell us whether or not to 2192 * we call here will tell us whether or not to
2148 * continue with the replay of this buffer. 2193 * continue with the replay of this buffer.
2149 */ 2194 */
2150 cancel = xlog_recover_do_buffer_pass2(log, buf_f); 2195 cancel = xlog_recover_do_buffer_pass2(log, buf_f);
2151 if (cancel) { 2196 if (cancel) {
2152 return 0; 2197 return 0;
2153 } 2198 }
2154 } 2199 }
2155 switch (buf_f->blf_type) { 2200 switch (buf_f->blf_type) {
2156 case XFS_LI_BUF: 2201 case XFS_LI_BUF:
2157 blkno = buf_f->blf_blkno; 2202 blkno = buf_f->blf_blkno;
2158 len = buf_f->blf_len; 2203 len = buf_f->blf_len;
2159 flags = buf_f->blf_flags; 2204 flags = buf_f->blf_flags;
2160 break; 2205 break;
2161 default: 2206 default:
2162 xfs_fs_cmn_err(CE_ALERT, log->l_mp, 2207 xfs_fs_cmn_err(CE_ALERT, log->l_mp,
2163 "xfs_log_recover: unknown buffer type 0x%x, logdev %s", 2208 "xfs_log_recover: unknown buffer type 0x%x, logdev %s",
2164 buf_f->blf_type, log->l_mp->m_logname ? 2209 buf_f->blf_type, log->l_mp->m_logname ?
2165 log->l_mp->m_logname : "internal"); 2210 log->l_mp->m_logname : "internal");
2166 XFS_ERROR_REPORT("xlog_recover_do_buffer_trans", 2211 XFS_ERROR_REPORT("xlog_recover_do_buffer_trans",
2167 XFS_ERRLEVEL_LOW, log->l_mp); 2212 XFS_ERRLEVEL_LOW, log->l_mp);
2168 return XFS_ERROR(EFSCORRUPTED); 2213 return XFS_ERROR(EFSCORRUPTED);
2169 } 2214 }
2170 2215
2171 mp = log->l_mp; 2216 mp = log->l_mp;
2172 if (flags & XFS_BLI_INODE_BUF) { 2217 if (flags & XFS_BLI_INODE_BUF) {
2173 bp = xfs_buf_read_flags(mp->m_ddev_targp, blkno, len, 2218 bp = xfs_buf_read_flags(mp->m_ddev_targp, blkno, len,
2174 XFS_BUF_LOCK); 2219 XFS_BUF_LOCK);
2175 } else { 2220 } else {
2176 bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, 0); 2221 bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, 0);
2177 } 2222 }
2178 if (XFS_BUF_ISERROR(bp)) { 2223 if (XFS_BUF_ISERROR(bp)) {
2179 xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp, 2224 xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp,
2180 bp, blkno); 2225 bp, blkno);
2181 error = XFS_BUF_GETERROR(bp); 2226 error = XFS_BUF_GETERROR(bp);
2182 xfs_buf_relse(bp); 2227 xfs_buf_relse(bp);
2183 return error; 2228 return error;
2184 } 2229 }
2185 2230
2186 error = 0; 2231 error = 0;
2187 if (flags & XFS_BLI_INODE_BUF) { 2232 if (flags & XFS_BLI_INODE_BUF) {
2188 error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); 2233 error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
2189 } else if (flags & 2234 } else if (flags &
2190 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { 2235 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
2191 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); 2236 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
2192 } else { 2237 } else {
2193 xlog_recover_do_reg_buffer(item, bp, buf_f); 2238 xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
2194 } 2239 }
2195 if (error) 2240 if (error)
2196 return XFS_ERROR(error); 2241 return XFS_ERROR(error);
2197 2242
2198 /* 2243 /*
2199 * Perform delayed write on the buffer. Asynchronous writes will be 2244 * Perform delayed write on the buffer. Asynchronous writes will be
2200 * slower when taking into account all the buffers to be flushed. 2245 * slower when taking into account all the buffers to be flushed.
2201 * 2246 *
2202 * Also make sure that only inode buffers with good sizes stay in 2247 * Also make sure that only inode buffers with good sizes stay in
2203 * the buffer cache. The kernel moves inodes in buffers of 1 block 2248 * the buffer cache. The kernel moves inodes in buffers of 1 block
2204 * or XFS_INODE_CLUSTER_SIZE bytes, whichever is bigger. The inode 2249 * or XFS_INODE_CLUSTER_SIZE bytes, whichever is bigger. The inode
2205 * buffers in the log can be a different size if the log was generated 2250 * buffers in the log can be a different size if the log was generated
2206 * by an older kernel using unclustered inode buffers or a newer kernel 2251 * by an older kernel using unclustered inode buffers or a newer kernel
2207 * running with a different inode cluster size. Regardless, if 2252 * running with a different inode cluster size. Regardless, if
2208 * the inode buffer size isn't MAX(blocksize, XFS_INODE_CLUSTER_SIZE) 2253 * the inode buffer size isn't MAX(blocksize, XFS_INODE_CLUSTER_SIZE)
2209 * for *our* value of XFS_INODE_CLUSTER_SIZE, then we need to keep 2254 * for *our* value of XFS_INODE_CLUSTER_SIZE, then we need to keep
2210 * the buffer out of the buffer cache so that the buffer won't 2255 * the buffer out of the buffer cache so that the buffer won't
2211 * overlap with future reads of those inodes. 2256 * overlap with future reads of those inodes.
2212 */ 2257 */
2213 if (XFS_DINODE_MAGIC == 2258 if (XFS_DINODE_MAGIC ==
2214 INT_GET(*((__uint16_t *)(xfs_buf_offset(bp, 0))), ARCH_CONVERT) && 2259 INT_GET(*((__uint16_t *)(xfs_buf_offset(bp, 0))), ARCH_CONVERT) &&
2215 (XFS_BUF_COUNT(bp) != MAX(log->l_mp->m_sb.sb_blocksize, 2260 (XFS_BUF_COUNT(bp) != MAX(log->l_mp->m_sb.sb_blocksize,
2216 (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { 2261 (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) {
2217 XFS_BUF_STALE(bp); 2262 XFS_BUF_STALE(bp);
2218 error = xfs_bwrite(mp, bp); 2263 error = xfs_bwrite(mp, bp);
2219 } else { 2264 } else {
2220 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || 2265 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL ||
2221 XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); 2266 XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp);
2222 XFS_BUF_SET_FSPRIVATE(bp, mp); 2267 XFS_BUF_SET_FSPRIVATE(bp, mp);
2223 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2268 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
2224 xfs_bdwrite(mp, bp); 2269 xfs_bdwrite(mp, bp);
2225 } 2270 }
2226 2271
2227 return (error); 2272 return (error);
2228 } 2273 }
2229 2274
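The size check at the end of the function reduces to the comparison below; max_u32() and the parameter names are illustrative, with XFS_BUF_COUNT(bp), sb_blocksize and XFS_INODE_CLUSTER_SIZE(mp) as the real inputs. A buffer failing this test is written out synchronously and staled rather than left in the buffer cache:

#include <stdint.h>

static uint32_t max_u32(uint32_t a, uint32_t b)
{
        return a > b ? a : b;
}

/* Return 1 if a recovered inode buffer must be staled after write-out. */
static int inode_buf_wrong_size(uint32_t buf_bytes, uint32_t blocksize,
                                uint32_t inode_cluster_size)
{
        return buf_bytes != max_u32(blocksize, inode_cluster_size);
}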
2230 STATIC int 2275 STATIC int
2231 xlog_recover_do_inode_trans( 2276 xlog_recover_do_inode_trans(
2232 xlog_t *log, 2277 xlog_t *log,
2233 xlog_recover_item_t *item, 2278 xlog_recover_item_t *item,
2234 int pass) 2279 int pass)
2235 { 2280 {
2236 xfs_inode_log_format_t *in_f; 2281 xfs_inode_log_format_t *in_f;
2237 xfs_mount_t *mp; 2282 xfs_mount_t *mp;
2238 xfs_buf_t *bp; 2283 xfs_buf_t *bp;
2239 xfs_imap_t imap; 2284 xfs_imap_t imap;
2240 xfs_dinode_t *dip; 2285 xfs_dinode_t *dip;
2241 xfs_ino_t ino; 2286 xfs_ino_t ino;
2242 int len; 2287 int len;
2243 xfs_caddr_t src; 2288 xfs_caddr_t src;
2244 xfs_caddr_t dest; 2289 xfs_caddr_t dest;
2245 int error; 2290 int error;
2246 int attr_index; 2291 int attr_index;
2247 uint fields; 2292 uint fields;
2248 xfs_dinode_core_t *dicp; 2293 xfs_dinode_core_t *dicp;
2249 int need_free = 0; 2294 int need_free = 0;
2250 2295
2251 if (pass == XLOG_RECOVER_PASS1) { 2296 if (pass == XLOG_RECOVER_PASS1) {
2252 return 0; 2297 return 0;
2253 } 2298 }
2254 2299
2255 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { 2300 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
2256 in_f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr; 2301 in_f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr;
2257 } else { 2302 } else {
2258 in_f = (xfs_inode_log_format_t *)kmem_alloc( 2303 in_f = (xfs_inode_log_format_t *)kmem_alloc(
2259 sizeof(xfs_inode_log_format_t), KM_SLEEP); 2304 sizeof(xfs_inode_log_format_t), KM_SLEEP);
2260 need_free = 1; 2305 need_free = 1;
2261 error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f); 2306 error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
2262 if (error) 2307 if (error)
2263 goto error; 2308 goto error;
2264 } 2309 }
2265 ino = in_f->ilf_ino; 2310 ino = in_f->ilf_ino;
2266 mp = log->l_mp; 2311 mp = log->l_mp;
2267 if (ITEM_TYPE(item) == XFS_LI_INODE) { 2312 if (ITEM_TYPE(item) == XFS_LI_INODE) {
2268 imap.im_blkno = (xfs_daddr_t)in_f->ilf_blkno; 2313 imap.im_blkno = (xfs_daddr_t)in_f->ilf_blkno;
2269 imap.im_len = in_f->ilf_len; 2314 imap.im_len = in_f->ilf_len;
2270 imap.im_boffset = in_f->ilf_boffset; 2315 imap.im_boffset = in_f->ilf_boffset;
2271 } else { 2316 } else {
2272 /* 2317 /*
2273 * It's an old inode format record. We don't know where 2318 * It's an old inode format record. We don't know where
2274 * its cluster is located on disk, and we can't allow 2319 * its cluster is located on disk, and we can't allow
2275 * xfs_imap() to figure it out because the inode btrees 2320 * xfs_imap() to figure it out because the inode btrees
2276 * are not ready to be used. Therefore do not pass the 2321 * are not ready to be used. Therefore do not pass the
2277 * XFS_IMAP_LOOKUP flag to xfs_imap(). This will give 2322 * XFS_IMAP_LOOKUP flag to xfs_imap(). This will give
2278 * us only the single block in which the inode lives 2323 * us only the single block in which the inode lives
2279 * rather than its cluster, so we must make sure to 2324 * rather than its cluster, so we must make sure to
2280 * invalidate the buffer when we write it out below. 2325 * invalidate the buffer when we write it out below.
2281 */ 2326 */
2282 imap.im_blkno = 0; 2327 imap.im_blkno = 0;
2283 xfs_imap(log->l_mp, NULL, ino, &imap, 0); 2328 xfs_imap(log->l_mp, NULL, ino, &imap, 0);
2284 } 2329 }
2285 2330
2286 /* 2331 /*
2287 * Inode buffers can be freed, look out for it, 2332 * Inode buffers can be freed, look out for it,
2288 * and do not replay the inode. 2333 * and do not replay the inode.
2289 */ 2334 */
2290 if (xlog_check_buffer_cancelled(log, imap.im_blkno, imap.im_len, 0)) { 2335 if (xlog_check_buffer_cancelled(log, imap.im_blkno, imap.im_len, 0)) {
2291 error = 0; 2336 error = 0;
2292 goto error; 2337 goto error;
2293 } 2338 }
2294 2339
2295 bp = xfs_buf_read_flags(mp->m_ddev_targp, imap.im_blkno, imap.im_len, 2340 bp = xfs_buf_read_flags(mp->m_ddev_targp, imap.im_blkno, imap.im_len,
2296 XFS_BUF_LOCK); 2341 XFS_BUF_LOCK);
2297 if (XFS_BUF_ISERROR(bp)) { 2342 if (XFS_BUF_ISERROR(bp)) {
2298 xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, 2343 xfs_ioerror_alert("xlog_recover_do..(read#2)", mp,
2299 bp, imap.im_blkno); 2344 bp, imap.im_blkno);
2300 error = XFS_BUF_GETERROR(bp); 2345 error = XFS_BUF_GETERROR(bp);
2301 xfs_buf_relse(bp); 2346 xfs_buf_relse(bp);
2302 goto error; 2347 goto error;
2303 } 2348 }
2304 error = 0; 2349 error = 0;
2305 ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); 2350 ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
2306 dip = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 2351 dip = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
2307 2352
2308 /* 2353 /*
2309 * Make sure the place we're flushing out to really looks 2354 * Make sure the place we're flushing out to really looks
2310 * like an inode! 2355 * like an inode!
2311 */ 2356 */
2312 if (unlikely(INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC)) { 2357 if (unlikely(INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC)) {
2313 xfs_buf_relse(bp); 2358 xfs_buf_relse(bp);
2314 xfs_fs_cmn_err(CE_ALERT, mp, 2359 xfs_fs_cmn_err(CE_ALERT, mp,
2315 "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", 2360 "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld",
2316 dip, bp, ino); 2361 dip, bp, ino);
2317 XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)", 2362 XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)",
2318 XFS_ERRLEVEL_LOW, mp); 2363 XFS_ERRLEVEL_LOW, mp);
2319 error = EFSCORRUPTED; 2364 error = EFSCORRUPTED;
2320 goto error; 2365 goto error;
2321 } 2366 }
2322 dicp = (xfs_dinode_core_t*)(item->ri_buf[1].i_addr); 2367 dicp = (xfs_dinode_core_t*)(item->ri_buf[1].i_addr);
2323 if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { 2368 if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
2324 xfs_buf_relse(bp); 2369 xfs_buf_relse(bp);
2325 xfs_fs_cmn_err(CE_ALERT, mp, 2370 xfs_fs_cmn_err(CE_ALERT, mp,
2326 "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld", 2371 "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld",
2327 item, ino); 2372 item, ino);
2328 XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)", 2373 XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)",
2329 XFS_ERRLEVEL_LOW, mp); 2374 XFS_ERRLEVEL_LOW, mp);
2330 error = EFSCORRUPTED; 2375 error = EFSCORRUPTED;
2331 goto error; 2376 goto error;
2332 } 2377 }
2333 2378
2334 /* Skip replay when the on disk inode is newer than the log one */ 2379 /* Skip replay when the on disk inode is newer than the log one */
2335 if (dicp->di_flushiter < 2380 if (dicp->di_flushiter <
2336 INT_GET(dip->di_core.di_flushiter, ARCH_CONVERT)) { 2381 INT_GET(dip->di_core.di_flushiter, ARCH_CONVERT)) {
2337 /* 2382 /*
2338 * Deal with the wrap case: an on-disk value of DI_MAX_FLUSH 2383 * Deal with the wrap case: an on-disk value of DI_MAX_FLUSH
2339 * with a much smaller log value means the counter wrapped 2384 * with a much smaller log value means the counter wrapped
2340 */ 2385 */
2341 if ((INT_GET(dip->di_core.di_flushiter, ARCH_CONVERT) 2386 if ((INT_GET(dip->di_core.di_flushiter, ARCH_CONVERT)
2342 == DI_MAX_FLUSH) && 2387 == DI_MAX_FLUSH) &&
2343 (dicp->di_flushiter < (DI_MAX_FLUSH>>1))) { 2388 (dicp->di_flushiter < (DI_MAX_FLUSH>>1))) {
2344 /* do nothing */ 2389 /* do nothing */
2345 } else { 2390 } else {
2346 xfs_buf_relse(bp); 2391 xfs_buf_relse(bp);
2347 error = 0; 2392 error = 0;
2348 goto error; 2393 goto error;
2349 } 2394 }
2350 } 2395 }
2351 /* Take the opportunity to reset the flush iteration count */ 2396 /* Take the opportunity to reset the flush iteration count */
2352 dicp->di_flushiter = 0; 2397 dicp->di_flushiter = 0;
2353 2398
2354 if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) { 2399 if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) {
2355 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && 2400 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
2356 (dicp->di_format != XFS_DINODE_FMT_BTREE)) { 2401 (dicp->di_format != XFS_DINODE_FMT_BTREE)) {
2357 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(3)", 2402 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(3)",
2358 XFS_ERRLEVEL_LOW, mp, dicp); 2403 XFS_ERRLEVEL_LOW, mp, dicp);
2359 xfs_buf_relse(bp); 2404 xfs_buf_relse(bp);
2360 xfs_fs_cmn_err(CE_ALERT, mp, 2405 xfs_fs_cmn_err(CE_ALERT, mp,
2361 "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 2406 "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
2362 item, dip, bp, ino); 2407 item, dip, bp, ino);
2363 error = EFSCORRUPTED; 2408 error = EFSCORRUPTED;
2364 goto error; 2409 goto error;
2365 } 2410 }
2366 } else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) { 2411 } else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) {
2367 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && 2412 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
2368 (dicp->di_format != XFS_DINODE_FMT_BTREE) && 2413 (dicp->di_format != XFS_DINODE_FMT_BTREE) &&
2369 (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { 2414 (dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
2370 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(4)", 2415 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(4)",
2371 XFS_ERRLEVEL_LOW, mp, dicp); 2416 XFS_ERRLEVEL_LOW, mp, dicp);
2372 xfs_buf_relse(bp); 2417 xfs_buf_relse(bp);
2373 xfs_fs_cmn_err(CE_ALERT, mp, 2418 xfs_fs_cmn_err(CE_ALERT, mp,
2374 "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 2419 "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
2375 item, dip, bp, ino); 2420 item, dip, bp, ino);
2376 error = EFSCORRUPTED; 2421 error = EFSCORRUPTED;
2377 goto error; 2422 goto error;
2378 } 2423 }
2379 } 2424 }
2380 if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ 2425 if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
2381 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(5)", 2426 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(5)",
2382 XFS_ERRLEVEL_LOW, mp, dicp); 2427 XFS_ERRLEVEL_LOW, mp, dicp);
2383 xfs_buf_relse(bp); 2428 xfs_buf_relse(bp);
2384 xfs_fs_cmn_err(CE_ALERT, mp, 2429 xfs_fs_cmn_err(CE_ALERT, mp,
2385 "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", 2430 "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
2386 item, dip, bp, ino, 2431 item, dip, bp, ino,
2387 dicp->di_nextents + dicp->di_anextents, 2432 dicp->di_nextents + dicp->di_anextents,
2388 dicp->di_nblocks); 2433 dicp->di_nblocks);
2389 error = EFSCORRUPTED; 2434 error = EFSCORRUPTED;
2390 goto error; 2435 goto error;
2391 } 2436 }
2392 if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { 2437 if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
2393 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)", 2438 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)",
2394 XFS_ERRLEVEL_LOW, mp, dicp); 2439 XFS_ERRLEVEL_LOW, mp, dicp);
2395 xfs_buf_relse(bp); 2440 xfs_buf_relse(bp);
2396 xfs_fs_cmn_err(CE_ALERT, mp, 2441 xfs_fs_cmn_err(CE_ALERT, mp,
2397 "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x", 2442 "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x",
2398 item, dip, bp, ino, dicp->di_forkoff); 2443 item, dip, bp, ino, dicp->di_forkoff);
2399 error = EFSCORRUPTED; 2444 error = EFSCORRUPTED;
2400 goto error; 2445 goto error;
2401 } 2446 }
2402 if (unlikely(item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t))) { 2447 if (unlikely(item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t))) {
2403 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)", 2448 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)",
2404 XFS_ERRLEVEL_LOW, mp, dicp); 2449 XFS_ERRLEVEL_LOW, mp, dicp);
2405 xfs_buf_relse(bp); 2450 xfs_buf_relse(bp);
2406 xfs_fs_cmn_err(CE_ALERT, mp, 2451 xfs_fs_cmn_err(CE_ALERT, mp,
2407 "xfs_inode_recover: Bad inode log record length %d, rec ptr 0x%p", 2452 "xfs_inode_recover: Bad inode log record length %d, rec ptr 0x%p",
2408 item->ri_buf[1].i_len, item); 2453 item->ri_buf[1].i_len, item);
2409 error = EFSCORRUPTED; 2454 error = EFSCORRUPTED;
2410 goto error; 2455 goto error;
2411 } 2456 }
2412 2457
2413 /* The core is in in-core format */ 2458 /* The core is in in-core format */
2414 xfs_xlate_dinode_core((xfs_caddr_t)&dip->di_core, 2459 xfs_xlate_dinode_core((xfs_caddr_t)&dip->di_core,
2415 (xfs_dinode_core_t*)item->ri_buf[1].i_addr, -1); 2460 (xfs_dinode_core_t*)item->ri_buf[1].i_addr, -1);
2416 2461
2417 /* the rest is in on-disk format */ 2462 /* the rest is in on-disk format */
2418 if (item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t)) { 2463 if (item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t)) {
2419 memcpy((xfs_caddr_t) dip + sizeof(xfs_dinode_core_t), 2464 memcpy((xfs_caddr_t) dip + sizeof(xfs_dinode_core_t),
2420 item->ri_buf[1].i_addr + sizeof(xfs_dinode_core_t), 2465 item->ri_buf[1].i_addr + sizeof(xfs_dinode_core_t),
2421 item->ri_buf[1].i_len - sizeof(xfs_dinode_core_t)); 2466 item->ri_buf[1].i_len - sizeof(xfs_dinode_core_t));
2422 } 2467 }
2423 2468
2424 fields = in_f->ilf_fields; 2469 fields = in_f->ilf_fields;
2425 switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) { 2470 switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) {
2426 case XFS_ILOG_DEV: 2471 case XFS_ILOG_DEV:
2427 INT_SET(dip->di_u.di_dev, ARCH_CONVERT, in_f->ilf_u.ilfu_rdev); 2472 INT_SET(dip->di_u.di_dev, ARCH_CONVERT, in_f->ilf_u.ilfu_rdev);
2428 2473
2429 break; 2474 break;
2430 case XFS_ILOG_UUID: 2475 case XFS_ILOG_UUID:
2431 dip->di_u.di_muuid = in_f->ilf_u.ilfu_uuid; 2476 dip->di_u.di_muuid = in_f->ilf_u.ilfu_uuid;
2432 break; 2477 break;
2433 } 2478 }
2434 2479
2435 if (in_f->ilf_size == 2) 2480 if (in_f->ilf_size == 2)
2436 goto write_inode_buffer; 2481 goto write_inode_buffer;
2437 len = item->ri_buf[2].i_len; 2482 len = item->ri_buf[2].i_len;
2438 src = item->ri_buf[2].i_addr; 2483 src = item->ri_buf[2].i_addr;
2439 ASSERT(in_f->ilf_size <= 4); 2484 ASSERT(in_f->ilf_size <= 4);
2440 ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK)); 2485 ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
2441 ASSERT(!(fields & XFS_ILOG_DFORK) || 2486 ASSERT(!(fields & XFS_ILOG_DFORK) ||
2442 (len == in_f->ilf_dsize)); 2487 (len == in_f->ilf_dsize));
2443 2488
2444 switch (fields & XFS_ILOG_DFORK) { 2489 switch (fields & XFS_ILOG_DFORK) {
2445 case XFS_ILOG_DDATA: 2490 case XFS_ILOG_DDATA:
2446 case XFS_ILOG_DEXT: 2491 case XFS_ILOG_DEXT:
2447 memcpy(&dip->di_u, src, len); 2492 memcpy(&dip->di_u, src, len);
2448 break; 2493 break;
2449 2494
2450 case XFS_ILOG_DBROOT: 2495 case XFS_ILOG_DBROOT:
2451 xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len, 2496 xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len,
2452 &(dip->di_u.di_bmbt), 2497 &(dip->di_u.di_bmbt),
2453 XFS_DFORK_DSIZE(dip, mp)); 2498 XFS_DFORK_DSIZE(dip, mp));
2454 break; 2499 break;
2455 2500
2456 default: 2501 default:
2457 /* 2502 /*
2458 * There are no data fork flags set. 2503 * There are no data fork flags set.
2459 */ 2504 */
2460 ASSERT((fields & XFS_ILOG_DFORK) == 0); 2505 ASSERT((fields & XFS_ILOG_DFORK) == 0);
2461 break; 2506 break;
2462 } 2507 }
2463 2508
2464 /* 2509 /*
2465 * If we logged any attribute data, recover it. There may or 2510 * If we logged any attribute data, recover it. There may or
2466 * may not have been any other non-core data logged in this 2511 * may not have been any other non-core data logged in this
2467 * transaction. 2512 * transaction.
2468 */ 2513 */
2469 if (in_f->ilf_fields & XFS_ILOG_AFORK) { 2514 if (in_f->ilf_fields & XFS_ILOG_AFORK) {
2470 if (in_f->ilf_fields & XFS_ILOG_DFORK) { 2515 if (in_f->ilf_fields & XFS_ILOG_DFORK) {
2471 attr_index = 3; 2516 attr_index = 3;
2472 } else { 2517 } else {
2473 attr_index = 2; 2518 attr_index = 2;
2474 } 2519 }
2475 len = item->ri_buf[attr_index].i_len; 2520 len = item->ri_buf[attr_index].i_len;
2476 src = item->ri_buf[attr_index].i_addr; 2521 src = item->ri_buf[attr_index].i_addr;
2477 ASSERT(len == in_f->ilf_asize); 2522 ASSERT(len == in_f->ilf_asize);
2478 2523
2479 switch (in_f->ilf_fields & XFS_ILOG_AFORK) { 2524 switch (in_f->ilf_fields & XFS_ILOG_AFORK) {
2480 case XFS_ILOG_ADATA: 2525 case XFS_ILOG_ADATA:
2481 case XFS_ILOG_AEXT: 2526 case XFS_ILOG_AEXT:
2482 dest = XFS_DFORK_APTR(dip); 2527 dest = XFS_DFORK_APTR(dip);
2483 ASSERT(len <= XFS_DFORK_ASIZE(dip, mp)); 2528 ASSERT(len <= XFS_DFORK_ASIZE(dip, mp));
2484 memcpy(dest, src, len); 2529 memcpy(dest, src, len);
2485 break; 2530 break;
2486 2531
2487 case XFS_ILOG_ABROOT: 2532 case XFS_ILOG_ABROOT:
2488 dest = XFS_DFORK_APTR(dip); 2533 dest = XFS_DFORK_APTR(dip);
2489 xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len, 2534 xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len,
2490 (xfs_bmdr_block_t*)dest, 2535 (xfs_bmdr_block_t*)dest,
2491 XFS_DFORK_ASIZE(dip, mp)); 2536 XFS_DFORK_ASIZE(dip, mp));
2492 break; 2537 break;
2493 2538
2494 default: 2539 default:
2495 xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag"); 2540 xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag");
2496 ASSERT(0); 2541 ASSERT(0);
2497 xfs_buf_relse(bp); 2542 xfs_buf_relse(bp);
2498 error = EIO; 2543 error = EIO;
2499 goto error; 2544 goto error;
2500 } 2545 }
2501 } 2546 }
2502 2547
2503 write_inode_buffer: 2548 write_inode_buffer:
2504 if (ITEM_TYPE(item) == XFS_LI_INODE) { 2549 if (ITEM_TYPE(item) == XFS_LI_INODE) {
2505 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || 2550 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL ||
2506 XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); 2551 XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp);
2507 XFS_BUF_SET_FSPRIVATE(bp, mp); 2552 XFS_BUF_SET_FSPRIVATE(bp, mp);
2508 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2553 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
2509 xfs_bdwrite(mp, bp); 2554 xfs_bdwrite(mp, bp);
2510 } else { 2555 } else {
2511 XFS_BUF_STALE(bp); 2556 XFS_BUF_STALE(bp);
2512 error = xfs_bwrite(mp, bp); 2557 error = xfs_bwrite(mp, bp);
2513 } 2558 }
2514 2559
2515 error: 2560 error:
2516 if (need_free) 2561 if (need_free)
2517 kmem_free(in_f, sizeof(*in_f)); 2562 kmem_free(in_f, sizeof(*in_f));
2518 return XFS_ERROR(error); 2563 return XFS_ERROR(error);
2519 } 2564 }
2520 2565
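The flush-iteration comparison used above to skip replay of older log copies, as a standalone sketch. DI_MAX_FLUSH (0xffff) is the counter ceiling; an on-disk value at the ceiling paired with a small log value is treated as a wrapped, hence newer, log copy:

#include <stdint.h>

#define DI_MAX_FLUSH	0xffff		/* 16-bit flush counter ceiling */

/* Return 1 if the on-disk inode is newer and replay should be skipped. */
static int skip_inode_replay(uint16_t log_flushiter, uint16_t disk_flushiter)
{
        if (log_flushiter >= disk_flushiter)
                return 0;		/* log copy is newer */
        if (disk_flushiter == DI_MAX_FLUSH &&
            log_flushiter < (DI_MAX_FLUSH >> 1))
                return 0;		/* counter wrapped; replay */
        return 1;			/* disk copy is newer; skip */
}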
2521 /* 2566 /*
2522 * Recover QUOTAOFF records. We simply make a note of it in the xlog_t 2567 * Recover QUOTAOFF records. We simply make a note of it in the xlog_t
2523 * structure, so that we know not to do any dquot item or dquot buffer recovery 2568 * structure, so that we know not to do any dquot item or dquot buffer recovery
2524 * of that type. 2569 * of that type.
2525 */ 2570 */
2526 STATIC int 2571 STATIC int
2527 xlog_recover_do_quotaoff_trans( 2572 xlog_recover_do_quotaoff_trans(
2528 xlog_t *log, 2573 xlog_t *log,
2529 xlog_recover_item_t *item, 2574 xlog_recover_item_t *item,
2530 int pass) 2575 int pass)
2531 { 2576 {
2532 xfs_qoff_logformat_t *qoff_f; 2577 xfs_qoff_logformat_t *qoff_f;
2533 2578
2534 if (pass == XLOG_RECOVER_PASS2) { 2579 if (pass == XLOG_RECOVER_PASS2) {
2535 return (0); 2580 return (0);
2536 } 2581 }
2537 2582
2538 qoff_f = (xfs_qoff_logformat_t *)item->ri_buf[0].i_addr; 2583 qoff_f = (xfs_qoff_logformat_t *)item->ri_buf[0].i_addr;
2539 ASSERT(qoff_f); 2584 ASSERT(qoff_f);
2540 2585
2541 /* 2586 /*
2542 * The logitem format's flag tells us if this was user quotaoff, 2587 * The logitem format's flag tells us if this was user quotaoff,
2543 * group/project quotaoff or both. 2588 * group/project quotaoff or both.
2544 */ 2589 */
2545 if (qoff_f->qf_flags & XFS_UQUOTA_ACCT) 2590 if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
2546 log->l_quotaoffs_flag |= XFS_DQ_USER; 2591 log->l_quotaoffs_flag |= XFS_DQ_USER;
2547 if (qoff_f->qf_flags & XFS_PQUOTA_ACCT) 2592 if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
2548 log->l_quotaoffs_flag |= XFS_DQ_PROJ; 2593 log->l_quotaoffs_flag |= XFS_DQ_PROJ;
2549 if (qoff_f->qf_flags & XFS_GQUOTA_ACCT) 2594 if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
2550 log->l_quotaoffs_flag |= XFS_DQ_GROUP; 2595 log->l_quotaoffs_flag |= XFS_DQ_GROUP;
2551 2596
2552 return (0); 2597 return (0);
2553 } 2598 }
2554 2599
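Putting the two passes together, the quotaoff gating amounts to the sketch below: pass 1 records which quota types were turned off, and pass 2 consults that state before replaying a dquot item or dquot buffer. The flag values and names are illustrative stand-ins for the XFS_DQ_* and XFS_*QUOTA_ACCT bits:

#include <stdint.h>

#define DQ_USER		0x1	/* stand-ins for XFS_DQ_USER/PROJ/GROUP */
#define DQ_PROJ		0x2
#define DQ_GROUP	0x4

#define UQUOTA_ACCT	0x10	/* stand-ins for XFS_*QUOTA_ACCT */
#define PQUOTA_ACCT	0x20
#define GQUOTA_ACCT	0x40

struct recovery_state {
        unsigned int quotaoffs_flag;	/* filled during pass 1 */
};

/* Pass 1: translate a quotaoff item's flags into dquot types. */
static void note_quotaoff(struct recovery_state *rs, unsigned int qf_flags)
{
        if (qf_flags & UQUOTA_ACCT)
                rs->quotaoffs_flag |= DQ_USER;
        if (qf_flags & PQUOTA_ACCT)
                rs->quotaoffs_flag |= DQ_PROJ;
        if (qf_flags & GQUOTA_ACCT)
                rs->quotaoffs_flag |= DQ_GROUP;
}

/* Pass 2: return 1 if dquots of this type must not be replayed. */
static int dquot_replay_disabled(const struct recovery_state *rs,
                                 unsigned int type)
{
        return (rs->quotaoffs_flag & type) != 0;
}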
/*
 * Recover a dquot record
 */
STATIC int
xlog_recover_do_dquot_trans(
	xlog_t			*log,
	xlog_recover_item_t	*item,
	int			pass)
{
	xfs_mount_t		*mp;
	xfs_buf_t		*bp;
	struct xfs_disk_dquot	*ddq, *recddq;
	int			error;
	xfs_dq_logformat_t	*dq_f;
	uint			type;

	if (pass == XLOG_RECOVER_PASS1) {
		return 0;
	}
	mp = log->l_mp;

	/*
	 * Filesystems are required to send in quota flags at mount time.
	 */
	if (mp->m_qflags == 0)
		return (0);

	recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr;
	ASSERT(recddq);
	/*
	 * If this type of quota was turned off, ignore this record.
	 */
	type = INT_GET(recddq->d_flags, ARCH_CONVERT) &
			(XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
	ASSERT(type);
	if (log->l_quotaoffs_flag & type)
		return (0);

	/*
	 * At this point we know that quota was _not_ turned off.
	 * Since the mount flags are not indicating to us otherwise, this
	 * must mean that quota is on, and the dquot needs to be replayed.
	 * Remember that we may not have fully recovered the superblock yet,
	 * so we can't do the usual trick of looking at the SB quota bits.
	 *
	 * The other possibility, of course, is that the quota subsystem was
	 * removed since the last mount - ENOSYS.
	 */
	dq_f = (xfs_dq_logformat_t *)item->ri_buf[0].i_addr;
	ASSERT(dq_f);
	if ((error = xfs_qm_dqcheck(recddq,
			   dq_f->qlf_id,
			   0, XFS_QMOPT_DOWARN,
			   "xlog_recover_do_dquot_trans (log copy)"))) {
		return XFS_ERROR(EIO);
	}
	ASSERT(dq_f->qlf_len == 1);

	error = xfs_read_buf(mp, mp->m_ddev_targp,
			     dq_f->qlf_blkno,
			     XFS_FSB_TO_BB(mp, dq_f->qlf_len),
			     0, &bp);
	if (error) {
		xfs_ioerror_alert("xlog_recover_do..(read#3)", mp,
				  bp, dq_f->qlf_blkno);
		return error;
	}
	ASSERT(bp);
	ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset);

	/*
	 * At least the magic num portion should be on disk because this
	 * was among a chunk of dquots created earlier, and we did some
	 * minimal initialization then.
	 */
	if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
			   "xlog_recover_do_dquot_trans")) {
		xfs_buf_relse(bp);
		return XFS_ERROR(EIO);
	}

	memcpy(ddq, recddq, item->ri_buf[1].i_len);

	ASSERT(dq_f->qlf_size == 2);
	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL ||
	       XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp);
	XFS_BUF_SET_FSPRIVATE(bp, mp);
	XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
	xfs_bdwrite(mp, bp);

	return (0);
}

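The replay filter at the top of this routine reduces to one mask test: extract the dquot's type bits and skip replay when any of them appear in the quotaoff mask built earlier. A hedged sketch of that decision, with invented flag values:

#include <stdbool.h>
#include <stdio.h>

#define DQ_USER		0x1
#define DQ_PROJ		0x2
#define DQ_GROUP	0x4

/*
 * Decide whether a logged dquot should be replayed: skip it when its
 * type bit is set in the mask accumulated from QUOTAOFF records.
 */
static bool should_replay_dquot(unsigned int d_flags, unsigned int quotaoffs_flag)
{
	unsigned int type = d_flags & (DQ_USER | DQ_PROJ | DQ_GROUP);

	return (quotaoffs_flag & type) == 0;
}

int main(void)
{
	printf("%d\n", should_replay_dquot(DQ_USER, DQ_USER | DQ_PROJ));	/* 0: skip */
	printf("%d\n", should_replay_dquot(DQ_GROUP, DQ_USER));		/* 1: replay */
	return 0;
}
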
/*
 * This routine is called to create an in-core extent free intent
 * item from the efi format structure which was logged on disk.
 * It allocates an in-core efi, copies the extents from the format
 * structure into it, and adds the efi to the AIL with the given
 * LSN.
 */
STATIC int
xlog_recover_do_efi_trans(
	xlog_t			*log,
	xlog_recover_item_t	*item,
	xfs_lsn_t		lsn,
	int			pass)
{
	int			error;
	xfs_mount_t		*mp;
	xfs_efi_log_item_t	*efip;
	xfs_efi_log_format_t	*efi_formatp;
	SPLDECL(s);

	if (pass == XLOG_RECOVER_PASS1) {
		return 0;
	}

	efi_formatp = (xfs_efi_log_format_t *)item->ri_buf[0].i_addr;

	mp = log->l_mp;
	efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
	if ((error = xfs_efi_copy_format(&(item->ri_buf[0]),
					 &(efip->efi_format)))) {
		xfs_efi_item_free(efip);
		return error;
	}
	efip->efi_next_extent = efi_formatp->efi_nextents;
	efip->efi_flags |= XFS_EFI_COMMITTED;

	AIL_LOCK(mp,s);
	/*
	 * xfs_trans_update_ail() drops the AIL lock.
	 */
	xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn, s);
	return 0;
}


/*
 * This routine is called when an efd format structure is found in
 * a committed transaction in the log. Its purpose is to cancel
 * the corresponding efi if it was still in the log. To do this
 * it searches the AIL for the efi with an id equal to that in the
 * efd format structure. If we find it, we remove the efi from the
 * AIL and free it.
 */
STATIC void
xlog_recover_do_efd_trans(
	xlog_t			*log,
	xlog_recover_item_t	*item,
	int			pass)
{
	xfs_mount_t		*mp;
	xfs_efd_log_format_t	*efd_formatp;
	xfs_efi_log_item_t	*efip = NULL;
	xfs_log_item_t		*lip;
	int			gen;
	__uint64_t		efi_id;
	SPLDECL(s);

	if (pass == XLOG_RECOVER_PASS1) {
		return;
	}

	efd_formatp = (xfs_efd_log_format_t *)item->ri_buf[0].i_addr;
	ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
	       (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
	efi_id = efd_formatp->efd_efi_id;

	/*
	 * Search for the efi with the id in the efd format structure
	 * in the AIL.
	 */
	mp = log->l_mp;
	AIL_LOCK(mp,s);
	lip = xfs_trans_first_ail(mp, &gen);
	while (lip != NULL) {
		if (lip->li_type == XFS_LI_EFI) {
			efip = (xfs_efi_log_item_t *)lip;
			if (efip->efi_format.efi_id == efi_id) {
				/*
				 * xfs_trans_delete_ail() drops the
				 * AIL lock.
				 */
				xfs_trans_delete_ail(mp, lip, s);
				break;
			}
		}
		lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
	}

	/*
	 * If we found it, then free it up. If it wasn't there, it
	 * must have been overwritten in the log. Oh well.
	 */
	if (lip != NULL) {
		xfs_efi_item_free(efip);
	} else {
		AIL_UNLOCK(mp, s);
	}
}

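The EFI cancellation above is a linear search keyed on the intent id, followed by unlink-and-free. A user-space sketch of the same search-and-remove pattern over a plain NULL-terminated singly linked list (the kernel's AIL is a locked, generation-counted structure; everything here is illustrative):

#include <stdint.h>
#include <stdlib.h>

struct intent {
	uint64_t	id;
	struct intent	*next;
};

/*
 * Find the intent whose id matches 'id', unlink it from the list and
 * free it. Returns 1 if an intent was cancelled, 0 if it had already
 * been dropped (i.e. was not present).
 */
static int cancel_intent(struct intent **head, uint64_t id)
{
	struct intent **pp, *p;

	for (pp = head; (p = *pp) != NULL; pp = &p->next) {
		if (p->id == id) {
			*pp = p->next;	/* unlink */
			free(p);
			return 1;
		}
	}
	return 0;
}

Using a pointer-to-pointer cursor avoids special-casing removal of the head element, which is the main subtlety of this pattern.
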
/*
 * Perform the transaction
 *
 * If the transaction modifies a buffer or inode, do it now. Otherwise,
 * EFIs and EFDs get queued up by adding entries into the AIL for them.
 */
STATIC int
xlog_recover_do_trans(
	xlog_t			*log,
	xlog_recover_t		*trans,
	int			pass)
{
	int			error = 0;
	xlog_recover_item_t	*item, *first_item;

	if ((error = xlog_recover_reorder_trans(trans)))
		return error;
	first_item = item = trans->r_itemq;
	do {
		/*
		 * We don't need to worry about the block number being
		 * truncated in > 1 TB buffers because, in user-land,
		 * we're now n32 or 64-bit so xfs_daddr_t is 64 bits and
		 * the blknos will get through the user-mode buffer
		 * cache properly. The only bad case is o32 kernels,
		 * where xfs_daddr_t is 32 bits, but mount will warn us
		 * off a > 1 TB filesystem before we get here.
		 */
		if ((ITEM_TYPE(item) == XFS_LI_BUF)) {
			if ((error = xlog_recover_do_buffer_trans(log, item,
								 pass)))
				break;
		} else if ((ITEM_TYPE(item) == XFS_LI_INODE)) {
			if ((error = xlog_recover_do_inode_trans(log, item,
								pass)))
				break;
		} else if (ITEM_TYPE(item) == XFS_LI_EFI) {
			if ((error = xlog_recover_do_efi_trans(log, item,
							trans->r_lsn, pass)))
				break;
		} else if (ITEM_TYPE(item) == XFS_LI_EFD) {
			xlog_recover_do_efd_trans(log, item, pass);
		} else if (ITEM_TYPE(item) == XFS_LI_DQUOT) {
			if ((error = xlog_recover_do_dquot_trans(log, item,
								   pass)))
				break;
		} else if ((ITEM_TYPE(item) == XFS_LI_QUOTAOFF)) {
			if ((error = xlog_recover_do_quotaoff_trans(log, item,
								   pass)))
				break;
		} else {
			xlog_warn("XFS: xlog_recover_do_trans");
			ASSERT(0);
			error = XFS_ERROR(EIO);
			break;
		}
		item = item->ri_next;
	} while (first_item != item);

	return error;
}

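The replay loop walks a circularly linked item queue exactly once, dispatching on each item's type and bailing out on the first error. A sketch of that traversal shape, with made-up item types in place of the XFS_LI_* constants:

#include <stdio.h>

enum item_type { ITEM_BUF, ITEM_INODE, ITEM_EFI, ITEM_EFD };

struct item {
	enum item_type	type;
	struct item	*next;	/* circular: the last item points back to the first */
};

/* Walk a circular item queue once, dispatching on type; stop on error. */
static int replay_items(struct item *first)
{
	struct item *item = first;

	do {
		switch (item->type) {
		case ITEM_BUF:
			printf("replay buffer item\n");
			break;
		case ITEM_INODE:
			printf("replay inode item\n");
			break;
		case ITEM_EFI:
		case ITEM_EFD:
			printf("queue intent for later\n");
			break;
		default:
			return -1;	/* unknown type: treat the log as corrupt */
		}
		item = item->next;
	} while (item != first);
	return 0;
}
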
/*
 * Free up any resources allocated by the transaction
 *
 * Remember that EFIs, EFDs, and IUNLINKs are handled later.
 */
STATIC void
xlog_recover_free_trans(
	xlog_recover_t		*trans)
{
	xlog_recover_item_t	*first_item, *item, *free_item;
	int			i;

	item = first_item = trans->r_itemq;
	do {
		free_item = item;
		item = item->ri_next;
		/* Free the regions in the item. */
		for (i = 0; i < free_item->ri_cnt; i++) {
			kmem_free(free_item->ri_buf[i].i_addr,
				  free_item->ri_buf[i].i_len);
		}
		/* Free the item itself */
		kmem_free(free_item->ri_buf,
			  (free_item->ri_total * sizeof(xfs_log_iovec_t)));
		kmem_free(free_item, sizeof(xlog_recover_item_t));
	} while (first_item != item);
	/* Free the transaction recover structure */
	kmem_free(trans, sizeof(xlog_recover_t));
}

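Freeing the same circular queue uses the classic idiom of saving the next pointer before tearing down the current node, with the wrap back to the first node as the stop condition. In sketch form:

#include <stdlib.h>

struct node {
	void		*payload;
	struct node	*next;	/* circular: the last node points back to the first */
};

/*
 * Free every node of a circular singly linked list. The next pointer
 * must be captured before the node is freed, and the loop terminates
 * when the walk wraps back around to the first node.
 */
static void free_ring(struct node *first)
{
	struct node *item = first, *victim;

	do {
		victim = item;
		item = item->next;	/* grab the successor before freeing */
		free(victim->payload);	/* free(NULL) is harmless if unset */
		free(victim);
	} while (item != first);
}
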
STATIC int
xlog_recover_commit_trans(
	xlog_t			*log,
	xlog_recover_t		**q,
	xlog_recover_t		*trans,
	int			pass)
{
	int			error;

	if ((error = xlog_recover_unlink_tid(q, trans)))
		return error;
	if ((error = xlog_recover_do_trans(log, trans, pass)))
		return error;
	xlog_recover_free_trans(trans);			/* no error */
	return 0;
}

STATIC int
xlog_recover_unmount_trans(
	xlog_recover_t		*trans)
{
	/* Do nothing now */
	xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR");
	return 0;
}

/*
 * There are two valid states of the r_state field. 0 indicates that the
 * transaction structure is in a normal state. We have either seen the
 * start of the transaction or the last operation we added was not a partial
 * operation. If the last operation we added to the transaction was a
 * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS.
 *
 * NOTE: skip LRs with 0 data length.
 */
STATIC int
xlog_recover_process_data(
	xlog_t			*log,
	xlog_recover_t		*rhash[],
	xlog_rec_header_t	*rhead,
	xfs_caddr_t		dp,
	int			pass)
{
	xfs_caddr_t		lp;
	int			num_logops;
	xlog_op_header_t	*ohead;
	xlog_recover_t		*trans;
	xlog_tid_t		tid;
	int			error;
	unsigned long		hash;
	uint			flags;

	lp = dp + INT_GET(rhead->h_len, ARCH_CONVERT);
	num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT);

	/* check the log format matches our own - else we can't recover */
	if (xlog_header_check_recover(log->l_mp, rhead))
		return (XFS_ERROR(EIO));

	while ((dp < lp) && num_logops) {
		ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
		ohead = (xlog_op_header_t *)dp;
		dp += sizeof(xlog_op_header_t);
		if (ohead->oh_clientid != XFS_TRANSACTION &&
		    ohead->oh_clientid != XFS_LOG) {
			xlog_warn(
			"XFS: xlog_recover_process_data: bad clientid");
			ASSERT(0);
			return (XFS_ERROR(EIO));
		}
		tid = INT_GET(ohead->oh_tid, ARCH_CONVERT);
		hash = XLOG_RHASH(tid);
		trans = xlog_recover_find_tid(rhash[hash], tid);
		if (trans == NULL) {		/* not found; add new tid */
			if (ohead->oh_flags & XLOG_START_TRANS)
				xlog_recover_new_tid(&rhash[hash], tid,
					INT_GET(rhead->h_lsn, ARCH_CONVERT));
		} else {
			ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp);
			flags = ohead->oh_flags & ~XLOG_END_TRANS;
			if (flags & XLOG_WAS_CONT_TRANS)
				flags &= ~XLOG_CONTINUE_TRANS;
			switch (flags) {
			case XLOG_COMMIT_TRANS:
				error = xlog_recover_commit_trans(log,
						&rhash[hash], trans, pass);
				break;
			case XLOG_UNMOUNT_TRANS:
				error = xlog_recover_unmount_trans(trans);
				break;
			case XLOG_WAS_CONT_TRANS:
				error = xlog_recover_add_to_cont_trans(trans,
						dp, INT_GET(ohead->oh_len,
							ARCH_CONVERT));
				break;
			case XLOG_START_TRANS:
				xlog_warn(
			"XFS: xlog_recover_process_data: bad transaction");
				ASSERT(0);
				error = XFS_ERROR(EIO);
				break;
			case 0:
			case XLOG_CONTINUE_TRANS:
				error = xlog_recover_add_to_trans(trans,
						dp, INT_GET(ohead->oh_len,
							ARCH_CONVERT));
				break;
			default:
				xlog_warn(
			"XFS: xlog_recover_process_data: bad flag");
				ASSERT(0);
				error = XFS_ERROR(EIO);
				break;
			}
			if (error)
				return error;
		}
		dp += INT_GET(ohead->oh_len, ARCH_CONVERT);
		num_logops--;
	}
	return 0;
}

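The op-header walk above steps a cursor through back-to-back [header][payload] records inside one buffer, bounded by lp. A self-contained sketch of that framing (the header layout here is hypothetical, not the on-disk xlog_op_header_t):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical op header: a length-prefixed record in a byte buffer. */
struct op_header {
	uint32_t	len;	/* payload bytes that follow this header */
	uint32_t	tid;	/* owning transaction id */
};

/*
 * Walk a buffer of back-to-back [header][payload] records, exactly as
 * the recovery loop steps dp from one op header to the next.
 */
static int walk_ops(const unsigned char *dp, size_t total)
{
	const unsigned char *lp = dp + total;

	while (dp + sizeof(struct op_header) <= lp) {
		struct op_header oh;

		memcpy(&oh, dp, sizeof(oh));		/* avoid unaligned reads */
		dp += sizeof(oh);
		if (oh.len > (size_t)(lp - dp))
			return -1;			/* record overruns buffer */
		printf("tid %u: %u payload bytes\n", oh.tid, oh.len);
		dp += oh.len;				/* skip to the next header */
	}
	return 0;
}
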
/*
 * Process an extent free intent item that was recovered from
 * the log. We need to free the extents that it describes.
 */
STATIC void
xlog_recover_process_efi(
	xfs_mount_t		*mp,
	xfs_efi_log_item_t	*efip)
{
	xfs_efd_log_item_t	*efdp;
	xfs_trans_t		*tp;
	int			i;
	xfs_extent_t		*extp;
	xfs_fsblock_t		startblock_fsb;

	ASSERT(!(efip->efi_flags & XFS_EFI_RECOVERED));

	/*
	 * First check the validity of the extents described by the
	 * EFI. If any are bad, then assume that all are bad and
	 * just toss the EFI.
	 */
	for (i = 0; i < efip->efi_format.efi_nextents; i++) {
		extp = &(efip->efi_format.efi_extents[i]);
		startblock_fsb = XFS_BB_TO_FSB(mp,
				   XFS_FSB_TO_DADDR(mp, extp->ext_start));
		if ((startblock_fsb == 0) ||
		    (extp->ext_len == 0) ||
		    (startblock_fsb >= mp->m_sb.sb_dblocks) ||
		    (extp->ext_len >= mp->m_sb.sb_agblocks)) {
			/*
			 * This will pull the EFI from the AIL and
			 * free the memory associated with it.
			 */
			xfs_efi_release(efip, efip->efi_format.efi_nextents);
			return;
		}
	}

	tp = xfs_trans_alloc(mp, 0);
	xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0);
	efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);

	for (i = 0; i < efip->efi_format.efi_nextents; i++) {
		extp = &(efip->efi_format.efi_extents[i]);
		xfs_free_extent(tp, extp->ext_start, extp->ext_len);
		xfs_trans_log_efd_extent(tp, efdp, extp->ext_start,
					 extp->ext_len);
	}

	efip->efi_flags |= XFS_EFI_RECOVERED;
	xfs_trans_commit(tp, 0);
}

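The EFI sanity pass boils down to bounds checks against filesystem geometry: a start block or length of zero, a start beyond the filesystem, or a length of an entire AG or more all condemn the intent. A hedged sketch with a made-up geometry struct standing in for the superblock fields:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical geometry limits standing in for sb_dblocks/sb_agblocks. */
struct fs_geometry {
	uint64_t	dblocks;	/* total data blocks in the fs */
	uint32_t	agblocks;	/* blocks per allocation group */
};

/*
 * An extent recovered from the log is trustworthy only if its start and
 * length both land inside the filesystem's geometry; a zero start or
 * length is equally suspect.
 */
static bool extent_is_valid(const struct fs_geometry *geo,
			    uint64_t start, uint32_t len)
{
	if (start == 0 || len == 0)
		return false;
	if (start >= geo->dblocks)
		return false;
	if (len >= geo->agblocks)
		return false;
	return true;
}
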
/*
 * Verify that once we've encountered something other than an EFI
 * in the AIL, there are no more EFIs in the AIL.
 */
#if defined(DEBUG)
STATIC void
xlog_recover_check_ail(
	xfs_mount_t		*mp,
	xfs_log_item_t		*lip,
	int			gen)
{
	int			orig_gen = gen;

	do {
		ASSERT(lip->li_type != XFS_LI_EFI);
		lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
		/*
		 * The check will be bogus if we restart from the
		 * beginning of the AIL, so ASSERT that we don't.
		 * We never should since we're holding the AIL lock
		 * the entire time.
		 */
		ASSERT(gen == orig_gen);
	} while (lip != NULL);
}
#endif	/* DEBUG */

/*
 * When this is called, all of the EFIs which did not have
 * corresponding EFDs should be in the AIL. What we do now
 * is free the extents associated with each one.
 *
 * Since we process the EFIs in normal transactions, they
 * will be removed at some point after the commit. This prevents
 * us from just walking down the list processing each one.
 * We'll use a flag in the EFI to skip those that we've already
 * processed and use the AIL iteration mechanism's generation
 * count to try to speed this up at least a bit.
 *
 * When we start, we know that the EFIs are the only things in
 * the AIL. As we process them, however, other items are added
 * to the AIL. Since everything added to the AIL must come after
 * everything already in the AIL, we stop processing as soon as
 * we see something other than an EFI in the AIL.
 */
STATIC void
xlog_recover_process_efis(
	xlog_t			*log)
{
	xfs_log_item_t		*lip;
	xfs_efi_log_item_t	*efip;
	int			gen;
	xfs_mount_t		*mp;
	SPLDECL(s);

	mp = log->l_mp;
	AIL_LOCK(mp,s);

	lip = xfs_trans_first_ail(mp, &gen);
	while (lip != NULL) {
		/*
		 * We're done when we see something other than an EFI.
		 */
		if (lip->li_type != XFS_LI_EFI) {
			xlog_recover_check_ail(mp, lip, gen);
			break;
		}

		/*
		 * Skip EFIs that we've already processed.
		 */
		efip = (xfs_efi_log_item_t *)lip;
		if (efip->efi_flags & XFS_EFI_RECOVERED) {
			lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
			continue;
		}

		AIL_UNLOCK(mp, s);
		xlog_recover_process_efi(mp, efip);
		AIL_LOCK(mp,s);
		lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
	}
	AIL_UNLOCK(mp, s);
}

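The EFI processing loop combines two tricks: a per-item processed flag so revisited items are skipped, and early termination at the first non-EFI since anything behind it was appended after recovery began. The same shape over an ordered list, in sketch form:

#include <stdbool.h>
#include <stdio.h>

struct work_item {
	bool		is_intent;	/* only intents are processed here */
	bool		done;		/* set once the item has been handled */
	struct work_item *next;
};

/*
 * Process intents at the head of an ordered list: stop at the first
 * non-intent (everything after it was appended later), and skip
 * intents already marked done on a previous visit.
 */
static void process_intents(struct work_item *head)
{
	struct work_item *it;

	for (it = head; it != NULL; it = it->next) {
		if (!it->is_intent)
			break;		/* nothing older than this can follow */
		if (it->done)
			continue;	/* already handled */
		printf("processing intent\n");
		it->done = true;
	}
}
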
/*
 * This routine performs a transaction to null out a bad inode pointer
 * in an agi unlinked inode hash bucket.
 */
STATIC void
xlog_recover_clear_agi_bucket(
	xfs_mount_t	*mp,
	xfs_agnumber_t	agno,
	int		bucket)
{
	xfs_trans_t	*tp;
	xfs_agi_t	*agi;
	xfs_buf_t	*agibp;
	int		offset;
	int		error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET);
	xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 0, 0, 0);

	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
				   XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
				   XFS_FSS_TO_BB(mp, 1), 0, &agibp);
	if (error) {
		xfs_trans_cancel(tp, XFS_TRANS_ABORT);
		return;
	}

	agi = XFS_BUF_TO_AGI(agibp);
	if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC) {
		xfs_trans_cancel(tp, XFS_TRANS_ABORT);
		return;
	}

	agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
	offset = offsetof(xfs_agi_t, agi_unlinked) +
		 (sizeof(xfs_agino_t) * bucket);
	xfs_trans_log_buf(tp, agibp, offset,
			  (offset + sizeof(xfs_agino_t) - 1));

	(void) xfs_trans_commit(tp, 0);
}

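The byte range handed to xfs_trans_log_buf() is computed with offsetof plus an array stride, so only the single modified bucket slot is logged rather than the whole AGI. A small stand-alone illustration of that computation (the struct is hypothetical):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define BUCKETS	64

/* Hypothetical on-disk header with an array of 32-bit bucket pointers. */
struct agi_like {
	uint32_t	magic;
	uint32_t	unlinked[BUCKETS];
};

int main(void)
{
	int bucket = 5;

	/* Byte range covering exactly one bucket slot, as logged above. */
	size_t first = offsetof(struct agi_like, unlinked) +
		       sizeof(uint32_t) * bucket;
	size_t last = first + sizeof(uint32_t) - 1;

	printf("log bytes %zu..%zu\n", first, last);
	return 0;
}

Logging the narrowest dirty range keeps the log item's data map small, which is exactly what the 128-byte chunk bitmap in the buf log format is for.
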
/*
 * xlog_iunlink_recover
 *
 * This is called during recovery to process any inodes which
 * we unlinked but did not free when the system crashed. These
 * inodes will be on the lists in the AGI blocks. What we do
 * here is scan all the AGIs and fully truncate and free any
 * inodes found on the lists. Each inode is removed from the
 * lists when it has been fully truncated and is freed. The
 * freeing of the inode and its removal from the list must be
 * atomic.
 */
void
xlog_recover_process_iunlinks(
	xlog_t		*log)
{
	xfs_mount_t	*mp;
	xfs_agnumber_t	agno;
	xfs_agi_t	*agi;
	xfs_buf_t	*agibp;
	xfs_buf_t	*ibp;
	xfs_dinode_t	*dip;
	xfs_inode_t	*ip;
	xfs_agino_t	agino;
	xfs_ino_t	ino;
	int		bucket;
	int		error;
	uint		mp_dmevmask;

	mp = log->l_mp;

	/*
	 * Prevent any DMAPI event from being sent while in this function.
	 */
	mp_dmevmask = mp->m_dmevmask;
	mp->m_dmevmask = 0;

	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		/*
		 * Find the agi for this ag.
		 */
		agibp = xfs_buf_read(mp->m_ddev_targp,
				XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
				XFS_FSS_TO_BB(mp, 1), 0);
		if (XFS_BUF_ISERROR(agibp)) {
			xfs_ioerror_alert("xlog_recover_process_iunlinks(#1)",
				log->l_mp, agibp,
				XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)));
		}
		agi = XFS_BUF_TO_AGI(agibp);
		ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agi->agi_magicnum));

		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {

			agino = be32_to_cpu(agi->agi_unlinked[bucket]);
			while (agino != NULLAGINO) {

				/*
				 * Release the agi buffer so that it can
				 * be acquired in the normal course of the
				 * transaction to truncate and free the inode.
				 */
				xfs_buf_relse(agibp);

				ino = XFS_AGINO_TO_INO(mp, agno, agino);
				error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0);
				ASSERT(error || (ip != NULL));

				if (!error) {
					/*
					 * Get the on-disk inode to find the
					 * next inode in the bucket.
					 */
					error = xfs_itobp(mp, NULL, ip, &dip,
							&ibp, 0, 0);
					ASSERT(error || (dip != NULL));
				}

				if (!error) {
					ASSERT(ip->i_d.di_nlink == 0);

					/* setup for the next pass */
					agino = INT_GET(dip->di_next_unlinked,
							ARCH_CONVERT);
					xfs_buf_relse(ibp);
					/*
					 * Prevent any DMAPI event from
					 * being sent when the reference on
					 * the inode is dropped.
					 */
					ip->i_d.di_dmevmask = 0;

					/*
					 * If this is a new inode, handle
					 * it specially. Otherwise, just
					 * drop our reference to the inode.
					 * If there are no other references,
					 * this will send the inode to
					 * xfs_inactive() which will truncate
					 * the file and free the inode.
					 */
					if (ip->i_d.di_mode == 0)
						xfs_iput_new(ip, 0);
					else
						VN_RELE(XFS_ITOV(ip));
				} else {
					/*
					 * We can't read in the inode this
					 * bucket points to, or this inode
					 * is messed up. Just ditch this
					 * bucket of inodes. We will lose
					 * some inodes and space, but at
					 * least we won't hang. Call
					 * xlog_recover_clear_agi_bucket()
					 * to perform a transaction to clear
					 * the inode pointer in the bucket.
					 */
					xlog_recover_clear_agi_bucket(mp, agno,
							bucket);

					agino = NULLAGINO;
				}

				/*
				 * Reacquire the agi buffer and continue
				 * around the loop.
				 */
				agibp = xfs_buf_read(mp->m_ddev_targp,
						XFS_AG_DADDR(mp, agno,
							XFS_AGI_DADDR(mp)),
						XFS_FSS_TO_BB(mp, 1), 0);
				if (XFS_BUF_ISERROR(agibp)) {
					xfs_ioerror_alert(
				"xlog_recover_process_iunlinks(#2)",
						log->l_mp, agibp,
						XFS_AG_DADDR(mp, agno,
							XFS_AGI_DADDR(mp)));
				}
				agi = XFS_BUF_TO_AGI(agibp);
				ASSERT(XFS_AGI_MAGIC == be32_to_cpu(
					agi->agi_magicnum));
			}
		}

		/*
		 * Release the buffer for the current agi so we can
		 * go on to the next one.
		 */
		xfs_buf_relse(agibp);
	}

	mp->m_dmevmask = mp_dmevmask;
}


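Each unlinked bucket is an on-disk singly linked chain: the AGI holds the head inode number, every inode stores its successor in di_next_unlinked, and NULLAGINO terminates the chain. A toy walk of such a chain, with a stubbed-out lookup in place of reading the on-disk inode (the chain contents are invented):

#include <stdint.h>
#include <stdio.h>

#define NULLINO	((uint32_t)-1)	/* hypothetical end-of-chain marker */

/* Stand-in for reading dip->di_next_unlinked from the on-disk inode. */
static uint32_t next_unlinked(uint32_t ino)
{
	/* A fake chain over inodes 0..7: 3 -> 5 -> 1 -> end. */
	static const uint32_t chain[8] = { 0, NULLINO, 0, 5, 0, 1, 0, 0 };
	return chain[ino];
}

int main(void)
{
	uint32_t agino = 3;	/* bucket head, as read from the AGI */

	/* Walk the on-disk singly linked chain until the terminator. */
	while (agino != NULLINO) {
		printf("recovering unlinked inode %u\n", agino);
		agino = next_unlinked(agino);
	}
	return 0;
}
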
#ifdef DEBUG
STATIC void
xlog_pack_data_checksum(
	xlog_t		*log,
	xlog_in_core_t	*iclog,
	int		size)
{
	int		i;
	uint		*up;
	uint		chksum = 0;

	up = (uint *)iclog->ic_datap;
	/* divide length by 4 to get # words */
	for (i = 0; i < (size >> 2); i++) {
		chksum ^= INT_GET(*up, ARCH_CONVERT);
		up++;
	}
	INT_SET(iclog->ic_header.h_chksum, ARCH_CONVERT, chksum);
}
#else
#define xlog_pack_data_checksum(log, iclog, size)
#endif

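The debug checksum is a plain XOR fold over the record body, taken one 32-bit word at a time; trailing bytes past the last full word are ignored by the size >> 2 word count. A user-space equivalent:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * XOR all 32-bit words in a buffer, mirroring the debug checksum above.
 * 'size' is in bytes; size >> 2 gives the word count.
 */
static uint32_t xor_checksum(const uint32_t *up, int size)
{
	uint32_t chksum = 0;
	int i;

	for (i = 0; i < (size >> 2); i++)
		chksum ^= up[i];
	return chksum;
}

int main(void)
{
	uint32_t data[] = { 0xdeadbeef, 0xcafef00d, 0xdeadbeef };

	/* Equal words cancel out under XOR: result is 0xcafef00d. */
	printf("0x%" PRIx32 "\n", xor_checksum(data, (int)sizeof(data)));
	return 0;
}

XOR makes a weak integrity check (any pair of identical corruptions cancels), which is presumably why it is confined to DEBUG builds here.
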
/*
 * Stamp cycle number in every block
 */
void
xlog_pack_data(
	xlog_t			*log,
	xlog_in_core_t		*iclog,
	int			roundoff)
{
	int			i, j, k;
	int			size = iclog->ic_offset + roundoff;
	uint			cycle_lsn;
	xfs_caddr_t		dp;
	xlog_in_core_2_t	*xhdr;

	xlog_pack_data_checksum(log, iclog, size);

	cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn);

	dp = iclog->ic_datap;
	for (i = 0; i < BTOBB(size) &&
		i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
		iclog->ic_header.h_cycle_data[i] = *(uint *)dp;
		*(uint *)dp = cycle_lsn;
		dp += BBSIZE;
	}

	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
		xhdr = (xlog_in_core_2_t *)&iclog->ic_header;
		for ( ; i < BTOBB(size); i++) {
			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			xhdr[j].hic_xheader.xh_cycle_data[k] = *(uint *)dp;
			*(uint *)dp = cycle_lsn;
			dp += BBSIZE;
		}

		for (i = 1; i < log->l_iclog_heads; i++) {
			xhdr[i].hic_xheader.xh_cycle = cycle_lsn;
		}
	}
}

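Cycle stamping swaps the first word of every 512-byte basic block with the record's cycle number, parking the displaced words in the header so unpacking can restore them; recovery later uses the stamp to tell how much of a record actually reached disk. A round-trip sketch under those assumptions (block count and buffer layout invented):

#include <stdint.h>
#include <string.h>

#define BLOCK_SIZE	512		/* basic block size, as in BBSIZE */
#define NBLOCKS		4

/*
 * Save the first word of each block into a side table and overwrite it
 * with the cycle number (pack); unpack restores the saved words.
 */
static void pack_cycle(unsigned char *buf, uint32_t saved[NBLOCKS],
		       uint32_t cycle)
{
	int i;

	for (i = 0; i < NBLOCKS; i++) {
		unsigned char *dp = buf + i * BLOCK_SIZE;

		memcpy(&saved[i], dp, sizeof(uint32_t));	/* stash original word */
		memcpy(dp, &cycle, sizeof(uint32_t));		/* stamp cycle number */
	}
}

static void unpack_cycle(unsigned char *buf, const uint32_t saved[NBLOCKS])
{
	int i;

	for (i = 0; i < NBLOCKS; i++)
		memcpy(buf + i * BLOCK_SIZE, &saved[i], sizeof(uint32_t));
}

int main(void)
{
	static unsigned char buf[NBLOCKS * BLOCK_SIZE] = { 0xaa, 0xbb };
	uint32_t saved[NBLOCKS];

	pack_cycle(buf, saved, 42);
	unpack_cycle(buf, saved);
	return buf[0] == 0xaa ? 0 : 1;	/* round trip restores the data */
}
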
#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
STATIC void
xlog_unpack_data_checksum(
	xlog_rec_header_t	*rhead,
	xfs_caddr_t		dp,
	xlog_t			*log)
{
	uint			*up = (uint *)dp;
	uint			chksum = 0;
	int			i;

	/* divide length by 4 to get # words */
	for (i = 0; i < INT_GET(rhead->h_len, ARCH_CONVERT) >> 2; i++) {
		chksum ^= INT_GET(*up, ARCH_CONVERT);
		up++;
	}
	if (chksum != INT_GET(rhead->h_chksum, ARCH_CONVERT)) {
		if (rhead->h_chksum ||
		    ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) {
			cmn_err(CE_DEBUG,
			"XFS: LogR chksum mismatch: was (0x%x) is (0x%x)\n",
				INT_GET(rhead->h_chksum, ARCH_CONVERT), chksum);
			cmn_err(CE_DEBUG,
"XFS: Disregard message if filesystem was created with non-DEBUG kernel");
			if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
				cmn_err(CE_DEBUG,
					"XFS: LogR this is a LogV2 filesystem\n");
			}
			log->l_flags |= XLOG_CHKSUM_MISMATCH;
		}
	}
}
#else
#define xlog_unpack_data_checksum(rhead, dp, log)
#endif

STATIC void
xlog_unpack_data(
	xlog_rec_header_t	*rhead,
	xfs_caddr_t		dp,
	xlog_t			*log)
{
	int			i, j, k;
	xlog_in_core_2_t	*xhdr;

	for (i = 0; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)) &&
		  i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
		*(uint *)dp = *(uint *)&rhead->h_cycle_data[i];
		dp += BBSIZE;
	}

	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
		xhdr = (xlog_in_core_2_t *)rhead;
		for ( ; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); i++) {
			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			*(uint *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
			dp += BBSIZE;
		}
	}

	xlog_unpack_data_checksum(rhead, dp, log);
}

STATIC int
xlog_valid_rec_header(
	xlog_t			*log,
	xlog_rec_header_t	*rhead,
	xfs_daddr_t		blkno)
{
	int			hlen;

	if (unlikely(
	    (INT_GET(rhead->h_magicno, ARCH_CONVERT) !=
			XLOG_HEADER_MAGIC_NUM))) {
		XFS_ERROR_REPORT("xlog_valid_rec_header(1)",
				XFS_ERRLEVEL_LOW, log->l_mp);
		return XFS_ERROR(EFSCORRUPTED);
	}
	if (unlikely(
	    (!rhead->h_version ||
	    (INT_GET(rhead->h_version, ARCH_CONVERT) &
			(~XLOG_VERSION_OKBITS)) != 0))) {
		xlog_warn("XFS: %s: unrecognised log version (%d).",
			__FUNCTION__, INT_GET(rhead->h_version, ARCH_CONVERT));
		return XFS_ERROR(EIO);
	}

	/* LR body must have data or it wouldn't have been written */
	hlen = INT_GET(rhead->h_len, ARCH_CONVERT);
	if (unlikely( hlen <= 0 || hlen > INT_MAX )) {
		XFS_ERROR_REPORT("xlog_valid_rec_header(2)",
				XFS_ERRLEVEL_LOW, log->l_mp);
		return XFS_ERROR(EFSCORRUPTED);
	}
	if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) {
		XFS_ERROR_REPORT("xlog_valid_rec_header(3)",
				XFS_ERRLEVEL_LOW, log->l_mp);
		return XFS_ERROR(EFSCORRUPTED);
	}
	return 0;
}

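Header validation is a short chain of range checks: magic number, version bits, body length, and block number each get a cheap test before any record is trusted. A sketch with hypothetical constants in place of the XLOG_* values:

#include <limits.h>
#include <stdint.h>

#define HDR_MAGIC	0xfeedbabe	/* invented stand-in for the record magic */
#define VERSION_OKBITS	0x3		/* mask of version bits we understand */

/* Hypothetical record header mirroring the fields checked above. */
struct rec_header {
	uint32_t	magic;
	uint32_t	version;
	int32_t		len;		/* bytes of record body */
};

/*
 * Reject a header whose magic, version bits, body length or block
 * number fall outside sane bounds; 0 means the header looks valid.
 */
static int valid_rec_header(const struct rec_header *h,
			    int64_t blkno, int64_t log_size)
{
	if (h->magic != HDR_MAGIC)
		return -1;			/* corrupted */
	if (!h->version || (h->version & ~VERSION_OKBITS))
		return -2;			/* unrecognised version */
	if (h->len <= 0)
		return -1;			/* body must have data */
	if (blkno > log_size || blkno > INT_MAX)
		return -1;			/* outside the physical log */
	return 0;
}
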
/*
 * Read the log from tail to head and process the log records found.
 * Handle the two cases where the tail and head are in the same cycle
 * and where the active portion of the log wraps around the end of
 * the physical log separately. The pass parameter is passed through
 * to the routines called to process the data and is not looked at
 * here.
 */
STATIC int
xlog_do_recovery_pass(
	xlog_t			*log,
	xfs_daddr_t		head_blk,
	xfs_daddr_t		tail_blk,
	int			pass)
{
	xlog_rec_header_t	*rhead;
	xfs_daddr_t		blk_no;
	xfs_caddr_t		bufaddr, offset;
	xfs_buf_t		*hbp, *dbp;
	int			error = 0, h_size;
	int			bblks, split_bblks;
	int			hblks, split_hblks, wrapped_hblks;
	xlog_recover_t		*rhash[XLOG_RHASH_SIZE];

	ASSERT(head_blk != tail_blk);

	/*
	 * Read the header of the tail block and get the iclog buffer size from
	 * h_size. Use this to tell how many sectors make up the log header.
	 */
	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
		/*
		 * When using variable length iclogs, read first sector of
		 * iclog header and extract the header size from it. Get a
		 * new hbp that is the correct size.
		 */
		hbp = xlog_get_bp(log, 1);
		if (!hbp)
			return ENOMEM;
		if ((error = xlog_bread(log, tail_blk, 1, hbp)))
			goto bread_err1;
		offset = xlog_align(log, tail_blk, 1, hbp);
		rhead = (xlog_rec_header_t *)offset;
		error = xlog_valid_rec_header(log, rhead, tail_blk);
		if (error)
			goto bread_err1;
		h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
		if ((INT_GET(rhead->h_version, ARCH_CONVERT)
				& XLOG_VERSION_2) &&
		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
			if (h_size % XLOG_HEADER_CYCLE_SIZE)
				hblks++;
			xlog_put_bp(hbp);
			hbp = xlog_get_bp(log, hblks);
		} else {
			hblks = 1;
		}
	} else {
		ASSERT(log->l_sectbb_log == 0);
		hblks = 1;
		hbp = xlog_get_bp(log, 1);
		h_size = XLOG_BIG_RECORD_BSIZE;
	}

	if (!hbp)
		return ENOMEM;
	dbp = xlog_get_bp(log, BTOBB(h_size));
	if (!dbp) {
		xlog_put_bp(hbp);
		return ENOMEM;
	}

	memset(rhash, 0, sizeof(rhash));
	if (tail_blk <= head_blk) {
		for (blk_no = tail_blk; blk_no < head_blk; ) {
			if ((error = xlog_bread(log, blk_no, hblks, hbp)))
				goto bread_err2;
			offset = xlog_align(log, blk_no, hblks, hbp);
			rhead = (xlog_rec_header_t *)offset;
			error = xlog_valid_rec_header(log, rhead, blk_no);
			if (error)
				goto bread_err2;

			/* blocks in data section */
			bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
			error = xlog_bread(log, blk_no + hblks, bblks, dbp);
			if (error)
				goto bread_err2;
			offset = xlog_align(log, blk_no + hblks, bblks, dbp);
			xlog_unpack_data(rhead, offset, log);
			if ((error = xlog_recover_process_data(log,
						rhash, rhead, offset, pass)))
				goto bread_err2;
			blk_no += bblks + hblks;
		}
	} else {
		/*
		 * Perform recovery around the end of the physical log.
		 * When the head is not on the same cycle number as the tail,
		 * we can't do a sequential recovery as above.
		 */
		blk_no = tail_blk;
		while (blk_no < log->l_logBBsize) {
			/*
			 * Check for header wrapping around physical end-of-log
			 */
			offset = NULL;
			split_hblks = 0;
			wrapped_hblks = 0;
			if (blk_no + hblks <= log->l_logBBsize) {
				/* Read header in one read */
				error = xlog_bread(log, blk_no, hblks, hbp);
				if (error)
					goto bread_err2;
				offset = xlog_align(log, blk_no, hblks, hbp);
			} else {
				/* This LR is split across physical log end */
				if (blk_no != log->l_logBBsize) {
					/* some data before physical log end */
					ASSERT(blk_no <= INT_MAX);
					split_hblks = log->l_logBBsize - (int)blk_no;
					ASSERT(split_hblks > 0);
					if ((error = xlog_bread(log, blk_no,
							split_hblks, hbp)))
						goto bread_err2;
					offset = xlog_align(log, blk_no,
							split_hblks, hbp);
				}
				/*
				 * Note: this black magic still works with
				 * large sector sizes (non-512) only because:
				 * - we increased the buffer size originally
				 *   by 1 sector giving us enough extra space
				 *   for the second read;
				 * - the log start is guaranteed to be sector
3618 * aligned; 3663 * aligned;
3619 * - we read the log end (LR header start) 3664 * - we read the log end (LR header start)
3620 * _first_, then the log start (LR header end) 3665 * _first_, then the log start (LR header end)
3621 * - order is important. 3666 * - order is important.
3622 */ 3667 */
3623 bufaddr = XFS_BUF_PTR(hbp); 3668 bufaddr = XFS_BUF_PTR(hbp);
3624 XFS_BUF_SET_PTR(hbp, 3669 XFS_BUF_SET_PTR(hbp,
3625 bufaddr + BBTOB(split_hblks), 3670 bufaddr + BBTOB(split_hblks),
3626 BBTOB(hblks - split_hblks)); 3671 BBTOB(hblks - split_hblks));
3627 wrapped_hblks = hblks - split_hblks; 3672 wrapped_hblks = hblks - split_hblks;
3628 error = xlog_bread(log, 0, wrapped_hblks, hbp); 3673 error = xlog_bread(log, 0, wrapped_hblks, hbp);
3629 if (error) 3674 if (error)
3630 goto bread_err2; 3675 goto bread_err2;
3631 XFS_BUF_SET_PTR(hbp, bufaddr, BBTOB(hblks)); 3676 XFS_BUF_SET_PTR(hbp, bufaddr, BBTOB(hblks));
3632 if (!offset) 3677 if (!offset)
3633 offset = xlog_align(log, 0, 3678 offset = xlog_align(log, 0,
3634 wrapped_hblks, hbp); 3679 wrapped_hblks, hbp);
3635 } 3680 }
3636 rhead = (xlog_rec_header_t *)offset; 3681 rhead = (xlog_rec_header_t *)offset;
3637 error = xlog_valid_rec_header(log, rhead, 3682 error = xlog_valid_rec_header(log, rhead,
3638 split_hblks ? blk_no : 0); 3683 split_hblks ? blk_no : 0);
3639 if (error) 3684 if (error)
3640 goto bread_err2; 3685 goto bread_err2;
3641 3686
3642 bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); 3687 bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
3643 blk_no += hblks; 3688 blk_no += hblks;
3644 3689
3645 /* Read in data for log record */ 3690 /* Read in data for log record */
3646 if (blk_no + bblks <= log->l_logBBsize) { 3691 if (blk_no + bblks <= log->l_logBBsize) {
3647 error = xlog_bread(log, blk_no, bblks, dbp); 3692 error = xlog_bread(log, blk_no, bblks, dbp);
3648 if (error) 3693 if (error)
3649 goto bread_err2; 3694 goto bread_err2;
3650 offset = xlog_align(log, blk_no, bblks, dbp); 3695 offset = xlog_align(log, blk_no, bblks, dbp);
3651 } else { 3696 } else {
3652 /* This log record is split across the 3697 /* This log record is split across the
3653 * physical end of log */ 3698 * physical end of log */
3654 offset = NULL; 3699 offset = NULL;
3655 split_bblks = 0; 3700 split_bblks = 0;
3656 if (blk_no != log->l_logBBsize) { 3701 if (blk_no != log->l_logBBsize) {
3657 /* some data is before the physical 3702 /* some data is before the physical
3658 * end of log */ 3703 * end of log */
3659 ASSERT(!wrapped_hblks); 3704 ASSERT(!wrapped_hblks);
3660 ASSERT(blk_no <= INT_MAX); 3705 ASSERT(blk_no <= INT_MAX);
3661 split_bblks = 3706 split_bblks =
3662 log->l_logBBsize - (int)blk_no; 3707 log->l_logBBsize - (int)blk_no;
3663 ASSERT(split_bblks > 0); 3708 ASSERT(split_bblks > 0);
3664 if ((error = xlog_bread(log, blk_no, 3709 if ((error = xlog_bread(log, blk_no,
3665 split_bblks, dbp))) 3710 split_bblks, dbp)))
3666 goto bread_err2; 3711 goto bread_err2;
3667 offset = xlog_align(log, blk_no, 3712 offset = xlog_align(log, blk_no,
3668 split_bblks, dbp); 3713 split_bblks, dbp);
3669 } 3714 }
3670 /* 3715 /*
3671 * Note: this black magic still works with 3716 * Note: this black magic still works with
3672 * large sector sizes (non-512) only because: 3717 * large sector sizes (non-512) only because:
3673 * - we increased the buffer size originally 3718 * - we increased the buffer size originally
3674 * by 1 sector giving us enough extra space 3719 * by 1 sector giving us enough extra space
3675 * for the second read; 3720 * for the second read;
3676 * - the log start is guaranteed to be sector 3721 * - the log start is guaranteed to be sector
3677 * aligned; 3722 * aligned;
3678 * - we read the log end (LR header start) 3723 * - we read the log end (LR header start)
3679 * _first_, then the log start (LR header end) 3724 * _first_, then the log start (LR header end)
3680 * - order is important. 3725 * - order is important.
3681 */ 3726 */
3682 bufaddr = XFS_BUF_PTR(dbp); 3727 bufaddr = XFS_BUF_PTR(dbp);
3683 XFS_BUF_SET_PTR(dbp, 3728 XFS_BUF_SET_PTR(dbp,
3684 bufaddr + BBTOB(split_bblks), 3729 bufaddr + BBTOB(split_bblks),
3685 BBTOB(bblks - split_bblks)); 3730 BBTOB(bblks - split_bblks));
3686 if ((error = xlog_bread(log, wrapped_hblks, 3731 if ((error = xlog_bread(log, wrapped_hblks,
3687 bblks - split_bblks, dbp))) 3732 bblks - split_bblks, dbp)))
3688 goto bread_err2; 3733 goto bread_err2;
3689 XFS_BUF_SET_PTR(dbp, bufaddr, h_size); 3734 XFS_BUF_SET_PTR(dbp, bufaddr, h_size);
3690 if (!offset) 3735 if (!offset)
3691 offset = xlog_align(log, wrapped_hblks, 3736 offset = xlog_align(log, wrapped_hblks,
3692 bblks - split_bblks, dbp); 3737 bblks - split_bblks, dbp);
3693 } 3738 }
3694 xlog_unpack_data(rhead, offset, log); 3739 xlog_unpack_data(rhead, offset, log);
3695 if ((error = xlog_recover_process_data(log, rhash, 3740 if ((error = xlog_recover_process_data(log, rhash,
3696 rhead, offset, pass))) 3741 rhead, offset, pass)))
3697 goto bread_err2; 3742 goto bread_err2;
3698 blk_no += bblks; 3743 blk_no += bblks;
3699 } 3744 }
3700 3745
3701 ASSERT(blk_no >= log->l_logBBsize); 3746 ASSERT(blk_no >= log->l_logBBsize);
3702 blk_no -= log->l_logBBsize; 3747 blk_no -= log->l_logBBsize;
3703 3748
3704 /* read first part of physical log */ 3749 /* read first part of physical log */
3705 while (blk_no < head_blk) { 3750 while (blk_no < head_blk) {
3706 if ((error = xlog_bread(log, blk_no, hblks, hbp))) 3751 if ((error = xlog_bread(log, blk_no, hblks, hbp)))
3707 goto bread_err2; 3752 goto bread_err2;
3708 offset = xlog_align(log, blk_no, hblks, hbp); 3753 offset = xlog_align(log, blk_no, hblks, hbp);
3709 rhead = (xlog_rec_header_t *)offset; 3754 rhead = (xlog_rec_header_t *)offset;
3710 error = xlog_valid_rec_header(log, rhead, blk_no); 3755 error = xlog_valid_rec_header(log, rhead, blk_no);
3711 if (error) 3756 if (error)
3712 goto bread_err2; 3757 goto bread_err2;
3713 bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); 3758 bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
3714 if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp))) 3759 if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
3715 goto bread_err2; 3760 goto bread_err2;
3716 offset = xlog_align(log, blk_no+hblks, bblks, dbp); 3761 offset = xlog_align(log, blk_no+hblks, bblks, dbp);
3717 xlog_unpack_data(rhead, offset, log); 3762 xlog_unpack_data(rhead, offset, log);
3718 if ((error = xlog_recover_process_data(log, rhash, 3763 if ((error = xlog_recover_process_data(log, rhash,
3719 rhead, offset, pass))) 3764 rhead, offset, pass)))
3720 goto bread_err2; 3765 goto bread_err2;
3721 blk_no += bblks + hblks; 3766 blk_no += bblks + hblks;
3722 } 3767 }
3723 } 3768 }
3724 3769
3725 bread_err2: 3770 bread_err2:
3726 xlog_put_bp(dbp); 3771 xlog_put_bp(dbp);
3727 bread_err1: 3772 bread_err1:
3728 xlog_put_bp(hbp); 3773 xlog_put_bp(hbp);
3729 return error; 3774 return error;
3730 } 3775 }
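
The split reads in xlog_do_recovery_pass() above come down to two pieces of arithmetic: a round-up division to size the record header, and a two-part copy over a circular block range. A minimal userspace sketch, with hypothetical hdr_blocks()/circ_read() helpers standing in for the real xlog_bread()/XFS_BUF_SET_PTR() machinery and its sector-alignment handling:

    #include <string.h>

    /* Round-up division, equivalent to the divide-then-increment used
     * to compute hblks from h_size above. */
    static int hdr_blocks(int h_size, int cycle_size)
    {
            return (h_size + cycle_size - 1) / cycle_size;
    }

    /* Flat model of the wrap-around read: copy nblks 512-byte basic
     * blocks starting at blk_no from a log of log_size blocks,
     * splitting the copy at the physical end of the log just as the
     * split_hblks/split_bblks code does. */
    static void circ_read(char *dst, const char *log, int log_size,
                          int blk_no, int nblks)
    {
            if (blk_no + nblks > log_size) {
                    int split = log_size - blk_no; /* blocks before end */

                    memcpy(dst, log + 512 * blk_no, 512 * split);
                    memcpy(dst + 512 * split, log, 512 * (nblks - split));
            } else {
                    memcpy(dst, log + 512 * blk_no, 512 * nblks);
            }
    }

In the kernel the two halves share one buffer, which is why the "black magic" comments stress the read order and the extra sector of slack.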
3731 3776
3732 /* 3777 /*
3733 * Do the recovery of the log. We actually do this in two passes. 3778 * Do the recovery of the log. We actually do this in two passes.
3734 * The two passes are necessary in order to implement the function 3779 * The two passes are necessary in order to implement the function
3735 * of cancelling a record written into the log. The first pass 3780 * of cancelling a record written into the log. The first pass
3736 * determines those things which have been cancelled, and the 3781 * determines those things which have been cancelled, and the
3737 * second pass replays log items normally except for those which 3782 * second pass replays log items normally except for those which
3738 * have been cancelled. The handling of the replay and cancellations 3783 * have been cancelled. The handling of the replay and cancellations
3739 * takes place in the log item type specific routines. 3784 * takes place in the log item type specific routines.
3740 * 3785 *
3741 * The table of items which have cancel records in the log is allocated 3786 * The table of items which have cancel records in the log is allocated
3742 * and freed at this level, since only here do we know when all of 3787 * and freed at this level, since only here do we know when all of
3743 * the log recovery has been completed. 3788 * the log recovery has been completed.
3744 */ 3789 */
3745 STATIC int 3790 STATIC int
3746 xlog_do_log_recovery( 3791 xlog_do_log_recovery(
3747 xlog_t *log, 3792 xlog_t *log,
3748 xfs_daddr_t head_blk, 3793 xfs_daddr_t head_blk,
3749 xfs_daddr_t tail_blk) 3794 xfs_daddr_t tail_blk)
3750 { 3795 {
3751 int error; 3796 int error;
3752 3797
3753 ASSERT(head_blk != tail_blk); 3798 ASSERT(head_blk != tail_blk);
3754 3799
3755 /* 3800 /*
3756 * First do a pass to find all of the cancelled buf log items. 3801 * First do a pass to find all of the cancelled buf log items.
3757 * Store them in the buf_cancel_table for use in the second pass. 3802 * Store them in the buf_cancel_table for use in the second pass.
3758 */ 3803 */
3759 log->l_buf_cancel_table = 3804 log->l_buf_cancel_table =
3760 (xfs_buf_cancel_t **)kmem_zalloc(XLOG_BC_TABLE_SIZE * 3805 (xfs_buf_cancel_t **)kmem_zalloc(XLOG_BC_TABLE_SIZE *
3761 sizeof(xfs_buf_cancel_t*), 3806 sizeof(xfs_buf_cancel_t*),
3762 KM_SLEEP); 3807 KM_SLEEP);
3763 error = xlog_do_recovery_pass(log, head_blk, tail_blk, 3808 error = xlog_do_recovery_pass(log, head_blk, tail_blk,
3764 XLOG_RECOVER_PASS1); 3809 XLOG_RECOVER_PASS1);
3765 if (error != 0) { 3810 if (error != 0) {
3766 kmem_free(log->l_buf_cancel_table, 3811 kmem_free(log->l_buf_cancel_table,
3767 XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*)); 3812 XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
3768 log->l_buf_cancel_table = NULL; 3813 log->l_buf_cancel_table = NULL;
3769 return error; 3814 return error;
3770 } 3815 }
3771 /* 3816 /*
3772 * Then do a second pass to actually recover the items in the log. 3817 * Then do a second pass to actually recover the items in the log.
3773 * When it is complete free the table of buf cancel items. 3818 * When it is complete free the table of buf cancel items.
3774 */ 3819 */
3775 error = xlog_do_recovery_pass(log, head_blk, tail_blk, 3820 error = xlog_do_recovery_pass(log, head_blk, tail_blk,
3776 XLOG_RECOVER_PASS2); 3821 XLOG_RECOVER_PASS2);
3777 #ifdef DEBUG 3822 #ifdef DEBUG
3778 if (!error) { 3823 if (!error) {
3779 int i; 3824 int i;
3780 3825
3781 for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) 3826 for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
3782 ASSERT(log->l_buf_cancel_table[i] == NULL); 3827 ASSERT(log->l_buf_cancel_table[i] == NULL);
3783 } 3828 }
3784 #endif /* DEBUG */ 3829 #endif /* DEBUG */
3785 3830
3786 kmem_free(log->l_buf_cancel_table, 3831 kmem_free(log->l_buf_cancel_table,
3787 XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*)); 3832 XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
3788 log->l_buf_cancel_table = NULL; 3833 log->l_buf_cancel_table = NULL;
3789 3834
3790 return error; 3835 return error;
3791 } 3836 }
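
The two passes hinge on a table keyed by block number: pass 1 fills it from cancel records, pass 2 consults it before replaying. A toy illustration with a hypothetical flat array in place of the xfs_buf_cancel_t hash chains:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BC_TABLE_SIZE   64      /* stand-in for XLOG_BC_TABLE_SIZE */

    static int64_t cancel_table[BC_TABLE_SIZE];
    static int ncancelled;

    /* pass 1: remember every block number that has a cancel record */
    static void pass1_note_cancel(int64_t blkno)
    {
            if (ncancelled < BC_TABLE_SIZE)
                    cancel_table[ncancelled++] = blkno;
    }

    /* pass 2: replay a buffer only if pass 1 saw no cancel record */
    static bool pass2_should_replay(int64_t blkno)
    {
            for (int i = 0; i < ncancelled; i++)
                    if (cancel_table[i] == blkno)
                            return false;
            return true;
    }

    int main(void)
    {
            pass1_note_cancel(128);                            /* pass 1 */
            printf("replay 64: %d\n", pass2_should_replay(64));   /* 1 */
            printf("replay 128: %d\n", pass2_should_replay(128)); /* 0 */
            return 0;
    }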
3792 3837
3793 /* 3838 /*
3794 * Do the actual recovery 3839 * Do the actual recovery
3795 */ 3840 */
3796 STATIC int 3841 STATIC int
3797 xlog_do_recover( 3842 xlog_do_recover(
3798 xlog_t *log, 3843 xlog_t *log,
3799 xfs_daddr_t head_blk, 3844 xfs_daddr_t head_blk,
3800 xfs_daddr_t tail_blk) 3845 xfs_daddr_t tail_blk)
3801 { 3846 {
3802 int error; 3847 int error;
3803 xfs_buf_t *bp; 3848 xfs_buf_t *bp;
3804 xfs_sb_t *sbp; 3849 xfs_sb_t *sbp;
3805 3850
3806 /* 3851 /*
3807 * First replay the images in the log. 3852 * First replay the images in the log.
3808 */ 3853 */
3809 error = xlog_do_log_recovery(log, head_blk, tail_blk); 3854 error = xlog_do_log_recovery(log, head_blk, tail_blk);
3810 if (error) { 3855 if (error) {
3811 return error; 3856 return error;
3812 } 3857 }
3813 3858
3814 XFS_bflush(log->l_mp->m_ddev_targp); 3859 XFS_bflush(log->l_mp->m_ddev_targp);
3815 3860
3816 /* 3861 /*
3817 * If IO errors happened during recovery, bail out. 3862 * If IO errors happened during recovery, bail out.
3818 */ 3863 */
3819 if (XFS_FORCED_SHUTDOWN(log->l_mp)) { 3864 if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
3820 return (EIO); 3865 return (EIO);
3821 } 3866 }
3822 3867
3823 /* 3868 /*
3824 * We now update the tail_lsn since much of the recovery has completed 3869 * We now update the tail_lsn since much of the recovery has completed
3825 * and there may be space available to use. If there were no extent 3870 * and there may be space available to use. If there were no extent
3826 * frees or iunlinks, we can free up the entire log and set the tail_lsn to 3871 * frees or iunlinks, we can free up the entire log and set the tail_lsn to
3827 * be the last_sync_lsn. This was set in xlog_find_tail to be the 3872 * be the last_sync_lsn. This was set in xlog_find_tail to be the
3828 * lsn of the last known good LR on disk. If there are extent frees 3873 * lsn of the last known good LR on disk. If there are extent frees
3829 * or iunlinks they will have some entries in the AIL; so we look at 3874 * or iunlinks they will have some entries in the AIL; so we look at
3830 * the AIL to determine how to set the tail_lsn. 3875 * the AIL to determine how to set the tail_lsn.
3831 */ 3876 */
3832 xlog_assign_tail_lsn(log->l_mp); 3877 xlog_assign_tail_lsn(log->l_mp);
3833 3878
3834 /* 3879 /*
3835 * Now that we've finished replaying all buffer and inode 3880 * Now that we've finished replaying all buffer and inode
3836 * updates, re-read in the superblock. 3881 * updates, re-read in the superblock.
3837 */ 3882 */
3838 bp = xfs_getsb(log->l_mp, 0); 3883 bp = xfs_getsb(log->l_mp, 0);
3839 XFS_BUF_UNDONE(bp); 3884 XFS_BUF_UNDONE(bp);
3840 XFS_BUF_READ(bp); 3885 XFS_BUF_READ(bp);
3841 xfsbdstrat(log->l_mp, bp); 3886 xfsbdstrat(log->l_mp, bp);
3842 if ((error = xfs_iowait(bp))) { 3887 if ((error = xfs_iowait(bp))) {
3843 xfs_ioerror_alert("xlog_do_recover", 3888 xfs_ioerror_alert("xlog_do_recover",
3844 log->l_mp, bp, XFS_BUF_ADDR(bp)); 3889 log->l_mp, bp, XFS_BUF_ADDR(bp));
3845 ASSERT(0); 3890 ASSERT(0);
3846 xfs_buf_relse(bp); 3891 xfs_buf_relse(bp);
3847 return error; 3892 return error;
3848 } 3893 }
3849 3894
3850 /* Convert superblock from on-disk format */ 3895 /* Convert superblock from on-disk format */
3851 sbp = &log->l_mp->m_sb; 3896 sbp = &log->l_mp->m_sb;
3852 xfs_xlatesb(XFS_BUF_TO_SBP(bp), sbp, 1, XFS_SB_ALL_BITS); 3897 xfs_xlatesb(XFS_BUF_TO_SBP(bp), sbp, 1, XFS_SB_ALL_BITS);
3853 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); 3898 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
3854 ASSERT(XFS_SB_GOOD_VERSION(sbp)); 3899 ASSERT(XFS_SB_GOOD_VERSION(sbp));
3855 xfs_buf_relse(bp); 3900 xfs_buf_relse(bp);
3856 3901
3857 /* We've re-read the superblock so re-initialize per-cpu counters */ 3902 /* We've re-read the superblock so re-initialize per-cpu counters */
3858 xfs_icsb_reinit_counters(log->l_mp); 3903 xfs_icsb_reinit_counters(log->l_mp);
3859 3904
3860 xlog_recover_check_summary(log); 3905 xlog_recover_check_summary(log);
3861 3906
3862 /* Normal transactions can now occur */ 3907 /* Normal transactions can now occur */
3863 log->l_flags &= ~XLOG_ACTIVE_RECOVERY; 3908 log->l_flags &= ~XLOG_ACTIVE_RECOVERY;
3864 return 0; 3909 return 0;
3865 } 3910 }
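
The post-replay superblock checks boil down to validating the on-disk magic and version once the buffer comes back in. A hedged userspace sketch of the first check only, with the struct reduced to the one field being tested (XFS_SB_MAGIC is 0x58465342, the bytes "XFSB"):

    #include <stdint.h>
    #include <stdio.h>

    #define XFS_SB_MAGIC    0x58465342      /* "XFSB" */

    /* reduced stand-in for xfs_sb_t; only the magic matters here */
    struct mini_sb {
            uint32_t sb_magicnum;
    };

    static int check_sb(const struct mini_sb *sbp)
    {
            if (sbp->sb_magicnum != XFS_SB_MAGIC) {
                    fprintf(stderr, "bad superblock magic 0x%x\n",
                            (unsigned)sbp->sb_magicnum);
                    return -1;  /* the kernel ASSERTs instead */
            }
            return 0;
    }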
3866 3911
3867 /* 3912 /*
3868 * Perform recovery and re-initialize some log variables in xlog_find_tail. 3913 * Perform recovery and re-initialize some log variables in xlog_find_tail.
3869 * 3914 *
3870 * Return error or zero. 3915 * Return error or zero.
3871 */ 3916 */
3872 int 3917 int
3873 xlog_recover( 3918 xlog_recover(
3874 xlog_t *log) 3919 xlog_t *log)
3875 { 3920 {
3876 xfs_daddr_t head_blk, tail_blk; 3921 xfs_daddr_t head_blk, tail_blk;
3877 int error; 3922 int error;
3878 3923
3879 /* find the tail of the log */ 3924 /* find the tail of the log */
3880 if ((error = xlog_find_tail(log, &head_blk, &tail_blk))) 3925 if ((error = xlog_find_tail(log, &head_blk, &tail_blk)))
3881 return error; 3926 return error;
3882 3927
3883 if (tail_blk != head_blk) { 3928 if (tail_blk != head_blk) {
3884 /* There used to be a comment here: 3929 /* There used to be a comment here:
3885 * 3930 *
3886 * disallow recovery on read-only mounts. note -- mount 3931 * disallow recovery on read-only mounts. note -- mount
3887 * checks for ENOSPC and turns it into an intelligent 3932 * checks for ENOSPC and turns it into an intelligent
3888 * error message. 3933 * error message.
3889 * ...but this is no longer true. Now, unless you specify 3934 * ...but this is no longer true. Now, unless you specify
3890 * NORECOVERY (in which case this function would never be 3935 * NORECOVERY (in which case this function would never be
3891 * called), we just go ahead and recover. We do this all 3936 * called), we just go ahead and recover. We do this all
3892 * under the vfs layer, so we can get away with it unless 3937 * under the vfs layer, so we can get away with it unless
3893 * the device itself is read-only, in which case we fail. 3938 * the device itself is read-only, in which case we fail.
3894 */ 3939 */
3895 if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) { 3940 if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) {
3896 return error; 3941 return error;
3897 } 3942 }
3898 3943
3899 cmn_err(CE_NOTE, 3944 cmn_err(CE_NOTE,
3900 "Starting XFS recovery on filesystem: %s (logdev: %s)", 3945 "Starting XFS recovery on filesystem: %s (logdev: %s)",
3901 log->l_mp->m_fsname, log->l_mp->m_logname ? 3946 log->l_mp->m_fsname, log->l_mp->m_logname ?
3902 log->l_mp->m_logname : "internal"); 3947 log->l_mp->m_logname : "internal");
3903 3948
3904 error = xlog_do_recover(log, head_blk, tail_blk); 3949 error = xlog_do_recover(log, head_blk, tail_blk);
3905 log->l_flags |= XLOG_RECOVERY_NEEDED; 3950 log->l_flags |= XLOG_RECOVERY_NEEDED;
3906 } 3951 }
3907 return error; 3952 return error;
3908 } 3953 }
3909 3954
3910 /* 3955 /*
3911 * In the first part of recovery we replay inodes and buffers and build 3956 * In the first part of recovery we replay inodes and buffers and build
3912 * up the list of extent free items which need to be processed. Here 3957 * up the list of extent free items which need to be processed. Here
3913 * we process the extent free items and clean up the on disk unlinked 3958 * we process the extent free items and clean up the on disk unlinked
3914 * inode lists. This is separated from the first part of recovery so 3959 * inode lists. This is separated from the first part of recovery so
3915 * that the root and real-time bitmap inodes can be read in from disk in 3960 * that the root and real-time bitmap inodes can be read in from disk in
3916 * between the two stages. This is necessary so that we can free space 3961 * between the two stages. This is necessary so that we can free space
3917 * in the real-time portion of the file system. 3962 * in the real-time portion of the file system.
3918 */ 3963 */
3919 int 3964 int
3920 xlog_recover_finish( 3965 xlog_recover_finish(
3921 xlog_t *log, 3966 xlog_t *log,
3922 int mfsi_flags) 3967 int mfsi_flags)
3923 { 3968 {
3924 /* 3969 /*
3925 * Now we're ready to do the transactions needed for the 3970 * Now we're ready to do the transactions needed for the
3926 * rest of recovery. Start with completing all the extent 3971 * rest of recovery. Start with completing all the extent
3927 * free intent records and then process the unlinked inode 3972 * free intent records and then process the unlinked inode
3928 * lists. At this point, we essentially run in normal mode 3973 * lists. At this point, we essentially run in normal mode
3929 * except that we're still performing recovery actions 3974 * except that we're still performing recovery actions
3930 * rather than accepting new requests. 3975 * rather than accepting new requests.
3931 */ 3976 */
3932 if (log->l_flags & XLOG_RECOVERY_NEEDED) { 3977 if (log->l_flags & XLOG_RECOVERY_NEEDED) {
3933 xlog_recover_process_efis(log); 3978 xlog_recover_process_efis(log);
3934 /* 3979 /*
3935 * Sync the log to get all the EFIs out of the AIL. 3980 * Sync the log to get all the EFIs out of the AIL.
3936 * This isn't absolutely necessary, but it helps in 3981 * This isn't absolutely necessary, but it helps in
3937 * case the unlink transactions have problems 3982 * case the unlink transactions have problems
3938 * pushing the EFIs out of the way. 3983 * pushing the EFIs out of the way.
3939 */ 3984 */
3940 xfs_log_force(log->l_mp, (xfs_lsn_t)0, 3985 xfs_log_force(log->l_mp, (xfs_lsn_t)0,
3941 (XFS_LOG_FORCE | XFS_LOG_SYNC)); 3986 (XFS_LOG_FORCE | XFS_LOG_SYNC));
3942 3987
3943 if ((mfsi_flags & XFS_MFSI_NOUNLINK) == 0) { 3988 if ((mfsi_flags & XFS_MFSI_NOUNLINK) == 0) {
3944 xlog_recover_process_iunlinks(log); 3989 xlog_recover_process_iunlinks(log);
3945 } 3990 }
3946 3991
3947 xlog_recover_check_summary(log); 3992 xlog_recover_check_summary(log);
3948 3993
3949 cmn_err(CE_NOTE, 3994 cmn_err(CE_NOTE,
3950 "Ending XFS recovery on filesystem: %s (logdev: %s)", 3995 "Ending XFS recovery on filesystem: %s (logdev: %s)",
3951 log->l_mp->m_fsname, log->l_mp->m_logname ? 3996 log->l_mp->m_fsname, log->l_mp->m_logname ?
3952 log->l_mp->m_logname : "internal"); 3997 log->l_mp->m_logname : "internal");
3953 log->l_flags &= ~XLOG_RECOVERY_NEEDED; 3998 log->l_flags &= ~XLOG_RECOVERY_NEEDED;
3954 } else { 3999 } else {
3955 cmn_err(CE_DEBUG, 4000 cmn_err(CE_DEBUG,
3956 "!Ending clean XFS mount for filesystem: %s\n", 4001 "!Ending clean XFS mount for filesystem: %s\n",
3957 log->l_mp->m_fsname); 4002 log->l_mp->m_fsname);
3958 } 4003 }
3959 return 0; 4004 return 0;
3960 } 4005 }
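
The ordering in xlog_recover_finish() is the point: intents first, then a synchronous log force to drain the AIL, then the unlinked lists unless the caller opted out. A trace-only sketch of that gate, with hypothetical flag values standing in for XLOG_RECOVERY_NEEDED and XFS_MFSI_NOUNLINK:

    #include <stdio.h>

    #define RECOVERY_NEEDED 0x1     /* stand-in for XLOG_RECOVERY_NEEDED */
    #define MFSI_NOUNLINK   0x2     /* stand-in for XFS_MFSI_NOUNLINK */

    static void recover_finish(int log_flags, int mfsi_flags)
    {
            if (!(log_flags & RECOVERY_NEEDED)) {
                    puts("clean mount, nothing to finish");
                    return;
            }
            puts("complete extent free intents (EFIs)");
            puts("force the log synchronously to push EFIs from the AIL");
            if (!(mfsi_flags & MFSI_NOUNLINK))
                    puts("process on-disk unlinked inode lists");
    }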
3961 4006
3962 4007
3963 #if defined(DEBUG) 4008 #if defined(DEBUG)
3964 /* 4009 /*
3965 * Read all of the agf and agi counters and check that they 4010 * Read all of the agf and agi counters and check that they
3966 * are consistent with the superblock counters. 4011 * are consistent with the superblock counters.
3967 */ 4012 */
3968 void 4013 void
3969 xlog_recover_check_summary( 4014 xlog_recover_check_summary(
3970 xlog_t *log) 4015 xlog_t *log)
3971 { 4016 {
3972 xfs_mount_t *mp; 4017 xfs_mount_t *mp;
3973 xfs_agf_t *agfp; 4018 xfs_agf_t *agfp;
3974 xfs_agi_t *agip; 4019 xfs_agi_t *agip;
3975 xfs_buf_t *agfbp; 4020 xfs_buf_t *agfbp;
3976 xfs_buf_t *agibp; 4021 xfs_buf_t *agibp;
3977 xfs_daddr_t agfdaddr; 4022 xfs_daddr_t agfdaddr;
3978 xfs_daddr_t agidaddr; 4023 xfs_daddr_t agidaddr;
3979 xfs_buf_t *sbbp; 4024 xfs_buf_t *sbbp;
3980 #ifdef XFS_LOUD_RECOVERY 4025 #ifdef XFS_LOUD_RECOVERY
3981 xfs_sb_t *sbp; 4026 xfs_sb_t *sbp;
3982 #endif 4027 #endif
3983 xfs_agnumber_t agno; 4028 xfs_agnumber_t agno;
3984 __uint64_t freeblks; 4029 __uint64_t freeblks;
3985 __uint64_t itotal; 4030 __uint64_t itotal;
3986 __uint64_t ifree; 4031 __uint64_t ifree;
3987 4032
3988 mp = log->l_mp; 4033 mp = log->l_mp;
3989 4034
3990 freeblks = 0LL; 4035 freeblks = 0LL;
3991 itotal = 0LL; 4036 itotal = 0LL;
3992 ifree = 0LL; 4037 ifree = 0LL;
3993 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { 4038 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
3994 agfdaddr = XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)); 4039 agfdaddr = XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp));
3995 agfbp = xfs_buf_read(mp->m_ddev_targp, agfdaddr, 4040 agfbp = xfs_buf_read(mp->m_ddev_targp, agfdaddr,
3996 XFS_FSS_TO_BB(mp, 1), 0); 4041 XFS_FSS_TO_BB(mp, 1), 0);
3997 if (XFS_BUF_ISERROR(agfbp)) { 4042 if (XFS_BUF_ISERROR(agfbp)) {
3998 xfs_ioerror_alert("xlog_recover_check_summary(agf)", 4043 xfs_ioerror_alert("xlog_recover_check_summary(agf)",
3999 mp, agfbp, agfdaddr); 4044 mp, agfbp, agfdaddr);
4000 } 4045 }
4001 agfp = XFS_BUF_TO_AGF(agfbp); 4046 agfp = XFS_BUF_TO_AGF(agfbp);
4002 ASSERT(XFS_AGF_MAGIC == be32_to_cpu(agfp->agf_magicnum)); 4047 ASSERT(XFS_AGF_MAGIC == be32_to_cpu(agfp->agf_magicnum));
4003 ASSERT(XFS_AGF_GOOD_VERSION(be32_to_cpu(agfp->agf_versionnum))); 4048 ASSERT(XFS_AGF_GOOD_VERSION(be32_to_cpu(agfp->agf_versionnum)));
4004 ASSERT(be32_to_cpu(agfp->agf_seqno) == agno); 4049 ASSERT(be32_to_cpu(agfp->agf_seqno) == agno);
4005 4050
4006 freeblks += be32_to_cpu(agfp->agf_freeblks) + 4051 freeblks += be32_to_cpu(agfp->agf_freeblks) +
4007 be32_to_cpu(agfp->agf_flcount); 4052 be32_to_cpu(agfp->agf_flcount);
4008 xfs_buf_relse(agfbp); 4053 xfs_buf_relse(agfbp);
4009 4054
4010 agidaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)); 4055 agidaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp));
4011 agibp = xfs_buf_read(mp->m_ddev_targp, agidaddr, 4056 agibp = xfs_buf_read(mp->m_ddev_targp, agidaddr,
4012 XFS_FSS_TO_BB(mp, 1), 0); 4057 XFS_FSS_TO_BB(mp, 1), 0);
4013 if (XFS_BUF_ISERROR(agibp)) { 4058 if (XFS_BUF_ISERROR(agibp)) {
4014 xfs_ioerror_alert("xlog_recover_check_summary(agi)", 4059 xfs_ioerror_alert("xlog_recover_check_summary(agi)",
4015 mp, agibp, agidaddr); 4060 mp, agibp, agidaddr);
4016 } 4061 }
4017 agip = XFS_BUF_TO_AGI(agibp); 4062 agip = XFS_BUF_TO_AGI(agibp);
4018 ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agip->agi_magicnum)); 4063 ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agip->agi_magicnum));
4019 ASSERT(XFS_AGI_GOOD_VERSION(be32_to_cpu(agip->agi_versionnum))); 4064 ASSERT(XFS_AGI_GOOD_VERSION(be32_to_cpu(agip->agi_versionnum)));
4020 ASSERT(be32_to_cpu(agip->agi_seqno) == agno); 4065 ASSERT(be32_to_cpu(agip->agi_seqno) == agno);
4021 4066
4022 itotal += be32_to_cpu(agip->agi_count); 4067 itotal += be32_to_cpu(agip->agi_count);
4023 ifree += be32_to_cpu(agip->agi_freecount); 4068 ifree += be32_to_cpu(agip->agi_freecount);
4024 xfs_buf_relse(agibp); 4069 xfs_buf_relse(agibp);
4025 } 4070 }
4026 4071
4027 sbbp = xfs_getsb(mp, 0); 4072 sbbp = xfs_getsb(mp, 0);
4028 #ifdef XFS_LOUD_RECOVERY 4073 #ifdef XFS_LOUD_RECOVERY
4029 sbp = &mp->m_sb; 4074 sbp = &mp->m_sb;
4030 xfs_xlatesb(XFS_BUF_TO_SBP(sbbp), sbp, 1, XFS_SB_ALL_BITS); 4075 xfs_xlatesb(XFS_BUF_TO_SBP(sbbp), sbp, 1, XFS_SB_ALL_BITS);
4031 cmn_err(CE_NOTE, 4076 cmn_err(CE_NOTE,
4032 "xlog_recover_check_summary: sb_icount %Lu itotal %Lu", 4077 "xlog_recover_check_summary: sb_icount %Lu itotal %Lu",
4033 sbp->sb_icount, itotal); 4078 sbp->sb_icount, itotal);
4034 cmn_err(CE_NOTE, 4079 cmn_err(CE_NOTE,
4035 "xlog_recover_check_summary: sb_ifree %Lu itotal %Lu", 4080 "xlog_recover_check_summary: sb_ifree %Lu itotal %Lu",
4036 sbp->sb_ifree, ifree); 4081 sbp->sb_ifree, ifree);
4037 cmn_err(CE_NOTE, 4082 cmn_err(CE_NOTE,
4038 "xlog_recover_check_summary: sb_fdblocks %Lu freeblks %Lu", 4083 "xlog_recover_check_summary: sb_fdblocks %Lu freeblks %Lu",
4039 sbp->sb_fdblocks, freeblks); 4084 sbp->sb_fdblocks, freeblks);
4040 #if 0 4085 #if 0
4041 /* 4086 /*
4042 * This is turned off until I account for the allocation 4087 * This is turned off until I account for the allocation
4043 * btree blocks which live in free space. 4088 * btree blocks which live in free space.
4044 */ 4089 */
4045 ASSERT(sbp->sb_icount == itotal); 4090 ASSERT(sbp->sb_icount == itotal);
4046 ASSERT(sbp->sb_ifree == ifree); 4091 ASSERT(sbp->sb_ifree == ifree);
4047 ASSERT(sbp->sb_fdblocks == freeblks); 4092 ASSERT(sbp->sb_fdblocks == freeblks);
4048 #endif 4093 #endif
4049 #endif 4094 #endif
4050 xfs_buf_relse(sbbp); 4095 xfs_buf_relse(sbbp);
4051 } 4096 }
4052 #endif /* DEBUG */ 4097 #endif /* DEBUG */
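
The DEBUG-only summary check is a fold over the allocation groups: free space is agf_freeblks plus the free-list count, and the inode totals come straight off each AGI. A standalone sketch of the same accumulation, with a made-up array in place of the AGF/AGI buffer reads:

    #include <stdint.h>
    #include <stdio.h>

    /* stand-ins for the fields pulled out of each AGF/AGI buffer */
    struct ag_counters {
            uint32_t agf_freeblks;  /* free blocks in the AG */
            uint32_t agf_flcount;   /* blocks held on the AG free list */
            uint32_t agi_count;     /* inodes allocated in the AG */
            uint32_t agi_freecount; /* of those, how many are free */
    };

    int main(void)
    {
            struct ag_counters ags[] = {
                    { 1000, 4, 64, 3 },
                    {  900, 4, 64, 7 },
            };
            uint64_t freeblks = 0, itotal = 0, ifree = 0;
            unsigned i;

            /* same per-AG accumulation as the agno loop above */
            for (i = 0; i < sizeof(ags) / sizeof(ags[0]); i++) {
                    freeblks += ags[i].agf_freeblks + ags[i].agf_flcount;
                    itotal += ags[i].agi_count;
                    ifree += ags[i].agi_freecount;
            }
            /* compared against sb_fdblocks, sb_icount and sb_ifree */
            printf("freeblks %llu itotal %llu ifree %llu\n",
                   (unsigned long long)freeblks,
                   (unsigned long long)itotal,
                   (unsigned long long)ifree);
            return 0;
    }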
4053 4098
fs/xfs/xfs_trans_buf.c
1 /* 1 /*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #include "xfs.h" 18 #include "xfs.h"
19 #include "xfs_fs.h" 19 #include "xfs_fs.h"
20 #include "xfs_types.h" 20 #include "xfs_types.h"
21 #include "xfs_bit.h" 21 #include "xfs_bit.h"
22 #include "xfs_log.h" 22 #include "xfs_log.h"
23 #include "xfs_inum.h" 23 #include "xfs_inum.h"
24 #include "xfs_trans.h" 24 #include "xfs_trans.h"
25 #include "xfs_sb.h" 25 #include "xfs_sb.h"
26 #include "xfs_ag.h" 26 #include "xfs_ag.h"
27 #include "xfs_dir2.h" 27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h" 28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h" 29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h" 30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h" 31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h" 32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h" 33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h" 34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h" 35 #include "xfs_dinode.h"
36 #include "xfs_inode.h" 36 #include "xfs_inode.h"
37 #include "xfs_buf_item.h" 37 #include "xfs_buf_item.h"
38 #include "xfs_trans_priv.h" 38 #include "xfs_trans_priv.h"
39 #include "xfs_error.h" 39 #include "xfs_error.h"
40 #include "xfs_rw.h" 40 #include "xfs_rw.h"
41 41
42 42
43 STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *, 43 STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *,
44 xfs_daddr_t, int); 44 xfs_daddr_t, int);
45 STATIC xfs_buf_t *xfs_trans_buf_item_match_all(xfs_trans_t *, xfs_buftarg_t *, 45 STATIC xfs_buf_t *xfs_trans_buf_item_match_all(xfs_trans_t *, xfs_buftarg_t *,
46 xfs_daddr_t, int); 46 xfs_daddr_t, int);
47 47
48 48
49 /* 49 /*
50 * Get and lock the buffer for the caller if it is not already 50 * Get and lock the buffer for the caller if it is not already
51 * locked within the given transaction. If it is already locked 51 * locked within the given transaction. If it is already locked
52 * within the transaction, just increment its lock recursion count 52 * within the transaction, just increment its lock recursion count
53 * and return a pointer to it. 53 * and return a pointer to it.
54 * 54 *
55 * Use the fast path function xfs_trans_buf_item_match() or the buffer 55 * Use the fast path function xfs_trans_buf_item_match() or the buffer
56 * cache routine incore_match() to find the buffer 56 * cache routine incore_match() to find the buffer
57 * if it is already owned by this transaction. 57 * if it is already owned by this transaction.
58 * 58 *
59 * If we don't already own the buffer, use get_buf() to get it. 59 * If we don't already own the buffer, use get_buf() to get it.
60 * If it doesn't yet have an associated xfs_buf_log_item structure, 60 * If it doesn't yet have an associated xfs_buf_log_item structure,
61 * then allocate one and add the item to this transaction. 61 * then allocate one and add the item to this transaction.
62 * 62 *
63 * If the transaction pointer is NULL, make this just a normal 63 * If the transaction pointer is NULL, make this just a normal
64 * get_buf() call. 64 * get_buf() call.
65 */ 65 */
66 xfs_buf_t * 66 xfs_buf_t *
67 xfs_trans_get_buf(xfs_trans_t *tp, 67 xfs_trans_get_buf(xfs_trans_t *tp,
68 xfs_buftarg_t *target_dev, 68 xfs_buftarg_t *target_dev,
69 xfs_daddr_t blkno, 69 xfs_daddr_t blkno,
70 int len, 70 int len,
71 uint flags) 71 uint flags)
72 { 72 {
73 xfs_buf_t *bp; 73 xfs_buf_t *bp;
74 xfs_buf_log_item_t *bip; 74 xfs_buf_log_item_t *bip;
75 75
76 if (flags == 0) 76 if (flags == 0)
77 flags = XFS_BUF_LOCK | XFS_BUF_MAPPED; 77 flags = XFS_BUF_LOCK | XFS_BUF_MAPPED;
78 78
79 /* 79 /*
80 * Default to a normal get_buf() call if the tp is NULL. 80 * Default to a normal get_buf() call if the tp is NULL.
81 */ 81 */
82 if (tp == NULL) { 82 if (tp == NULL) {
83 bp = xfs_buf_get_flags(target_dev, blkno, len, 83 bp = xfs_buf_get_flags(target_dev, blkno, len,
84 flags | BUF_BUSY); 84 flags | BUF_BUSY);
85 return(bp); 85 return(bp);
86 } 86 }
87 87
88 /* 88 /*
89 * If we find the buffer in the cache with this transaction 89 * If we find the buffer in the cache with this transaction
90 * pointer in its b_fsprivate2 field, then we know we already 90 * pointer in its b_fsprivate2 field, then we know we already
91 * have it locked. In this case we just increment the lock 91 * have it locked. In this case we just increment the lock
92 * recursion count and return the buffer to the caller. 92 * recursion count and return the buffer to the caller.
93 */ 93 */
94 if (tp->t_items.lic_next == NULL) { 94 if (tp->t_items.lic_next == NULL) {
95 bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len); 95 bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
96 } else { 96 } else {
97 bp = xfs_trans_buf_item_match_all(tp, target_dev, blkno, len); 97 bp = xfs_trans_buf_item_match_all(tp, target_dev, blkno, len);
98 } 98 }
99 if (bp != NULL) { 99 if (bp != NULL) {
100 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 100 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
101 if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) { 101 if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) {
102 xfs_buftrace("TRANS GET RECUR SHUT", bp); 102 xfs_buftrace("TRANS GET RECUR SHUT", bp);
103 XFS_BUF_SUPER_STALE(bp); 103 XFS_BUF_SUPER_STALE(bp);
104 } 104 }
105 /* 105 /*
106 * If the buffer is stale then it was binval'ed 106 * If the buffer is stale then it was binval'ed
107 * since last read. This doesn't matter since the 107 * since last read. This doesn't matter since the
108 * caller isn't allowed to use the data anyway. 108 * caller isn't allowed to use the data anyway.
109 */ 109 */
110 else if (XFS_BUF_ISSTALE(bp)) { 110 else if (XFS_BUF_ISSTALE(bp)) {
111 xfs_buftrace("TRANS GET RECUR STALE", bp); 111 xfs_buftrace("TRANS GET RECUR STALE", bp);
112 ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); 112 ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
113 } 113 }
114 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 114 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
115 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 115 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
116 ASSERT(bip != NULL); 116 ASSERT(bip != NULL);
117 ASSERT(atomic_read(&bip->bli_refcount) > 0); 117 ASSERT(atomic_read(&bip->bli_refcount) > 0);
118 bip->bli_recur++; 118 bip->bli_recur++;
119 xfs_buftrace("TRANS GET RECUR", bp); 119 xfs_buftrace("TRANS GET RECUR", bp);
120 xfs_buf_item_trace("GET RECUR", bip); 120 xfs_buf_item_trace("GET RECUR", bip);
121 return (bp); 121 return (bp);
122 } 122 }
123 123
124 /* 124 /*
125 * We always specify the BUF_BUSY flag within a transaction so 125 * We always specify the BUF_BUSY flag within a transaction so
126 * that get_buf does not try to push out a delayed write buffer 126 * that get_buf does not try to push out a delayed write buffer
127 * which might cause another transaction to take place (if the 127 * which might cause another transaction to take place (if the
128 * buffer was delayed alloc). Such recursive transactions can 128 * buffer was delayed alloc). Such recursive transactions can
129 * easily deadlock with our current transaction as well as cause 129 * easily deadlock with our current transaction as well as cause
130 * us to run out of stack space. 130 * us to run out of stack space.
131 */ 131 */
132 bp = xfs_buf_get_flags(target_dev, blkno, len, flags | BUF_BUSY); 132 bp = xfs_buf_get_flags(target_dev, blkno, len, flags | BUF_BUSY);
133 if (bp == NULL) { 133 if (bp == NULL) {
134 return NULL; 134 return NULL;
135 } 135 }
136 136
137 ASSERT(!XFS_BUF_GETERROR(bp)); 137 ASSERT(!XFS_BUF_GETERROR(bp));
138 138
139 /* 139 /*
140 * The xfs_buf_log_item pointer is stored in b_fsprivate. If 140 * The xfs_buf_log_item pointer is stored in b_fsprivate. If
141 * it doesn't have one yet, then allocate one and initialize it. 141 * it doesn't have one yet, then allocate one and initialize it.
142 * The checks to see if one is there are in xfs_buf_item_init(). 142 * The checks to see if one is there are in xfs_buf_item_init().
143 */ 143 */
144 xfs_buf_item_init(bp, tp->t_mountp); 144 xfs_buf_item_init(bp, tp->t_mountp);
145 145
146 /* 146 /*
147 * Set the recursion count for the buffer within this transaction 147 * Set the recursion count for the buffer within this transaction
148 * to 0. 148 * to 0.
149 */ 149 */
150 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 150 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
151 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 151 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
152 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); 152 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
153 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 153 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
154 bip->bli_recur = 0; 154 bip->bli_recur = 0;
155 155
156 /* 156 /*
157 * Take a reference for this transaction on the buf item. 157 * Take a reference for this transaction on the buf item.
158 */ 158 */
159 atomic_inc(&bip->bli_refcount); 159 atomic_inc(&bip->bli_refcount);
160 160
161 /* 161 /*
162 * Get a log_item_desc to point at the new item. 162 * Get a log_item_desc to point at the new item.
163 */ 163 */
164 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip); 164 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);
165 165
166 /* 166 /*
167 * Initialize b_fsprivate2 so we can find it with incore_match() 167 * Initialize b_fsprivate2 so we can find it with incore_match()
168 * above. 168 * above.
169 */ 169 */
170 XFS_BUF_SET_FSPRIVATE2(bp, tp); 170 XFS_BUF_SET_FSPRIVATE2(bp, tp);
171 171
172 xfs_buftrace("TRANS GET", bp); 172 xfs_buftrace("TRANS GET", bp);
173 xfs_buf_item_trace("GET", bip); 173 xfs_buf_item_trace("GET", bip);
174 return (bp); 174 return (bp);
175 } 175 }
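
The recursion handling above is the core of transactional buffer locking: a second xfs_trans_get_buf() from the owning transaction must not try to re-lock, it just bumps a counter. A toy model with hypothetical structs, collapsing b_fsprivate2 and the buf log item into two fields:

    #include <stddef.h>

    struct trans;                   /* opaque transaction handle */

    struct buf {
            struct trans *owner;    /* stands in for b_fsprivate2 */
            int recur;              /* stands in for bip->bli_recur */
    };

    /* If tp already owns bp, bump the recursion count instead of
     * locking again; otherwise take first ownership at recursion 0. */
    static struct buf *trans_get_buf(struct trans *tp, struct buf *bp)
    {
            if (bp->owner == tp) {
                    bp->recur++;
                    return bp;
            }
            bp->owner = tp;   /* first acquisition by this transaction */
            bp->recur = 0;
            return bp;
    }

The matching release path would decrement recur and only truly unlock at zero, which is why the refcount and the recursion count are kept separately.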
176 176
177 /* 177 /*
178 * Get and lock the superblock buffer of this file system for the 178 * Get and lock the superblock buffer of this file system for the
179 * given transaction. 179 * given transaction.
180 * 180 *
181 * We don't need to use incore_match() here, because the superblock 181 * We don't need to use incore_match() here, because the superblock
182 * buffer is a private buffer which we keep a pointer to in the 182 * buffer is a private buffer which we keep a pointer to in the
183 * mount structure. 183 * mount structure.
184 */ 184 */
185 xfs_buf_t * 185 xfs_buf_t *
186 xfs_trans_getsb(xfs_trans_t *tp, 186 xfs_trans_getsb(xfs_trans_t *tp,
187 struct xfs_mount *mp, 187 struct xfs_mount *mp,
188 int flags) 188 int flags)
189 { 189 {
190 xfs_buf_t *bp; 190 xfs_buf_t *bp;
191 xfs_buf_log_item_t *bip; 191 xfs_buf_log_item_t *bip;
192 192
193 /* 193 /*
194 * Default to just trying to lock the superblock buffer 194 * Default to just trying to lock the superblock buffer
195 * if tp is NULL. 195 * if tp is NULL.
196 */ 196 */
197 if (tp == NULL) { 197 if (tp == NULL) {
198 return (xfs_getsb(mp, flags)); 198 return (xfs_getsb(mp, flags));
199 } 199 }
200 200
201 /* 201 /*
202 * If the superblock buffer already has this transaction 202 * If the superblock buffer already has this transaction
203 * pointer in its b_fsprivate2 field, then we know we already 203 * pointer in its b_fsprivate2 field, then we know we already
204 * have it locked. In this case we just increment the lock 204 * have it locked. In this case we just increment the lock
205 * recursion count and return the buffer to the caller. 205 * recursion count and return the buffer to the caller.
206 */ 206 */
207 bp = mp->m_sb_bp; 207 bp = mp->m_sb_bp;
208 if (XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp) { 208 if (XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp) {
209 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 209 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
210 ASSERT(bip != NULL); 210 ASSERT(bip != NULL);
211 ASSERT(atomic_read(&bip->bli_refcount) > 0); 211 ASSERT(atomic_read(&bip->bli_refcount) > 0);
212 bip->bli_recur++; 212 bip->bli_recur++;
213 xfs_buf_item_trace("GETSB RECUR", bip); 213 xfs_buf_item_trace("GETSB RECUR", bip);
214 return (bp); 214 return (bp);
215 } 215 }
216 216
217 bp = xfs_getsb(mp, flags); 217 bp = xfs_getsb(mp, flags);
218 if (bp == NULL) { 218 if (bp == NULL) {
219 return NULL; 219 return NULL;
220 } 220 }
221 221
222 /* 222 /*
223 * The xfs_buf_log_item pointer is stored in b_fsprivate. If 223 * The xfs_buf_log_item pointer is stored in b_fsprivate. If
224 * it doesn't have one yet, then allocate one and initialize it. 224 * it doesn't have one yet, then allocate one and initialize it.
225 * The checks to see if one is there are in xfs_buf_item_init(). 225 * The checks to see if one is there are in xfs_buf_item_init().
226 */ 226 */
227 xfs_buf_item_init(bp, mp); 227 xfs_buf_item_init(bp, mp);
228 228
229 /* 229 /*
230 * Set the recursion count for the buffer within this transaction 230 * Set the recursion count for the buffer within this transaction
231 * to 0. 231 * to 0.
232 */ 232 */
233 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 233 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
234 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 234 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
235 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); 235 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
236 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 236 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
237 bip->bli_recur = 0; 237 bip->bli_recur = 0;
238 238
239 /* 239 /*
240 * Take a reference for this transaction on the buf item. 240 * Take a reference for this transaction on the buf item.
241 */ 241 */
242 atomic_inc(&bip->bli_refcount); 242 atomic_inc(&bip->bli_refcount);
243 243
244 /* 244 /*
245 * Get a log_item_desc to point at the new item. 245 * Get a log_item_desc to point at the new item.
246 */ 246 */
247 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip); 247 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);
248 248
249 /* 249 /*
250 * Initialize b_fsprivate2 so we can find it with incore_match() 250 * Initialize b_fsprivate2 so we can find it with incore_match()
251 * above. 251 * above.
252 */ 252 */
253 XFS_BUF_SET_FSPRIVATE2(bp, tp); 253 XFS_BUF_SET_FSPRIVATE2(bp, tp);
254 254
255 xfs_buf_item_trace("GETSB", bip); 255 xfs_buf_item_trace("GETSB", bip);
256 return (bp); 256 return (bp);
257 } 257 }
258 258
259 #ifdef DEBUG 259 #ifdef DEBUG
260 xfs_buftarg_t *xfs_error_target; 260 xfs_buftarg_t *xfs_error_target;
261 int xfs_do_error; 261 int xfs_do_error;
262 int xfs_req_num; 262 int xfs_req_num;
263 int xfs_error_mod = 33; 263 int xfs_error_mod = 33;
264 #endif 264 #endif
265 265
266 /* 266 /*
267 * Get and lock the buffer for the caller if it is not already 267 * Get and lock the buffer for the caller if it is not already
268 * locked within the given transaction. If it has not yet been 268 * locked within the given transaction. If it has not yet been
269 * read in, read it from disk. If it is already locked 269 * read in, read it from disk. If it is already locked
270 * within the transaction and already read in, just increment its 270 * within the transaction and already read in, just increment its
271 * lock recursion count and return a pointer to it. 271 * lock recursion count and return a pointer to it.
272 * 272 *
273 * Use the fast path function xfs_trans_buf_item_match() or the buffer 273 * Use the fast path function xfs_trans_buf_item_match() or the buffer
274 * cache routine incore_match() to find the buffer 274 * cache routine incore_match() to find the buffer
275 * if it is already owned by this transaction. 275 * if it is already owned by this transaction.
276 * 276 *
277 * If we don't already own the buffer, use read_buf() to get it. 277 * If we don't already own the buffer, use read_buf() to get it.
278 * If it doesn't yet have an associated xfs_buf_log_item structure, 278 * If it doesn't yet have an associated xfs_buf_log_item structure,
279 * then allocate one and add the item to this transaction. 279 * then allocate one and add the item to this transaction.
280 * 280 *
281 * If the transaction pointer is NULL, make this just a normal 281 * If the transaction pointer is NULL, make this just a normal
282 * read_buf() call. 282 * read_buf() call.
283 */ 283 */
284 int 284 int
285 xfs_trans_read_buf( 285 xfs_trans_read_buf(
286 xfs_mount_t *mp, 286 xfs_mount_t *mp,
287 xfs_trans_t *tp, 287 xfs_trans_t *tp,
288 xfs_buftarg_t *target, 288 xfs_buftarg_t *target,
289 xfs_daddr_t blkno, 289 xfs_daddr_t blkno,
290 int len, 290 int len,
291 uint flags, 291 uint flags,
292 xfs_buf_t **bpp) 292 xfs_buf_t **bpp)
293 { 293 {
294 xfs_buf_t *bp; 294 xfs_buf_t *bp;
295 xfs_buf_log_item_t *bip; 295 xfs_buf_log_item_t *bip;
296 int error; 296 int error;
297 297
298 if (flags == 0) 298 if (flags == 0)
299 flags = XFS_BUF_LOCK | XFS_BUF_MAPPED; 299 flags = XFS_BUF_LOCK | XFS_BUF_MAPPED;
300 300
301 /* 301 /*
302 * Default to a normal get_buf() call if the tp is NULL. 302 * Default to a normal get_buf() call if the tp is NULL.
303 */ 303 */
304 if (tp == NULL) { 304 if (tp == NULL) {
305 bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY); 305 bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY);
306 if (!bp) 306 if (!bp)
307 return XFS_ERROR(ENOMEM); 307 return XFS_ERROR(ENOMEM);
308 308
309 if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) { 309 if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) {
310 xfs_ioerror_alert("xfs_trans_read_buf", mp, 310 xfs_ioerror_alert("xfs_trans_read_buf", mp,
311 bp, blkno); 311 bp, blkno);
312 error = XFS_BUF_GETERROR(bp); 312 error = XFS_BUF_GETERROR(bp);
313 xfs_buf_relse(bp); 313 xfs_buf_relse(bp);
314 return error; 314 return error;
315 } 315 }
316 #ifdef DEBUG 316 #ifdef DEBUG
317 if (xfs_do_error && (bp != NULL)) { 317 if (xfs_do_error && (bp != NULL)) {
318 if (xfs_error_target == target) { 318 if (xfs_error_target == target) {
319 if (((xfs_req_num++) % xfs_error_mod) == 0) { 319 if (((xfs_req_num++) % xfs_error_mod) == 0) {
320 xfs_buf_relse(bp); 320 xfs_buf_relse(bp);
321 cmn_err(CE_DEBUG, "Returning error!\n"); 321 cmn_err(CE_DEBUG, "Returning error!\n");
322 return XFS_ERROR(EIO); 322 return XFS_ERROR(EIO);
323 } 323 }
324 } 324 }
325 } 325 }
326 #endif 326 #endif
327 if (XFS_FORCED_SHUTDOWN(mp)) 327 if (XFS_FORCED_SHUTDOWN(mp))
328 goto shutdown_abort; 328 goto shutdown_abort;
329 *bpp = bp; 329 *bpp = bp;
330 return 0; 330 return 0;
331 } 331 }
332 332
333 /* 333 /*
334 * If we find the buffer in the cache with this transaction 334 * If we find the buffer in the cache with this transaction
335 * pointer in its b_fsprivate2 field, then we know we already 335 * pointer in its b_fsprivate2 field, then we know we already
336 * have it locked. If it is already read in we just increment 336 * have it locked. If it is already read in we just increment
337 * the lock recursion count and return the buffer to the caller. 337 * the lock recursion count and return the buffer to the caller.
338 * If the buffer is not yet read in, then we read it in, increment 338 * If the buffer is not yet read in, then we read it in, increment
339 * the lock recursion count, and return it to the caller. 339 * the lock recursion count, and return it to the caller.
340 */ 340 */
341 if (tp->t_items.lic_next == NULL) { 341 if (tp->t_items.lic_next == NULL) {
342 bp = xfs_trans_buf_item_match(tp, target, blkno, len); 342 bp = xfs_trans_buf_item_match(tp, target, blkno, len);
343 } else { 343 } else {
344 bp = xfs_trans_buf_item_match_all(tp, target, blkno, len); 344 bp = xfs_trans_buf_item_match_all(tp, target, blkno, len);
345 } 345 }
346 if (bp != NULL) { 346 if (bp != NULL) {
347 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 347 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
348 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 348 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
349 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 349 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
350 ASSERT((XFS_BUF_ISERROR(bp)) == 0); 350 ASSERT((XFS_BUF_ISERROR(bp)) == 0);
351 if (!(XFS_BUF_ISDONE(bp))) { 351 if (!(XFS_BUF_ISDONE(bp))) {
352 xfs_buftrace("READ_BUF_INCORE !DONE", bp); 352 xfs_buftrace("READ_BUF_INCORE !DONE", bp);
353 ASSERT(!XFS_BUF_ISASYNC(bp)); 353 ASSERT(!XFS_BUF_ISASYNC(bp));
354 XFS_BUF_READ(bp); 354 XFS_BUF_READ(bp);
355 xfsbdstrat(tp->t_mountp, bp); 355 xfsbdstrat(tp->t_mountp, bp);
356 xfs_iowait(bp); 356 xfs_iowait(bp);
357 if (XFS_BUF_GETERROR(bp) != 0) { 357 if (XFS_BUF_GETERROR(bp) != 0) {
358 xfs_ioerror_alert("xfs_trans_read_buf", mp, 358 xfs_ioerror_alert("xfs_trans_read_buf", mp,
359 bp, blkno); 359 bp, blkno);
360 error = XFS_BUF_GETERROR(bp); 360 error = XFS_BUF_GETERROR(bp);
361 xfs_buf_relse(bp); 361 xfs_buf_relse(bp);
362 /* 362 /*
363 * We can gracefully recover from most 363 * We can gracefully recover from most
364 * read errors. Ones we can't are those 364 * read errors. Ones we can't are those
365 * that happen after the transaction's 365 * that happen after the transaction's
366 * already dirty. 366 * already dirty.
367 */ 367 */
368 if (tp->t_flags & XFS_TRANS_DIRTY) 368 if (tp->t_flags & XFS_TRANS_DIRTY)
369 xfs_force_shutdown(tp->t_mountp, 369 xfs_force_shutdown(tp->t_mountp,
370 SHUTDOWN_META_IO_ERROR); 370 SHUTDOWN_META_IO_ERROR);
371 return error; 371 return error;
372 } 372 }
373 } 373 }
374 /* 374 /*
375 * We never locked this buf ourselves, so we shouldn't 375 * We never locked this buf ourselves, so we shouldn't
376 * brelse it either. Just get out. 376 * brelse it either. Just get out.
377 */ 377 */
378 if (XFS_FORCED_SHUTDOWN(mp)) { 378 if (XFS_FORCED_SHUTDOWN(mp)) {
379 xfs_buftrace("READ_BUF_INCORE XFSSHUTDN", bp); 379 xfs_buftrace("READ_BUF_INCORE XFSSHUTDN", bp);
380 *bpp = NULL; 380 *bpp = NULL;
381 return XFS_ERROR(EIO); 381 return XFS_ERROR(EIO);
382 } 382 }
383 383
384 384
385 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 385 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
386 bip->bli_recur++; 386 bip->bli_recur++;
387 387
388 ASSERT(atomic_read(&bip->bli_refcount) > 0); 388 ASSERT(atomic_read(&bip->bli_refcount) > 0);
389 xfs_buf_item_trace("READ RECUR", bip); 389 xfs_buf_item_trace("READ RECUR", bip);
390 *bpp = bp; 390 *bpp = bp;
391 return 0; 391 return 0;
392 } 392 }
393 393
394 /* 394 /*
395 * We always specify the BUF_BUSY flag within a transaction so 395 * We always specify the BUF_BUSY flag within a transaction so
396 * that get_buf does not try to push out a delayed write buffer 396 * that get_buf does not try to push out a delayed write buffer
397 * which might cause another transaction to take place (if the 397 * which might cause another transaction to take place (if the
398 * buffer was delayed alloc). Such recursive transactions can 398 * buffer was delayed alloc). Such recursive transactions can
399 * easily deadlock with our current transaction as well as cause 399 * easily deadlock with our current transaction as well as cause
400 * us to run out of stack space. 400 * us to run out of stack space.
401 */ 401 */
402 bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY); 402 bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY);
403 if (bp == NULL) { 403 if (bp == NULL) {
404 *bpp = NULL; 404 *bpp = NULL;
405 return 0; 405 return 0;
406 } 406 }
407 if (XFS_BUF_GETERROR(bp) != 0) { 407 if (XFS_BUF_GETERROR(bp) != 0) {
408 XFS_BUF_SUPER_STALE(bp); 408 XFS_BUF_SUPER_STALE(bp);
409 xfs_buftrace("READ ERROR", bp); 409 xfs_buftrace("READ ERROR", bp);
410 error = XFS_BUF_GETERROR(bp); 410 error = XFS_BUF_GETERROR(bp);
411 411
412 xfs_ioerror_alert("xfs_trans_read_buf", mp, 412 xfs_ioerror_alert("xfs_trans_read_buf", mp,
413 bp, blkno); 413 bp, blkno);
414 if (tp->t_flags & XFS_TRANS_DIRTY) 414 if (tp->t_flags & XFS_TRANS_DIRTY)
415 xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); 415 xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR);
416 xfs_buf_relse(bp); 416 xfs_buf_relse(bp);
417 return error; 417 return error;
418 } 418 }
419 #ifdef DEBUG 419 #ifdef DEBUG
420 if (xfs_do_error && !(tp->t_flags & XFS_TRANS_DIRTY)) { 420 if (xfs_do_error && !(tp->t_flags & XFS_TRANS_DIRTY)) {
421 if (xfs_error_target == target) { 421 if (xfs_error_target == target) {
422 if (((xfs_req_num++) % xfs_error_mod) == 0) { 422 if (((xfs_req_num++) % xfs_error_mod) == 0) {
423 xfs_force_shutdown(tp->t_mountp, 423 xfs_force_shutdown(tp->t_mountp,
424 SHUTDOWN_META_IO_ERROR); 424 SHUTDOWN_META_IO_ERROR);
425 xfs_buf_relse(bp); 425 xfs_buf_relse(bp);
426 cmn_err(CE_DEBUG, "Returning trans error!\n"); 426 cmn_err(CE_DEBUG, "Returning trans error!\n");
427 return XFS_ERROR(EIO); 427 return XFS_ERROR(EIO);
428 } 428 }
429 } 429 }
430 } 430 }
431 #endif 431 #endif
432 if (XFS_FORCED_SHUTDOWN(mp)) 432 if (XFS_FORCED_SHUTDOWN(mp))
433 goto shutdown_abort; 433 goto shutdown_abort;
434 434
435 /* 435 /*
436 * The xfs_buf_log_item pointer is stored in b_fsprivate. If 436 * The xfs_buf_log_item pointer is stored in b_fsprivate. If
437 * it doesn't have one yet, then allocate one and initialize it. 437 * it doesn't have one yet, then allocate one and initialize it.
438 * The checks to see if one is there are in xfs_buf_item_init(). 438 * The checks to see if one is there are in xfs_buf_item_init().
439 */ 439 */
440 xfs_buf_item_init(bp, tp->t_mountp); 440 xfs_buf_item_init(bp, tp->t_mountp);
441 441
442 /* 442 /*
443 * Set the recursion count for the buffer within this transaction 443 * Set the recursion count for the buffer within this transaction
444 * to 0. 444 * to 0.
445 */ 445 */
446 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 446 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
447 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 447 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
448 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); 448 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
449 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 449 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
450 bip->bli_recur = 0; 450 bip->bli_recur = 0;
451 451
452 /* 452 /*
453 * Take a reference for this transaction on the buf item. 453 * Take a reference for this transaction on the buf item.
454 */ 454 */
455 atomic_inc(&bip->bli_refcount); 455 atomic_inc(&bip->bli_refcount);
456 456
457 /* 457 /*
458 * Get a log_item_desc to point at the new item. 458 * Get a log_item_desc to point at the new item.
459 */ 459 */
460 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip); 460 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);
461 461
462 /* 462 /*
463 * Initialize b_fsprivate2 so we can find it with incore_match() 463 * Initialize b_fsprivate2 so we can find it with incore_match()
464 * above. 464 * above.
465 */ 465 */
466 XFS_BUF_SET_FSPRIVATE2(bp, tp); 466 XFS_BUF_SET_FSPRIVATE2(bp, tp);
467 467
468 xfs_buftrace("TRANS READ", bp); 468 xfs_buftrace("TRANS READ", bp);
469 xfs_buf_item_trace("READ", bip); 469 xfs_buf_item_trace("READ", bip);
470 *bpp = bp; 470 *bpp = bp;
471 return 0; 471 return 0;
472 472
473 shutdown_abort: 473 shutdown_abort:
474 /* 474 /*
475 * the theory here is that the buffer is good but we're 475 * the theory here is that the buffer is good but we're
476 * bailing out because the filesystem is being forcibly 476 * bailing out because the filesystem is being forcibly
477 * shut down. So we should leave the b_flags alone since 477 * shut down. So we should leave the b_flags alone since
478 * the buffer's not staled and just get out. 478 * the buffer's not staled and just get out.
479 */ 479 */
480 #if defined(DEBUG) 480 #if defined(DEBUG)
481 if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) 481 if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp))
482 cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp); 482 cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp);
483 #endif 483 #endif
484 ASSERT((XFS_BUF_BFLAGS(bp) & (XFS_B_STALE|XFS_B_DELWRI)) != 484 ASSERT((XFS_BUF_BFLAGS(bp) & (XFS_B_STALE|XFS_B_DELWRI)) !=
485 (XFS_B_STALE|XFS_B_DELWRI)); 485 (XFS_B_STALE|XFS_B_DELWRI));
486 486
487 xfs_buftrace("READ_BUF XFSSHUTDN", bp); 487 xfs_buftrace("READ_BUF XFSSHUTDN", bp);
488 xfs_buf_relse(bp); 488 xfs_buf_relse(bp);
489 *bpp = NULL; 489 *bpp = NULL;
490 return XFS_ERROR(EIO); 490 return XFS_ERROR(EIO);
491 } 491 }
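
To make the contract above concrete, a minimal caller sketch follows; it assumes the usual XFS transaction context (mp, tp, blkno and len are placeholders) and is illustrative rather than part of this change:

        xfs_buf_t       *bp;
        int             error;

        /* Read one metadata buffer into the transaction. */
        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
                                   blkno, len, 0, &bp);
        if (error)
                return error;   /* I/O error or forced shutdown */
        if (bp == NULL)
                return 0;       /* buffer unavailable; caller retries */

        /* ... modify bp, then xfs_trans_log_buf() the changed range ... */

Note that a zero return with a NULL buffer is a valid outcome here, so callers must check both, as above.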
492 492
493 493
494 /* 494 /*
495 * Release the buffer bp which was previously acquired with one of the 495 * Release the buffer bp which was previously acquired with one of the
496 * xfs_trans_... buffer allocation routines if the buffer has not 496 * xfs_trans_... buffer allocation routines if the buffer has not
497 * been modified within this transaction. If the buffer is modified 497 * been modified within this transaction. If the buffer is modified
498 * within this transaction, do decrement the recursion count but do 498 * within this transaction, do decrement the recursion count but do
499 * not release the buffer even if the count goes to 0. If the buffer is not 499 * not release the buffer even if the count goes to 0. If the buffer is not
500 * modified within the transaction, decrement the recursion count and 500 * modified within the transaction, decrement the recursion count and
501 * release the buffer if the recursion count goes to 0. 501 * release the buffer if the recursion count goes to 0.
502 * 502 *
503 * If the buffer is to be released and it was not modified before 503 * If the buffer is to be released and it was not modified before
504 * this transaction began, then free the buf_log_item associated with it. 504 * this transaction began, then free the buf_log_item associated with it.
505 * 505 *
506 * If the transaction pointer is NULL, make this just a normal 506 * If the transaction pointer is NULL, make this just a normal
507 * brelse() call. 507 * brelse() call.
508 */ 508 */
509 void 509 void
510 xfs_trans_brelse(xfs_trans_t *tp, 510 xfs_trans_brelse(xfs_trans_t *tp,
511 xfs_buf_t *bp) 511 xfs_buf_t *bp)
512 { 512 {
513 xfs_buf_log_item_t *bip; 513 xfs_buf_log_item_t *bip;
514 xfs_log_item_t *lip; 514 xfs_log_item_t *lip;
515 xfs_log_item_desc_t *lidp; 515 xfs_log_item_desc_t *lidp;
516 516
517 /* 517 /*
518 * Default to a normal brelse() call if the tp is NULL. 518 * Default to a normal brelse() call if the tp is NULL.
519 */ 519 */
520 if (tp == NULL) { 520 if (tp == NULL) {
521 ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL); 521 ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
522 /* 522 /*
523 * If there's a buf log item attached to the buffer, 523 * If there's a buf log item attached to the buffer,
524 * then let the AIL know that the buffer is being 524 * then let the AIL know that the buffer is being
525 * unlocked. 525 * unlocked.
526 */ 526 */
527 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { 527 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
528 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 528 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
529 if (lip->li_type == XFS_LI_BUF) { 529 if (lip->li_type == XFS_LI_BUF) {
530 bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*); 530 bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
531 xfs_trans_unlocked_item( 531 xfs_trans_unlocked_item(
532 bip->bli_item.li_mountp, 532 bip->bli_item.li_mountp,
533 lip); 533 lip);
534 } 534 }
535 } 535 }
536 xfs_buf_relse(bp); 536 xfs_buf_relse(bp);
537 return; 537 return;
538 } 538 }
539 539
540 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 540 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
541 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 541 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
542 ASSERT(bip->bli_item.li_type == XFS_LI_BUF); 542 ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
543 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 543 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
544 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); 544 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
545 ASSERT(atomic_read(&bip->bli_refcount) > 0); 545 ASSERT(atomic_read(&bip->bli_refcount) > 0);
546 546
547 /* 547 /*
548 * Find the item descriptor pointing to this buffer's 548 * Find the item descriptor pointing to this buffer's
549 * log item. It must be there. 549 * log item. It must be there.
550 */ 550 */
551 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip); 551 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
552 ASSERT(lidp != NULL); 552 ASSERT(lidp != NULL);
553 553
554 /* 554 /*
555 * If the release is just for a recursive lock, 555 * If the release is just for a recursive lock,
556 * then decrement the count and return. 556 * then decrement the count and return.
557 */ 557 */
558 if (bip->bli_recur > 0) { 558 if (bip->bli_recur > 0) {
559 bip->bli_recur--; 559 bip->bli_recur--;
560 xfs_buf_item_trace("RELSE RECUR", bip); 560 xfs_buf_item_trace("RELSE RECUR", bip);
561 return; 561 return;
562 } 562 }
563 563
564 /* 564 /*
565 * If the buffer is dirty within this transaction, we can't 565 * If the buffer is dirty within this transaction, we can't
566 * release it until we commit. 566 * release it until we commit.
567 */ 567 */
568 if (lidp->lid_flags & XFS_LID_DIRTY) { 568 if (lidp->lid_flags & XFS_LID_DIRTY) {
569 xfs_buf_item_trace("RELSE DIRTY", bip); 569 xfs_buf_item_trace("RELSE DIRTY", bip);
570 return; 570 return;
571 } 571 }
572 572
573 /* 573 /*
574 * If the buffer has been invalidated, then we can't release 574 * If the buffer has been invalidated, then we can't release
575 * it until the transaction commits to disk unless it is re-dirtied 575 * it until the transaction commits to disk unless it is re-dirtied
576 * as part of this transaction. This prevents us from pulling 576 * as part of this transaction. This prevents us from pulling
577 * the item from the AIL before we should. 577 * the item from the AIL before we should.
578 */ 578 */
579 if (bip->bli_flags & XFS_BLI_STALE) { 579 if (bip->bli_flags & XFS_BLI_STALE) {
580 xfs_buf_item_trace("RELSE STALE", bip); 580 xfs_buf_item_trace("RELSE STALE", bip);
581 return; 581 return;
582 } 582 }
583 583
584 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 584 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
585 xfs_buf_item_trace("RELSE", bip); 585 xfs_buf_item_trace("RELSE", bip);
586 586
587 /* 587 /*
588 * Free up the log item descriptor tracking the released item. 588 * Free up the log item descriptor tracking the released item.
589 */ 589 */
590 xfs_trans_free_item(tp, lidp); 590 xfs_trans_free_item(tp, lidp);
591 591
592 /* 592 /*
593 * Clear the hold flag in the buf log item if it is set. 593 * Clear the hold flag in the buf log item if it is set.
594 * We wouldn't want the next user of the buffer to 594 * We wouldn't want the next user of the buffer to
595 * get confused. 595 * get confused.
596 */ 596 */
597 if (bip->bli_flags & XFS_BLI_HOLD) { 597 if (bip->bli_flags & XFS_BLI_HOLD) {
598 bip->bli_flags &= ~XFS_BLI_HOLD; 598 bip->bli_flags &= ~XFS_BLI_HOLD;
599 } 599 }
600 600
601 /* 601 /*
602 * Drop our reference to the buf log item. 602 * Drop our reference to the buf log item.
603 */ 603 */
604 atomic_dec(&bip->bli_refcount); 604 atomic_dec(&bip->bli_refcount);
605 605
606 /* 606 /*
607 * If the buf item is not tracking data in the log, then 607 * If the buf item is not tracking data in the log, then
608 * we must free it before releasing the buffer back to the 608 * we must free it before releasing the buffer back to the
609 * free pool. Before releasing the buffer to the free pool, 609 * free pool. Before releasing the buffer to the free pool,
610 * clear the transaction pointer in b_fsprivate2 to dissolve 610 * clear the transaction pointer in b_fsprivate2 to dissolve
611 * its relation to this transaction. 611 * its relation to this transaction.
612 */ 612 */
613 if (!xfs_buf_item_dirty(bip)) { 613 if (!xfs_buf_item_dirty(bip)) {
614 /*** 614 /***
615 ASSERT(bp->b_pincount == 0); 615 ASSERT(bp->b_pincount == 0);
616 ***/ 616 ***/
617 ASSERT(atomic_read(&bip->bli_refcount) == 0); 617 ASSERT(atomic_read(&bip->bli_refcount) == 0);
618 ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); 618 ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
619 ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF)); 619 ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));
620 xfs_buf_item_relse(bp); 620 xfs_buf_item_relse(bp);
621 bip = NULL; 621 bip = NULL;
622 } 622 }
623 XFS_BUF_SET_FSPRIVATE2(bp, NULL); 623 XFS_BUF_SET_FSPRIVATE2(bp, NULL);
624 624
625 /* 625 /*
626 * If we've still got a buf log item on the buffer, then 626 * If we've still got a buf log item on the buffer, then
627 * tell the AIL that the buffer is being unlocked. 627 * tell the AIL that the buffer is being unlocked.
628 */ 628 */
629 if (bip != NULL) { 629 if (bip != NULL) {
630 xfs_trans_unlocked_item(bip->bli_item.li_mountp, 630 xfs_trans_unlocked_item(bip->bli_item.li_mountp,
631 (xfs_log_item_t*)bip); 631 (xfs_log_item_t*)bip);
632 } 632 }
633 633
634 xfs_buf_relse(bp); 634 xfs_buf_relse(bp);
635 return; 635 return;
636 } 636 }
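
A sketch of the read-then-release pattern this function supports; needs_update() is a hypothetical predicate standing in for whatever the caller inspects:

        error = xfs_trans_read_buf(mp, tp, target, blkno, len, 0, &bp);
        if (error || bp == NULL)
                return error;
        if (!needs_update(bp)) {
                /* Nothing was logged: hand the buffer straight back. */
                xfs_trans_brelse(tp, bp);
                return 0;
        }
        /*
         * Otherwise modify and xfs_trans_log_buf() it; a later brelse
         * would then leave it attached until commit, per the rules above.
         */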
637 637
638 /* 638 /*
639 * Add the locked buffer to the transaction. 639 * Add the locked buffer to the transaction.
640 * The buffer must be locked, and it must not already be associated 640 * The buffer must be locked, and it must not already be associated
641 * with any transaction. 641 * with any transaction.
642 * 642 *
643 * If the buffer does not yet have a buf log item associated with it, 643 * If the buffer does not yet have a buf log item associated with it,
644 * then allocate one for it. Then add the buf item to the transaction. 644 * then allocate one for it. Then add the buf item to the transaction.
645 */ 645 */
646 void 646 void
647 xfs_trans_bjoin(xfs_trans_t *tp, 647 xfs_trans_bjoin(xfs_trans_t *tp,
648 xfs_buf_t *bp) 648 xfs_buf_t *bp)
649 { 649 {
650 xfs_buf_log_item_t *bip; 650 xfs_buf_log_item_t *bip;
651 651
652 ASSERT(XFS_BUF_ISBUSY(bp)); 652 ASSERT(XFS_BUF_ISBUSY(bp));
653 ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL); 653 ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
654 654
655 /* 655 /*
656 * The xfs_buf_log_item pointer is stored in b_fsprivate. If 656 * The xfs_buf_log_item pointer is stored in b_fsprivate. If
657 * it doesn't have one yet, then allocate one and initialize it. 657 * it doesn't have one yet, then allocate one and initialize it.
658 * The checks to see if one is there are in xfs_buf_item_init(). 658 * The checks to see if one is there are in xfs_buf_item_init().
659 */ 659 */
660 xfs_buf_item_init(bp, tp->t_mountp); 660 xfs_buf_item_init(bp, tp->t_mountp);
661 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 661 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
662 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 662 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
663 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); 663 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
664 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 664 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
665 665
666 /* 666 /*
667 * Take a reference for this transaction on the buf item. 667 * Take a reference for this transaction on the buf item.
668 */ 668 */
669 atomic_inc(&bip->bli_refcount); 669 atomic_inc(&bip->bli_refcount);
670 670
671 /* 671 /*
672 * Get a log_item_desc to point at the new item. 672 * Get a log_item_desc to point at the new item.
673 */ 673 */
674 (void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip); 674 (void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip);
675 675
676 /* 676 /*
677 * Initialize b_fsprivate2 so we can find it with incore_match() 677 * Initialize b_fsprivate2 so we can find it with incore_match()
678 * in xfs_trans_get_buf() and friends above. 678 * in xfs_trans_get_buf() and friends above.
679 */ 679 */
680 XFS_BUF_SET_FSPRIVATE2(bp, tp); 680 XFS_BUF_SET_FSPRIVATE2(bp, tp);
681 681
682 xfs_buf_item_trace("BJOIN", bip); 682 xfs_buf_item_trace("BJOIN", bip);
683 } 683 }
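
For illustration, a buffer obtained outside the xfs_trans_* helpers can be attached this way; a sketch only, reusing the xfs_buf_read_flags() call seen earlier in this file:

        bp = xfs_buf_read_flags(target, blkno, len, BUF_BUSY);
        if (bp != NULL && XFS_BUF_GETERROR(bp) == 0)
                xfs_trans_bjoin(tp, bp);        /* bp now belongs to tp */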
684 684
685 /* 685 /*
686 * Mark the buffer as not needing to be unlocked when the buf item's 686 * Mark the buffer as not needing to be unlocked when the buf item's
687 * IOP_UNLOCK() routine is called. The buffer must already be locked 687 * IOP_UNLOCK() routine is called. The buffer must already be locked
688 * and associated with the given transaction. 688 * and associated with the given transaction.
689 */ 689 */
690 /* ARGSUSED */ 690 /* ARGSUSED */
691 void 691 void
692 xfs_trans_bhold(xfs_trans_t *tp, 692 xfs_trans_bhold(xfs_trans_t *tp,
693 xfs_buf_t *bp) 693 xfs_buf_t *bp)
694 { 694 {
695 xfs_buf_log_item_t *bip; 695 xfs_buf_log_item_t *bip;
696 696
697 ASSERT(XFS_BUF_ISBUSY(bp)); 697 ASSERT(XFS_BUF_ISBUSY(bp));
698 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 698 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
699 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 699 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
700 700
701 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 701 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
702 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 702 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
703 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); 703 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
704 ASSERT(atomic_read(&bip->bli_refcount) > 0); 704 ASSERT(atomic_read(&bip->bli_refcount) > 0);
705 bip->bli_flags |= XFS_BLI_HOLD; 705 bip->bli_flags |= XFS_BLI_HOLD;
706 xfs_buf_item_trace("BHOLD", bip); 706 xfs_buf_item_trace("BHOLD", bip);
707 } 707 }
708 708
709 /* 709 /*
710 * Cancel the previous buffer hold request made on this buffer 710 * Cancel the previous buffer hold request made on this buffer
711 * for this transaction. 711 * for this transaction.
712 */ 712 */
713 void 713 void
714 xfs_trans_bhold_release(xfs_trans_t *tp, 714 xfs_trans_bhold_release(xfs_trans_t *tp,
715 xfs_buf_t *bp) 715 xfs_buf_t *bp)
716 { 716 {
717 xfs_buf_log_item_t *bip; 717 xfs_buf_log_item_t *bip;
718 718
719 ASSERT(XFS_BUF_ISBUSY(bp)); 719 ASSERT(XFS_BUF_ISBUSY(bp));
720 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 720 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
721 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 721 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
722 722
723 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 723 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
724 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 724 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
725 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); 725 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
726 ASSERT(atomic_read(&bip->bli_refcount) > 0); 726 ASSERT(atomic_read(&bip->bli_refcount) > 0);
727 ASSERT(bip->bli_flags & XFS_BLI_HOLD); 727 ASSERT(bip->bli_flags & XFS_BLI_HOLD);
728 bip->bli_flags &= ~XFS_BLI_HOLD; 728 bip->bli_flags &= ~XFS_BLI_HOLD;
729 xfs_buf_item_trace("BHOLD RELEASE", bip); 729 xfs_buf_item_trace("BHOLD RELEASE", bip);
730 } 730 }
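
The hold flag exists so a buffer can outlive a commit. A hedged sketch of the usual pattern, assuming this era's two-argument xfs_trans_commit():

        xfs_trans_bhold(tp, agbp);      /* keep agbp locked across commit */
        error = xfs_trans_commit(tp, 0);
        if (error)
                return error;
        /*
         * agbp is still locked but belongs to no transaction; a new
         * transaction can adopt it with xfs_trans_bjoin(ntp, agbp).
         * Calling xfs_trans_bhold_release() before the commit would
         * have cancelled the hold instead.
         */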
731 731
732 /* 732 /*
733 * This is called to mark bytes first through last inclusive of the given 733 * This is called to mark bytes first through last inclusive of the given
734 * buffer as needing to be logged when the transaction is committed. 734 * buffer as needing to be logged when the transaction is committed.
735 * The buffer must already be associated with the given transaction. 735 * The buffer must already be associated with the given transaction.
736 * 736 *
737 * First and last are numbers relative to the beginning of this buffer, 737 * First and last are numbers relative to the beginning of this buffer,
738 * so the first byte in the buffer is numbered 0 regardless of the 738 * so the first byte in the buffer is numbered 0 regardless of the
739 * value of b_blkno. 739 * value of b_blkno.
740 */ 740 */
741 void 741 void
742 xfs_trans_log_buf(xfs_trans_t *tp, 742 xfs_trans_log_buf(xfs_trans_t *tp,
743 xfs_buf_t *bp, 743 xfs_buf_t *bp,
744 uint first, 744 uint first,
745 uint last) 745 uint last)
746 { 746 {
747 xfs_buf_log_item_t *bip; 747 xfs_buf_log_item_t *bip;
748 xfs_log_item_desc_t *lidp; 748 xfs_log_item_desc_t *lidp;
749 749
750 ASSERT(XFS_BUF_ISBUSY(bp)); 750 ASSERT(XFS_BUF_ISBUSY(bp));
751 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 751 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
752 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 752 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
753 ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp))); 753 ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp)));
754 ASSERT((XFS_BUF_IODONE_FUNC(bp) == NULL) || 754 ASSERT((XFS_BUF_IODONE_FUNC(bp) == NULL) ||
755 (XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks)); 755 (XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks));
756 756
757 /* 757 /*
758 * Mark the buffer as needing to be written out eventually, 758 * Mark the buffer as needing to be written out eventually,
759 * and set its iodone function to remove the buffer's buf log 759 * and set its iodone function to remove the buffer's buf log
760 * item from the AIL and free it when the buffer is flushed 760 * item from the AIL and free it when the buffer is flushed
761 * to disk. See xfs_buf_attach_iodone() for more details 761 * to disk. See xfs_buf_attach_iodone() for more details
762 * on li_cb and xfs_buf_iodone_callbacks(). 762 * on li_cb and xfs_buf_iodone_callbacks().
763 * If we end up aborting this transaction, we trap this buffer 763 * If we end up aborting this transaction, we trap this buffer
764 * inside the b_bdstrat callback so that this won't get written to 764 * inside the b_bdstrat callback so that this won't get written to
765 * disk. 765 * disk.
766 */ 766 */
767 XFS_BUF_DELAYWRITE(bp); 767 XFS_BUF_DELAYWRITE(bp);
768 XFS_BUF_DONE(bp); 768 XFS_BUF_DONE(bp);
769 769
770 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 770 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
771 ASSERT(atomic_read(&bip->bli_refcount) > 0); 771 ASSERT(atomic_read(&bip->bli_refcount) > 0);
772 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); 772 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
773 bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))xfs_buf_iodone; 773 bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))xfs_buf_iodone;
774 774
775 /* 775 /*
776 * If we invalidated the buffer within this transaction, then 776 * If we invalidated the buffer within this transaction, then
777 * cancel the invalidation now that we're dirtying the buffer 777 * cancel the invalidation now that we're dirtying the buffer
778 * again. There are no races with the code in xfs_buf_item_unpin(), 778 * again. There are no races with the code in xfs_buf_item_unpin(),
779 * because we have a reference to the buffer this entire time. 779 * because we have a reference to the buffer this entire time.
780 */ 780 */
781 if (bip->bli_flags & XFS_BLI_STALE) { 781 if (bip->bli_flags & XFS_BLI_STALE) {
782 xfs_buf_item_trace("BLOG UNSTALE", bip); 782 xfs_buf_item_trace("BLOG UNSTALE", bip);
783 bip->bli_flags &= ~XFS_BLI_STALE; 783 bip->bli_flags &= ~XFS_BLI_STALE;
784 ASSERT(XFS_BUF_ISSTALE(bp)); 784 ASSERT(XFS_BUF_ISSTALE(bp));
785 XFS_BUF_UNSTALE(bp); 785 XFS_BUF_UNSTALE(bp);
786 bip->bli_format.blf_flags &= ~XFS_BLI_CANCEL; 786 bip->bli_format.blf_flags &= ~XFS_BLI_CANCEL;
787 } 787 }
788 788
789 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip); 789 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
790 ASSERT(lidp != NULL); 790 ASSERT(lidp != NULL);
791 791
792 tp->t_flags |= XFS_TRANS_DIRTY; 792 tp->t_flags |= XFS_TRANS_DIRTY;
793 lidp->lid_flags |= XFS_LID_DIRTY; 793 lidp->lid_flags |= XFS_LID_DIRTY;
794 lidp->lid_flags &= ~XFS_LID_BUF_STALE; 794 lidp->lid_flags &= ~XFS_LID_BUF_STALE;
795 bip->bli_flags |= XFS_BLI_LOGGED; 795 bip->bli_flags |= XFS_BLI_LOGGED;
796 xfs_buf_item_log(bip, first, last); 796 xfs_buf_item_log(bip, first, last);
797 xfs_buf_item_trace("BLOG", bip); 797 xfs_buf_item_trace("BLOG", bip);
798 } 798 }
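
Because first and last are byte offsets from the start of the buffer, a typical call logs exactly the bytes of one field. An illustrative sketch using an AGF counter (the endian-annotated xfs_agf_t of this era is assumed; any logged on-disk structure works the same way):

        xfs_agf_t       *agf = XFS_BUF_TO_AGF(bp);

        agf->agf_flcount = cpu_to_be32(new_flcount);    /* example field */
        xfs_trans_log_buf(tp, bp,
                offsetof(xfs_agf_t, agf_flcount),
                offsetof(xfs_agf_t, agf_flcount) +
                        sizeof(agf->agf_flcount) - 1);

xfs_buf_item_log() then turns that byte range into set bits in the item's data map.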
799 799
800 800
801 /* 801 /*
802 * This is called to invalidate a buffer that is being used within 802 * This is called to invalidate a buffer that is being used within
803 * a transaction. Typically this is because the blocks in the 803 * a transaction. Typically this is because the blocks in the
804 * buffer are being freed, so we need to prevent it from being 804 * buffer are being freed, so we need to prevent it from being
805 * written out when we're done. Allowing it to be written again 805 * written out when we're done. Allowing it to be written again
806 * might overwrite data in the free blocks if they are reallocated 806 * might overwrite data in the free blocks if they are reallocated
807 * to a file. 807 * to a file.
808 * 808 *
809 * We prevent the buffer from being written out by clearing the 809 * We prevent the buffer from being written out by clearing the
810 * B_DELWRI flag. We can't always 810 * B_DELWRI flag. We can't always
811 * get rid of the buf log item at this point, though, because 811 * get rid of the buf log item at this point, though, because
812 * the buffer may still be pinned by another transaction. If that 812 * the buffer may still be pinned by another transaction. If that
813 * is the case, then we'll wait until the buffer is committed to 813 * is the case, then we'll wait until the buffer is committed to
814 * disk for the last time (we can tell by the ref count) and 814 * disk for the last time (we can tell by the ref count) and
815 * free it in xfs_buf_item_unpin(). Until it is cleaned up we 815 * free it in xfs_buf_item_unpin(). Until it is cleaned up we
816 * will keep the buffer locked so that the buffer and buf log item 816 * will keep the buffer locked so that the buffer and buf log item
817 * are not reused. 817 * are not reused.
818 */ 818 */
819 void 819 void
820 xfs_trans_binval( 820 xfs_trans_binval(
821 xfs_trans_t *tp, 821 xfs_trans_t *tp,
822 xfs_buf_t *bp) 822 xfs_buf_t *bp)
823 { 823 {
824 xfs_log_item_desc_t *lidp; 824 xfs_log_item_desc_t *lidp;
825 xfs_buf_log_item_t *bip; 825 xfs_buf_log_item_t *bip;
826 826
827 ASSERT(XFS_BUF_ISBUSY(bp)); 827 ASSERT(XFS_BUF_ISBUSY(bp));
828 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 828 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
829 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 829 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
830 830
831 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 831 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
832 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip); 832 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
833 ASSERT(lidp != NULL); 833 ASSERT(lidp != NULL);
834 ASSERT(atomic_read(&bip->bli_refcount) > 0); 834 ASSERT(atomic_read(&bip->bli_refcount) > 0);
835 835
836 if (bip->bli_flags & XFS_BLI_STALE) { 836 if (bip->bli_flags & XFS_BLI_STALE) {
837 /* 837 /*
838 * If the buffer is already invalidated, then 838 * If the buffer is already invalidated, then
839 * just return. 839 * just return.
840 */ 840 */
841 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); 841 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
842 ASSERT(XFS_BUF_ISSTALE(bp)); 842 ASSERT(XFS_BUF_ISSTALE(bp));
843 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); 843 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
844 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_INODE_BUF)); 844 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_INODE_BUF));
845 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); 845 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
846 ASSERT(lidp->lid_flags & XFS_LID_DIRTY); 846 ASSERT(lidp->lid_flags & XFS_LID_DIRTY);
847 ASSERT(tp->t_flags & XFS_TRANS_DIRTY); 847 ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
848 xfs_buftrace("XFS_BINVAL RECUR", bp); 848 xfs_buftrace("XFS_BINVAL RECUR", bp);
849 xfs_buf_item_trace("BINVAL RECUR", bip); 849 xfs_buf_item_trace("BINVAL RECUR", bip);
850 return; 850 return;
851 } 851 }
852 852
853 /* 853 /*
854 * Clear the dirty bit in the buffer and set the STALE flag 854 * Clear the dirty bit in the buffer and set the STALE flag
855 * in the buf log item. The STALE flag will be used in 855 * in the buf log item. The STALE flag will be used in
856 * xfs_buf_item_unpin() to determine if it should clean up 856 * xfs_buf_item_unpin() to determine if it should clean up
857 * when the last reference to the buf item is given up. 857 * when the last reference to the buf item is given up.
858 * We set the XFS_BLI_CANCEL flag in the buf log format structure 858 * We set the XFS_BLI_CANCEL flag in the buf log format structure
859 * and log the buf item. This will be used at recovery time 859 * and log the buf item. This will be used at recovery time
860 * to determine that copies of the buffer in the log before 860 * to determine that copies of the buffer in the log before
861 * this should not be replayed. 861 * this should not be replayed.
862 * We mark the item descriptor and the transaction dirty so 862 * We mark the item descriptor and the transaction dirty so
863 * that we'll hold the buffer until after the commit. 863 * that we'll hold the buffer until after the commit.
864 * 864 *
865 * Since we're invalidating the buffer, we also clear the state 865 * Since we're invalidating the buffer, we also clear the state
866 * about which parts of the buffer have been logged. We also 866 * about which parts of the buffer have been logged. We also
867 * clear the flag indicating that this is an inode buffer since 867 * clear the flag indicating that this is an inode buffer since
868 * the data in the buffer will no longer be valid. 868 * the data in the buffer will no longer be valid.
869 * 869 *
870 * We set the stale bit in the buffer as well since we're getting 870 * We set the stale bit in the buffer as well since we're getting
871 * rid of it. 871 * rid of it.
872 */ 872 */
873 XFS_BUF_UNDELAYWRITE(bp); 873 XFS_BUF_UNDELAYWRITE(bp);
874 XFS_BUF_STALE(bp); 874 XFS_BUF_STALE(bp);
875 bip->bli_flags |= XFS_BLI_STALE; 875 bip->bli_flags |= XFS_BLI_STALE;
876 bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_DIRTY); 876 bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_DIRTY);
877 bip->bli_format.blf_flags &= ~XFS_BLI_INODE_BUF; 877 bip->bli_format.blf_flags &= ~XFS_BLI_INODE_BUF;
878 bip->bli_format.blf_flags |= XFS_BLI_CANCEL; 878 bip->bli_format.blf_flags |= XFS_BLI_CANCEL;
879 memset((char *)(bip->bli_format.blf_data_map), 0, 879 memset((char *)(bip->bli_format.blf_data_map), 0,
880 (bip->bli_format.blf_map_size * sizeof(uint))); 880 (bip->bli_format.blf_map_size * sizeof(uint)));
881 lidp->lid_flags |= XFS_LID_DIRTY|XFS_LID_BUF_STALE; 881 lidp->lid_flags |= XFS_LID_DIRTY|XFS_LID_BUF_STALE;
882 tp->t_flags |= XFS_TRANS_DIRTY; 882 tp->t_flags |= XFS_TRANS_DIRTY;
883 xfs_buftrace("XFS_BINVAL", bp); 883 xfs_buftrace("XFS_BINVAL", bp);
884 xfs_buf_item_trace("BINVAL", bip); 884 xfs_buf_item_trace("BINVAL", bip);
885 } 885 }
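
In practice the caller has typically just freed the extent backing the buffer; a minimal sketch:

        /* The blocks under this buffer are being freed in tp. */
        bp = xfs_trans_get_buf(tp, target, blkno, len, 0);
        if (bp != NULL)
                xfs_trans_binval(tp, bp);       /* never write it back */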
886 886
887 /* 887 /*
888 * This call is used to indicate that the buffer contains on-disk 888 * This call is used to indicate that the buffer contains on-disk
889 * inodes which must be handled specially during recovery. They 889 * inodes which must be handled specially during recovery. They
890 * require special handling because only the di_next_unlinked from 890 * require special handling because only the di_next_unlinked from
891 * the inodes in the buffer should be recovered. The rest of the 891 * the inodes in the buffer should be recovered. The rest of the
892 * data in the buffer is logged via the inodes themselves. 892 * data in the buffer is logged via the inodes themselves.
893 * 893 *
894 * All we do is set the XFS_BLI_INODE_BUF flag in the buffer's log 894 * All we do is set the XFS_BLI_INODE_BUF flag in the buffer's log
895 * format structure so that we'll know what to do at recovery time. 895 * format structure so that we'll know what to do at recovery time.
896 */ 896 */
897 /* ARGSUSED */ 897 /* ARGSUSED */
898 void 898 void
899 xfs_trans_inode_buf( 899 xfs_trans_inode_buf(
900 xfs_trans_t *tp, 900 xfs_trans_t *tp,
901 xfs_buf_t *bp) 901 xfs_buf_t *bp)
902 { 902 {
903 xfs_buf_log_item_t *bip; 903 xfs_buf_log_item_t *bip;
904 904
905 ASSERT(XFS_BUF_ISBUSY(bp)); 905 ASSERT(XFS_BUF_ISBUSY(bp));
906 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 906 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
907 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 907 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
908 908
909 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 909 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
910 ASSERT(atomic_read(&bip->bli_refcount) > 0); 910 ASSERT(atomic_read(&bip->bli_refcount) > 0);
911 911
912 bip->bli_format.blf_flags |= XFS_BLI_INODE_BUF; 912 bip->bli_format.blf_flags |= XFS_BLI_INODE_BUF;
913 } 913 }
914 914
915 /* 915 /*
916 * This call is used to indicate that the buffer is going to 916 * This call is used to indicate that the buffer is going to
917 * be staled and was an inode buffer. This means it gets 917 * be staled and was an inode buffer. This means it gets
918 * special processing during unpin, where any inodes 918 * special processing during unpin, where any inodes
919 * associated with the buffer should be removed from the AIL. 919 * associated with the buffer should be removed from the AIL.
920 * There is also special processing during recovery: 920 * There is also special processing during recovery:
921 * any replay of the inodes in the buffer needs to be 921 * any replay of the inodes in the buffer needs to be
922 * prevented as the buffer may have been reused. 922 * prevented as the buffer may have been reused.
923 */ 923 */
924 void 924 void
925 xfs_trans_stale_inode_buf( 925 xfs_trans_stale_inode_buf(
926 xfs_trans_t *tp, 926 xfs_trans_t *tp,
927 xfs_buf_t *bp) 927 xfs_buf_t *bp)
928 { 928 {
929 xfs_buf_log_item_t *bip; 929 xfs_buf_log_item_t *bip;
930 930
931 ASSERT(XFS_BUF_ISBUSY(bp)); 931 ASSERT(XFS_BUF_ISBUSY(bp));
932 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 932 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
933 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 933 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
934 934
935 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 935 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
936 ASSERT(atomic_read(&bip->bli_refcount) > 0); 936 ASSERT(atomic_read(&bip->bli_refcount) > 0);
937 937
938 bip->bli_flags |= XFS_BLI_STALE_INODE; 938 bip->bli_flags |= XFS_BLI_STALE_INODE;
939 bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) 939 bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))
940 xfs_buf_iodone; 940 xfs_buf_iodone;
941 } 941 }
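
A sketch of how this pairs with invalidation when an inode cluster is freed (the ordering mirrors the comment above):

        /* Staling an inode cluster buffer during inode freeing. */
        xfs_trans_stale_inode_buf(tp, bp);
        xfs_trans_binval(tp, bp);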
942 942
943 943
944 944
945 /* 945 /*
946 * Mark the buffer as being one which contains newly allocated 946 * Mark the buffer as being one which contains newly allocated
947 * inodes. We need to make sure that even if this buffer is 947 * inodes. We need to make sure that even if this buffer is
948 * relogged as an 'inode buf' we still recover all of the inode 948 * relogged as an 'inode buf' we still recover all of the inode
949 * images in the face of a crash. This works in coordination with 949 * images in the face of a crash. This works in coordination with
950 * xfs_buf_item_committed() to ensure that the buffer remains in the 950 * xfs_buf_item_committed() to ensure that the buffer remains in the
951 * AIL at its original location even after it has been relogged. 951 * AIL at its original location even after it has been relogged.
952 */ 952 */
953 /* ARGSUSED */ 953 /* ARGSUSED */
954 void 954 void
955 xfs_trans_inode_alloc_buf( 955 xfs_trans_inode_alloc_buf(
956 xfs_trans_t *tp, 956 xfs_trans_t *tp,
957 xfs_buf_t *bp) 957 xfs_buf_t *bp)
958 { 958 {
959 xfs_buf_log_item_t *bip; 959 xfs_buf_log_item_t *bip;
960 960
961 ASSERT(XFS_BUF_ISBUSY(bp)); 961 ASSERT(XFS_BUF_ISBUSY(bp));
962 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 962 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
963 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 963 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
964 964
965 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 965 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
966 ASSERT(atomic_read(&bip->bli_refcount) > 0); 966 ASSERT(atomic_read(&bip->bli_refcount) > 0);
967 967
968 bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF; 968 bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
969 bip->bli_format.blf_flags |= XFS_BLI_INODE_NEW_BUF;
969 } 970 }
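
The new XFS_BLI_INODE_NEW_BUF flag is consumed on the log-recovery side (in xfs_log_recover.c, outside this hunk). In outline, with a hypothetical helper standing in for the real on-disk comparison, recovery can now do:

        if (buf_f->blf_flags & XFS_BLI_INODE_NEW_BUF) {
                /*
                 * This item only initialises newly allocated inodes;
                 * if the inodes on disk are already newer than the
                 * logged copy, replaying it would roll them back.
                 */
                if (on_disk_inodes_newer(mp, bp, buf_f))  /* hypothetical */
                        return;         /* skip the replay */
        }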
970 971
971 972
972 /* 973 /*
973 * Similar to xfs_trans_inode_buf(), this marks the buffer as a cluster of 974 * Similar to xfs_trans_inode_buf(), this marks the buffer as a cluster of
974 * dquots. However, unlike in inode buffer recovery, dquot buffers get 975 * dquots. However, unlike in inode buffer recovery, dquot buffers get
975 * recovered in their entirety. (Hence, no XFS_BLI_DQUOT_ALLOC_BUF flag). 976 * recovered in their entirety. (Hence, no XFS_BLI_DQUOT_ALLOC_BUF flag).
976 * The only thing that makes dquot buffers different from regular 977 * The only thing that makes dquot buffers different from regular
977 * buffers is that we must not replay dquot bufs when recovering 978 * buffers is that we must not replay dquot bufs when recovering
978 * if a _corresponding_ quotaoff has happened. We also have to distinguish 979 * if a _corresponding_ quotaoff has happened. We also have to distinguish
979 * between usr dquot bufs and grp dquot bufs, because usr and grp quotas 980 * between usr dquot bufs and grp dquot bufs, because usr and grp quotas
980 * can be turned off independently. 981 * can be turned off independently.
981 */ 982 */
982 /* ARGSUSED */ 983 /* ARGSUSED */
983 void 984 void
984 xfs_trans_dquot_buf( 985 xfs_trans_dquot_buf(
985 xfs_trans_t *tp, 986 xfs_trans_t *tp,
986 xfs_buf_t *bp, 987 xfs_buf_t *bp,
987 uint type) 988 uint type)
988 { 989 {
989 xfs_buf_log_item_t *bip; 990 xfs_buf_log_item_t *bip;
990 991
991 ASSERT(XFS_BUF_ISBUSY(bp)); 992 ASSERT(XFS_BUF_ISBUSY(bp));
992 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 993 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
993 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 994 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
994 ASSERT(type == XFS_BLI_UDQUOT_BUF || 995 ASSERT(type == XFS_BLI_UDQUOT_BUF ||
995 type == XFS_BLI_PDQUOT_BUF || 996 type == XFS_BLI_PDQUOT_BUF ||
996 type == XFS_BLI_GDQUOT_BUF); 997 type == XFS_BLI_GDQUOT_BUF);
997 998
998 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 999 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
999 ASSERT(atomic_read(&bip->bli_refcount) > 0); 1000 ASSERT(atomic_read(&bip->bli_refcount) > 0);
1000 1001
1001 bip->bli_format.blf_flags |= type; 1002 bip->bli_format.blf_flags |= type;
1002 } 1003 }
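
Usage is a single tagging call before the buffer is logged; a sketch for a user-quota buffer:

        xfs_trans_dquot_buf(tp, bp, XFS_BLI_UDQUOT_BUF);
        xfs_trans_log_buf(tp, bp, 0, XFS_BUF_COUNT(bp) - 1);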
1003 1004
1004 /* 1005 /*
1005 * Check to see if a buffer matching the given parameters is already 1006 * Check to see if a buffer matching the given parameters is already
1006 * a part of the given transaction. Only check the first, embedded 1007 * a part of the given transaction. Only check the first, embedded
1007 * chunk, since we don't want to spend all day scanning large transactions. 1008 * chunk, since we don't want to spend all day scanning large transactions.
1008 */ 1009 */
1009 STATIC xfs_buf_t * 1010 STATIC xfs_buf_t *
1010 xfs_trans_buf_item_match( 1011 xfs_trans_buf_item_match(
1011 xfs_trans_t *tp, 1012 xfs_trans_t *tp,
1012 xfs_buftarg_t *target, 1013 xfs_buftarg_t *target,
1013 xfs_daddr_t blkno, 1014 xfs_daddr_t blkno,
1014 int len) 1015 int len)
1015 { 1016 {
1016 xfs_log_item_chunk_t *licp; 1017 xfs_log_item_chunk_t *licp;
1017 xfs_log_item_desc_t *lidp; 1018 xfs_log_item_desc_t *lidp;
1018 xfs_buf_log_item_t *blip; 1019 xfs_buf_log_item_t *blip;
1019 xfs_buf_t *bp; 1020 xfs_buf_t *bp;
1020 int i; 1021 int i;
1021 1022
1022 bp = NULL; 1023 bp = NULL;
1023 len = BBTOB(len); 1024 len = BBTOB(len);
1024 licp = &tp->t_items; 1025 licp = &tp->t_items;
1025 if (!XFS_LIC_ARE_ALL_FREE(licp)) { 1026 if (!XFS_LIC_ARE_ALL_FREE(licp)) {
1026 for (i = 0; i < licp->lic_unused; i++) { 1027 for (i = 0; i < licp->lic_unused; i++) {
1027 /* 1028 /*
1028 * Skip unoccupied slots. 1029 * Skip unoccupied slots.
1029 */ 1030 */
1030 if (XFS_LIC_ISFREE(licp, i)) { 1031 if (XFS_LIC_ISFREE(licp, i)) {
1031 continue; 1032 continue;
1032 } 1033 }
1033 1034
1034 lidp = XFS_LIC_SLOT(licp, i); 1035 lidp = XFS_LIC_SLOT(licp, i);
1035 blip = (xfs_buf_log_item_t *)lidp->lid_item; 1036 blip = (xfs_buf_log_item_t *)lidp->lid_item;
1036 if (blip->bli_item.li_type != XFS_LI_BUF) { 1037 if (blip->bli_item.li_type != XFS_LI_BUF) {
1037 continue; 1038 continue;
1038 } 1039 }
1039 1040
1040 bp = blip->bli_buf; 1041 bp = blip->bli_buf;
1041 if ((XFS_BUF_TARGET(bp) == target) && 1042 if ((XFS_BUF_TARGET(bp) == target) &&
1042 (XFS_BUF_ADDR(bp) == blkno) && 1043 (XFS_BUF_ADDR(bp) == blkno) &&
1043 (XFS_BUF_COUNT(bp) == len)) { 1044 (XFS_BUF_COUNT(bp) == len)) {
1044 /* 1045 /*
1045 * We found it. Break out and 1046 * We found it. Break out and
1046 * return the pointer to the buffer. 1047 * return the pointer to the buffer.
1047 */ 1048 */
1048 break; 1049 break;
1049 } else { 1050 } else {
1050 bp = NULL; 1051 bp = NULL;
1051 } 1052 }
1052 } 1053 }
1053 } 1054 }
1054 return bp; 1055 return bp;
1055 } 1056 }
1056 1057
1057 /* 1058 /*
1058 * Check to see if a buffer matching the given parameters is already 1059 * Check to see if a buffer matching the given parameters is already
1059 * a part of the given transaction. Check all the chunks, we 1060 * a part of the given transaction. Check all the chunks, we
1060 * want to be thorough. 1061 * want to be thorough.
1061 */ 1062 */
1062 STATIC xfs_buf_t * 1063 STATIC xfs_buf_t *
1063 xfs_trans_buf_item_match_all( 1064 xfs_trans_buf_item_match_all(
1064 xfs_trans_t *tp, 1065 xfs_trans_t *tp,
1065 xfs_buftarg_t *target, 1066 xfs_buftarg_t *target,
1066 xfs_daddr_t blkno, 1067 xfs_daddr_t blkno,
1067 int len) 1068 int len)
1068 { 1069 {
1069 xfs_log_item_chunk_t *licp; 1070 xfs_log_item_chunk_t *licp;
1070 xfs_log_item_desc_t *lidp; 1071 xfs_log_item_desc_t *lidp;
1071 xfs_buf_log_item_t *blip; 1072 xfs_buf_log_item_t *blip;
1072 xfs_buf_t *bp; 1073 xfs_buf_t *bp;
1073 int i; 1074 int i;
1074 1075
1075 bp = NULL; 1076 bp = NULL;
1076 len = BBTOB(len); 1077 len = BBTOB(len);
1077 for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) { 1078 for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) {
1078 if (XFS_LIC_ARE_ALL_FREE(licp)) { 1079 if (XFS_LIC_ARE_ALL_FREE(licp)) {
1079 ASSERT(licp == &tp->t_items); 1080 ASSERT(licp == &tp->t_items);
1080 ASSERT(licp->lic_next == NULL); 1081 ASSERT(licp->lic_next == NULL);
1081 return NULL; 1082 return NULL;
1082 } 1083 }
1083 for (i = 0; i < licp->lic_unused; i++) { 1084 for (i = 0; i < licp->lic_unused; i++) {
1084 /* 1085 /*
1085 * Skip unoccupied slots. 1086 * Skip unoccupied slots.
1086 */ 1087 */
1087 if (XFS_LIC_ISFREE(licp, i)) { 1088 if (XFS_LIC_ISFREE(licp, i)) {
1088 continue; 1089 continue;
1089 } 1090 }
1090 1091
1091 lidp = XFS_LIC_SLOT(licp, i); 1092 lidp = XFS_LIC_SLOT(licp, i);
1092 blip = (xfs_buf_log_item_t *)lidp->lid_item; 1093 blip = (xfs_buf_log_item_t *)lidp->lid_item;
1093 if (blip->bli_item.li_type != XFS_LI_BUF) { 1094 if (blip->bli_item.li_type != XFS_LI_BUF) {
1094 continue; 1095 continue;
1095 } 1096 }
1096 1097
1097 bp = blip->bli_buf; 1098 bp = blip->bli_buf;
1098 if ((XFS_BUF_TARGET(bp) == target) && 1099 if ((XFS_BUF_TARGET(bp) == target) &&
1099 (XFS_BUF_ADDR(bp) == blkno) && 1100 (XFS_BUF_ADDR(bp) == blkno) &&
1100 (XFS_BUF_COUNT(bp) == len)) { 1101 (XFS_BUF_COUNT(bp) == len)) {
1101 /* 1102 /*
1102 * We found it. Break out and 1103 * We found it. Break out and
1103 * return the pointer to the buffer. 1104 * return the pointer to the buffer.
1104 */ 1105 */
1105 return bp; 1106 return bp;
1106 } 1107 }
1107 } 1108 }
1108 } 1109 }
1109 return NULL; 1110 return NULL;
1110 } 1111 }
1111 1112