Commit 15ac08a8b2c129abccf1be47b6ab09491e013db2
Committed by Lachlan McIlroy
1 parent e055f13a6d
Exists in master and in 7 other branches
[XFS] replace b_fspriv with b_mount
Replace the b_fspriv pointer and its ugly accessors with a properly typed xfs_mount pointer. Also switch log recovery over to it instead of using b_fspriv for the mount pointer.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
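To make the change concrete, here is a minimal sketch of the before/after pattern the commit message describes. The struct names and the accessor macro spellings below are illustrative assumptions based on the description, not lines quoted from this diff.

/*
 * Sketch only: names are assumptions, not quoted from this commit.
 * Before the change, the buffer carried an untyped private pointer
 * and callers cast the mount pointer in and out via macros.
 */
struct xfs_mount;                               /* opaque; defined elsewhere */

struct xfs_buf_before {
        void            *b_fspriv;              /* untyped, needs casts */
};
#define XFS_BUF_FSPRIVATE(bp, type)     ((type)(bp)->b_fspriv)
#define XFS_BUF_SET_FSPRIVATE(bp, val)  ((bp)->b_fspriv = (void *)(val))

/*
 * After the change, the buffer carries a properly typed back-pointer,
 * so log recovery (and everyone else) can read bp->b_mount directly.
 */
struct xfs_buf_after {
        struct xfs_mount *b_mount;              /* mount this buffer belongs to */
};

The practical payoff is type safety: the compiler now rejects a stray assignment that the void-pointer accessors would have silently accepted.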
Showing 6 changed files with 19 additions and 32 deletions
fs/xfs/linux-2.6/xfs_buf.c
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. | 2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include <linux/stddef.h> | 19 | #include <linux/stddef.h> |
20 | #include <linux/errno.h> | 20 | #include <linux/errno.h> |
21 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
22 | #include <linux/pagemap.h> | 22 | #include <linux/pagemap.h> |
23 | #include <linux/init.h> | 23 | #include <linux/init.h> |
24 | #include <linux/vmalloc.h> | 24 | #include <linux/vmalloc.h> |
25 | #include <linux/bio.h> | 25 | #include <linux/bio.h> |
26 | #include <linux/sysctl.h> | 26 | #include <linux/sysctl.h> |
27 | #include <linux/proc_fs.h> | 27 | #include <linux/proc_fs.h> |
28 | #include <linux/workqueue.h> | 28 | #include <linux/workqueue.h> |
29 | #include <linux/percpu.h> | 29 | #include <linux/percpu.h> |
30 | #include <linux/blkdev.h> | 30 | #include <linux/blkdev.h> |
31 | #include <linux/hash.h> | 31 | #include <linux/hash.h> |
32 | #include <linux/kthread.h> | 32 | #include <linux/kthread.h> |
33 | #include <linux/migrate.h> | 33 | #include <linux/migrate.h> |
34 | #include <linux/backing-dev.h> | 34 | #include <linux/backing-dev.h> |
35 | #include <linux/freezer.h> | 35 | #include <linux/freezer.h> |
36 | 36 | ||
37 | static kmem_zone_t *xfs_buf_zone; | 37 | static kmem_zone_t *xfs_buf_zone; |
38 | STATIC int xfsbufd(void *); | 38 | STATIC int xfsbufd(void *); |
39 | STATIC int xfsbufd_wakeup(int, gfp_t); | 39 | STATIC int xfsbufd_wakeup(int, gfp_t); |
40 | STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); | 40 | STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); |
41 | static struct shrinker xfs_buf_shake = { | 41 | static struct shrinker xfs_buf_shake = { |
42 | .shrink = xfsbufd_wakeup, | 42 | .shrink = xfsbufd_wakeup, |
43 | .seeks = DEFAULT_SEEKS, | 43 | .seeks = DEFAULT_SEEKS, |
44 | }; | 44 | }; |
45 | 45 | ||
46 | static struct workqueue_struct *xfslogd_workqueue; | 46 | static struct workqueue_struct *xfslogd_workqueue; |
47 | struct workqueue_struct *xfsdatad_workqueue; | 47 | struct workqueue_struct *xfsdatad_workqueue; |
48 | 48 | ||
49 | #ifdef XFS_BUF_TRACE | 49 | #ifdef XFS_BUF_TRACE |
50 | void | 50 | void |
51 | xfs_buf_trace( | 51 | xfs_buf_trace( |
52 | xfs_buf_t *bp, | 52 | xfs_buf_t *bp, |
53 | char *id, | 53 | char *id, |
54 | void *data, | 54 | void *data, |
55 | void *ra) | 55 | void *ra) |
56 | { | 56 | { |
57 | ktrace_enter(xfs_buf_trace_buf, | 57 | ktrace_enter(xfs_buf_trace_buf, |
58 | bp, id, | 58 | bp, id, |
59 | (void *)(unsigned long)bp->b_flags, | 59 | (void *)(unsigned long)bp->b_flags, |
60 | (void *)(unsigned long)bp->b_hold.counter, | 60 | (void *)(unsigned long)bp->b_hold.counter, |
61 | (void *)(unsigned long)bp->b_sema.count, | 61 | (void *)(unsigned long)bp->b_sema.count, |
62 | (void *)current, | 62 | (void *)current, |
63 | data, ra, | 63 | data, ra, |
64 | (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff), | 64 | (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff), |
65 | (void *)(unsigned long)(bp->b_file_offset & 0xffffffff), | 65 | (void *)(unsigned long)(bp->b_file_offset & 0xffffffff), |
66 | (void *)(unsigned long)bp->b_buffer_length, | 66 | (void *)(unsigned long)bp->b_buffer_length, |
67 | NULL, NULL, NULL, NULL, NULL); | 67 | NULL, NULL, NULL, NULL, NULL); |
68 | } | 68 | } |
69 | ktrace_t *xfs_buf_trace_buf; | 69 | ktrace_t *xfs_buf_trace_buf; |
70 | #define XFS_BUF_TRACE_SIZE 4096 | 70 | #define XFS_BUF_TRACE_SIZE 4096 |
71 | #define XB_TRACE(bp, id, data) \ | 71 | #define XB_TRACE(bp, id, data) \ |
72 | xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0)) | 72 | xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0)) |
73 | #else | 73 | #else |
74 | #define XB_TRACE(bp, id, data) do { } while (0) | 74 | #define XB_TRACE(bp, id, data) do { } while (0) |
75 | #endif | 75 | #endif |
76 | 76 | ||
77 | #ifdef XFS_BUF_LOCK_TRACKING | 77 | #ifdef XFS_BUF_LOCK_TRACKING |
78 | # define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) | 78 | # define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) |
79 | # define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1) | 79 | # define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1) |
80 | # define XB_GET_OWNER(bp) ((bp)->b_last_holder) | 80 | # define XB_GET_OWNER(bp) ((bp)->b_last_holder) |
81 | #else | 81 | #else |
82 | # define XB_SET_OWNER(bp) do { } while (0) | 82 | # define XB_SET_OWNER(bp) do { } while (0) |
83 | # define XB_CLEAR_OWNER(bp) do { } while (0) | 83 | # define XB_CLEAR_OWNER(bp) do { } while (0) |
84 | # define XB_GET_OWNER(bp) do { } while (0) | 84 | # define XB_GET_OWNER(bp) do { } while (0) |
85 | #endif | 85 | #endif |
86 | 86 | ||
87 | #define xb_to_gfp(flags) \ | 87 | #define xb_to_gfp(flags) \ |
88 | ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \ | 88 | ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \ |
89 | ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN) | 89 | ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN) |
90 | 90 | ||
91 | #define xb_to_km(flags) \ | 91 | #define xb_to_km(flags) \ |
92 | (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) | 92 | (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) |
93 | 93 | ||
94 | #define xfs_buf_allocate(flags) \ | 94 | #define xfs_buf_allocate(flags) \ |
95 | kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags)) | 95 | kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags)) |
96 | #define xfs_buf_deallocate(bp) \ | 96 | #define xfs_buf_deallocate(bp) \ |
97 | kmem_zone_free(xfs_buf_zone, (bp)); | 97 | kmem_zone_free(xfs_buf_zone, (bp)); |
98 | 98 | ||
99 | /* | 99 | /* |
100 | * Page Region interfaces. | 100 | * Page Region interfaces. |
101 | * | 101 | * |
102 | * For pages in filesystems where the blocksize is smaller than the | 102 | * For pages in filesystems where the blocksize is smaller than the |
103 | * pagesize, we use the page->private field (long) to hold a bitmap | 103 | * pagesize, we use the page->private field (long) to hold a bitmap |
104 | * of uptodate regions within the page. | 104 | * of uptodate regions within the page. |
105 | * | 105 | * |
106 | * Each such region is "bytes per page / bits per long" bytes long. | 106 | * Each such region is "bytes per page / bits per long" bytes long. |
107 | * | 107 | * |
108 | * NBPPR == number-of-bytes-per-page-region | 108 | * NBPPR == number-of-bytes-per-page-region |
109 | * BTOPR == bytes-to-page-region (rounded up) | 109 | * BTOPR == bytes-to-page-region (rounded up) |
110 | * BTOPRT == bytes-to-page-region-truncated (rounded down) | 110 | * BTOPRT == bytes-to-page-region-truncated (rounded down) |
111 | */ | 111 | */ |
112 | #if (BITS_PER_LONG == 32) | 112 | #if (BITS_PER_LONG == 32) |
113 | #define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */ | 113 | #define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */ |
114 | #elif (BITS_PER_LONG == 64) | 114 | #elif (BITS_PER_LONG == 64) |
115 | #define PRSHIFT (PAGE_CACHE_SHIFT - 6) /* (64 == 1<<6) */ | 115 | #define PRSHIFT (PAGE_CACHE_SHIFT - 6) /* (64 == 1<<6) */ |
116 | #else | 116 | #else |
117 | #error BITS_PER_LONG must be 32 or 64 | 117 | #error BITS_PER_LONG must be 32 or 64 |
118 | #endif | 118 | #endif |
119 | #define NBPPR (PAGE_CACHE_SIZE/BITS_PER_LONG) | 119 | #define NBPPR (PAGE_CACHE_SIZE/BITS_PER_LONG) |
120 | #define BTOPR(b) (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT) | 120 | #define BTOPR(b) (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT) |
121 | #define BTOPRT(b) (((unsigned int)(b) >> PRSHIFT)) | 121 | #define BTOPRT(b) (((unsigned int)(b) >> PRSHIFT)) |
122 | 122 | ||
123 | STATIC unsigned long | 123 | STATIC unsigned long |
124 | page_region_mask( | 124 | page_region_mask( |
125 | size_t offset, | 125 | size_t offset, |
126 | size_t length) | 126 | size_t length) |
127 | { | 127 | { |
128 | unsigned long mask; | 128 | unsigned long mask; |
129 | int first, final; | 129 | int first, final; |
130 | 130 | ||
131 | first = BTOPR(offset); | 131 | first = BTOPR(offset); |
132 | final = BTOPRT(offset + length - 1); | 132 | final = BTOPRT(offset + length - 1); |
133 | first = min(first, final); | 133 | first = min(first, final); |
134 | 134 | ||
135 | mask = ~0UL; | 135 | mask = ~0UL; |
136 | mask <<= BITS_PER_LONG - (final - first); | 136 | mask <<= BITS_PER_LONG - (final - first); |
137 | mask >>= BITS_PER_LONG - (final); | 137 | mask >>= BITS_PER_LONG - (final); |
138 | 138 | ||
139 | ASSERT(offset + length <= PAGE_CACHE_SIZE); | 139 | ASSERT(offset + length <= PAGE_CACHE_SIZE); |
140 | ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0); | 140 | ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0); |
141 | 141 | ||
142 | return mask; | 142 | return mask; |
143 | } | 143 | } |
144 | 144 | ||
145 | STATIC_INLINE void | 145 | STATIC_INLINE void |
146 | set_page_region( | 146 | set_page_region( |
147 | struct page *page, | 147 | struct page *page, |
148 | size_t offset, | 148 | size_t offset, |
149 | size_t length) | 149 | size_t length) |
150 | { | 150 | { |
151 | set_page_private(page, | 151 | set_page_private(page, |
152 | page_private(page) | page_region_mask(offset, length)); | 152 | page_private(page) | page_region_mask(offset, length)); |
153 | if (page_private(page) == ~0UL) | 153 | if (page_private(page) == ~0UL) |
154 | SetPageUptodate(page); | 154 | SetPageUptodate(page); |
155 | } | 155 | } |
156 | 156 | ||
157 | STATIC_INLINE int | 157 | STATIC_INLINE int |
158 | test_page_region( | 158 | test_page_region( |
159 | struct page *page, | 159 | struct page *page, |
160 | size_t offset, | 160 | size_t offset, |
161 | size_t length) | 161 | size_t length) |
162 | { | 162 | { |
163 | unsigned long mask = page_region_mask(offset, length); | 163 | unsigned long mask = page_region_mask(offset, length); |
164 | 164 | ||
165 | return (mask && (page_private(page) & mask) == mask); | 165 | return (mask && (page_private(page) & mask) == mask); |
166 | } | 166 | } |
167 | 167 | ||
168 | /* | 168 | /* |
169 | * Mapping of multi-page buffers into contiguous virtual space | 169 | * Mapping of multi-page buffers into contiguous virtual space |
170 | */ | 170 | */ |
171 | 171 | ||
172 | typedef struct a_list { | 172 | typedef struct a_list { |
173 | void *vm_addr; | 173 | void *vm_addr; |
174 | struct a_list *next; | 174 | struct a_list *next; |
175 | } a_list_t; | 175 | } a_list_t; |
176 | 176 | ||
177 | static a_list_t *as_free_head; | 177 | static a_list_t *as_free_head; |
178 | static int as_list_len; | 178 | static int as_list_len; |
179 | static DEFINE_SPINLOCK(as_lock); | 179 | static DEFINE_SPINLOCK(as_lock); |
180 | 180 | ||
181 | /* | 181 | /* |
182 | * Try to batch vunmaps because they are costly. | 182 | * Try to batch vunmaps because they are costly. |
183 | */ | 183 | */ |
184 | STATIC void | 184 | STATIC void |
185 | free_address( | 185 | free_address( |
186 | void *addr) | 186 | void *addr) |
187 | { | 187 | { |
188 | a_list_t *aentry; | 188 | a_list_t *aentry; |
189 | 189 | ||
190 | #ifdef CONFIG_XEN | 190 | #ifdef CONFIG_XEN |
191 | /* | 191 | /* |
192 | * Xen needs to be able to make sure it can get an exclusive | 192 | * Xen needs to be able to make sure it can get an exclusive |
193 | * RO mapping of pages it wants to turn into a pagetable. If | 193 | * RO mapping of pages it wants to turn into a pagetable. If |
194 | * a newly allocated page is also still being vmap()ed by xfs, | 194 | * a newly allocated page is also still being vmap()ed by xfs, |
195 | * it will cause pagetable construction to fail. This is a | 195 | * it will cause pagetable construction to fail. This is a |
196 | * quick workaround to always eagerly unmap pages so that Xen | 196 | * quick workaround to always eagerly unmap pages so that Xen |
197 | * is happy. | 197 | * is happy. |
198 | */ | 198 | */ |
199 | vunmap(addr); | 199 | vunmap(addr); |
200 | return; | 200 | return; |
201 | #endif | 201 | #endif |
202 | 202 | ||
203 | aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT); | 203 | aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT); |
204 | if (likely(aentry)) { | 204 | if (likely(aentry)) { |
205 | spin_lock(&as_lock); | 205 | spin_lock(&as_lock); |
206 | aentry->next = as_free_head; | 206 | aentry->next = as_free_head; |
207 | aentry->vm_addr = addr; | 207 | aentry->vm_addr = addr; |
208 | as_free_head = aentry; | 208 | as_free_head = aentry; |
209 | as_list_len++; | 209 | as_list_len++; |
210 | spin_unlock(&as_lock); | 210 | spin_unlock(&as_lock); |
211 | } else { | 211 | } else { |
212 | vunmap(addr); | 212 | vunmap(addr); |
213 | } | 213 | } |
214 | } | 214 | } |
215 | 215 | ||
216 | STATIC void | 216 | STATIC void |
217 | purge_addresses(void) | 217 | purge_addresses(void) |
218 | { | 218 | { |
219 | a_list_t *aentry, *old; | 219 | a_list_t *aentry, *old; |
220 | 220 | ||
221 | if (as_free_head == NULL) | 221 | if (as_free_head == NULL) |
222 | return; | 222 | return; |
223 | 223 | ||
224 | spin_lock(&as_lock); | 224 | spin_lock(&as_lock); |
225 | aentry = as_free_head; | 225 | aentry = as_free_head; |
226 | as_free_head = NULL; | 226 | as_free_head = NULL; |
227 | as_list_len = 0; | 227 | as_list_len = 0; |
228 | spin_unlock(&as_lock); | 228 | spin_unlock(&as_lock); |
229 | 229 | ||
230 | while ((old = aentry) != NULL) { | 230 | while ((old = aentry) != NULL) { |
231 | vunmap(aentry->vm_addr); | 231 | vunmap(aentry->vm_addr); |
232 | aentry = aentry->next; | 232 | aentry = aentry->next; |
233 | kfree(old); | 233 | kfree(old); |
234 | } | 234 | } |
235 | } | 235 | } |
236 | 236 | ||
237 | /* | 237 | /* |
238 | * Internal xfs_buf_t object manipulation | 238 | * Internal xfs_buf_t object manipulation |
239 | */ | 239 | */ |
240 | 240 | ||
241 | STATIC void | 241 | STATIC void |
242 | _xfs_buf_initialize( | 242 | _xfs_buf_initialize( |
243 | xfs_buf_t *bp, | 243 | xfs_buf_t *bp, |
244 | xfs_buftarg_t *target, | 244 | xfs_buftarg_t *target, |
245 | xfs_off_t range_base, | 245 | xfs_off_t range_base, |
246 | size_t range_length, | 246 | size_t range_length, |
247 | xfs_buf_flags_t flags) | 247 | xfs_buf_flags_t flags) |
248 | { | 248 | { |
249 | /* | 249 | /* |
250 | * We don't want certain flags to appear in b_flags. | 250 | * We don't want certain flags to appear in b_flags. |
251 | */ | 251 | */ |
252 | flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD); | 252 | flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD); |
253 | 253 | ||
254 | memset(bp, 0, sizeof(xfs_buf_t)); | 254 | memset(bp, 0, sizeof(xfs_buf_t)); |
255 | atomic_set(&bp->b_hold, 1); | 255 | atomic_set(&bp->b_hold, 1); |
256 | init_completion(&bp->b_iowait); | 256 | init_completion(&bp->b_iowait); |
257 | INIT_LIST_HEAD(&bp->b_list); | 257 | INIT_LIST_HEAD(&bp->b_list); |
258 | INIT_LIST_HEAD(&bp->b_hash_list); | 258 | INIT_LIST_HEAD(&bp->b_hash_list); |
259 | init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */ | 259 | init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */ |
260 | XB_SET_OWNER(bp); | 260 | XB_SET_OWNER(bp); |
261 | bp->b_target = target; | 261 | bp->b_target = target; |
262 | bp->b_file_offset = range_base; | 262 | bp->b_file_offset = range_base; |
263 | /* | 263 | /* |
264 | * Set buffer_length and count_desired to the same value initially. | 264 | * Set buffer_length and count_desired to the same value initially. |
265 | * I/O routines should use count_desired, which will be the same in | 265 | * I/O routines should use count_desired, which will be the same in |
266 | * most cases but may be reset (e.g. XFS recovery). | 266 | * most cases but may be reset (e.g. XFS recovery). |
267 | */ | 267 | */ |
268 | bp->b_buffer_length = bp->b_count_desired = range_length; | 268 | bp->b_buffer_length = bp->b_count_desired = range_length; |
269 | bp->b_flags = flags; | 269 | bp->b_flags = flags; |
270 | bp->b_bn = XFS_BUF_DADDR_NULL; | 270 | bp->b_bn = XFS_BUF_DADDR_NULL; |
271 | atomic_set(&bp->b_pin_count, 0); | 271 | atomic_set(&bp->b_pin_count, 0); |
272 | init_waitqueue_head(&bp->b_waiters); | 272 | init_waitqueue_head(&bp->b_waiters); |
273 | 273 | ||
274 | XFS_STATS_INC(xb_create); | 274 | XFS_STATS_INC(xb_create); |
275 | XB_TRACE(bp, "initialize", target); | 275 | XB_TRACE(bp, "initialize", target); |
276 | } | 276 | } |
277 | 277 | ||
278 | /* | 278 | /* |
279 | * Allocate a page array capable of holding a specified number | 279 | * Allocate a page array capable of holding a specified number |
280 | * of pages, and point the page buf at it. | 280 | * of pages, and point the page buf at it. |
281 | */ | 281 | */ |
282 | STATIC int | 282 | STATIC int |
283 | _xfs_buf_get_pages( | 283 | _xfs_buf_get_pages( |
284 | xfs_buf_t *bp, | 284 | xfs_buf_t *bp, |
285 | int page_count, | 285 | int page_count, |
286 | xfs_buf_flags_t flags) | 286 | xfs_buf_flags_t flags) |
287 | { | 287 | { |
288 | /* Make sure that we have a page list */ | 288 | /* Make sure that we have a page list */ |
289 | if (bp->b_pages == NULL) { | 289 | if (bp->b_pages == NULL) { |
290 | bp->b_offset = xfs_buf_poff(bp->b_file_offset); | 290 | bp->b_offset = xfs_buf_poff(bp->b_file_offset); |
291 | bp->b_page_count = page_count; | 291 | bp->b_page_count = page_count; |
292 | if (page_count <= XB_PAGES) { | 292 | if (page_count <= XB_PAGES) { |
293 | bp->b_pages = bp->b_page_array; | 293 | bp->b_pages = bp->b_page_array; |
294 | } else { | 294 | } else { |
295 | bp->b_pages = kmem_alloc(sizeof(struct page *) * | 295 | bp->b_pages = kmem_alloc(sizeof(struct page *) * |
296 | page_count, xb_to_km(flags)); | 296 | page_count, xb_to_km(flags)); |
297 | if (bp->b_pages == NULL) | 297 | if (bp->b_pages == NULL) |
298 | return -ENOMEM; | 298 | return -ENOMEM; |
299 | } | 299 | } |
300 | memset(bp->b_pages, 0, sizeof(struct page *) * page_count); | 300 | memset(bp->b_pages, 0, sizeof(struct page *) * page_count); |
301 | } | 301 | } |
302 | return 0; | 302 | return 0; |
303 | } | 303 | } |
304 | 304 | ||
305 | /* | 305 | /* |
306 | * Frees b_pages if it was allocated. | 306 | * Frees b_pages if it was allocated. |
307 | */ | 307 | */ |
308 | STATIC void | 308 | STATIC void |
309 | _xfs_buf_free_pages( | 309 | _xfs_buf_free_pages( |
310 | xfs_buf_t *bp) | 310 | xfs_buf_t *bp) |
311 | { | 311 | { |
312 | if (bp->b_pages != bp->b_page_array) { | 312 | if (bp->b_pages != bp->b_page_array) { |
313 | kmem_free(bp->b_pages); | 313 | kmem_free(bp->b_pages); |
314 | } | 314 | } |
315 | } | 315 | } |
316 | 316 | ||
317 | /* | 317 | /* |
318 | * Releases the specified buffer. | 318 | * Releases the specified buffer. |
319 | * | 319 | * |
320 | * The modification state of any associated pages is left unchanged. | 320 | * The modification state of any associated pages is left unchanged. |
321 | * The buffer most not be on any hash - use xfs_buf_rele instead for | 321 | * The buffer most not be on any hash - use xfs_buf_rele instead for |
322 | * hashed and refcounted buffers | 322 | * hashed and refcounted buffers |
323 | */ | 323 | */ |
324 | void | 324 | void |
325 | xfs_buf_free( | 325 | xfs_buf_free( |
326 | xfs_buf_t *bp) | 326 | xfs_buf_t *bp) |
327 | { | 327 | { |
328 | XB_TRACE(bp, "free", 0); | 328 | XB_TRACE(bp, "free", 0); |
329 | 329 | ||
330 | ASSERT(list_empty(&bp->b_hash_list)); | 330 | ASSERT(list_empty(&bp->b_hash_list)); |
331 | 331 | ||
332 | if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { | 332 | if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { |
333 | uint i; | 333 | uint i; |
334 | 334 | ||
335 | if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) | 335 | if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) |
336 | free_address(bp->b_addr - bp->b_offset); | 336 | free_address(bp->b_addr - bp->b_offset); |
337 | 337 | ||
338 | for (i = 0; i < bp->b_page_count; i++) { | 338 | for (i = 0; i < bp->b_page_count; i++) { |
339 | struct page *page = bp->b_pages[i]; | 339 | struct page *page = bp->b_pages[i]; |
340 | 340 | ||
341 | if (bp->b_flags & _XBF_PAGE_CACHE) | 341 | if (bp->b_flags & _XBF_PAGE_CACHE) |
342 | ASSERT(!PagePrivate(page)); | 342 | ASSERT(!PagePrivate(page)); |
343 | page_cache_release(page); | 343 | page_cache_release(page); |
344 | } | 344 | } |
345 | _xfs_buf_free_pages(bp); | 345 | _xfs_buf_free_pages(bp); |
346 | } | 346 | } |
347 | 347 | ||
348 | xfs_buf_deallocate(bp); | 348 | xfs_buf_deallocate(bp); |
349 | } | 349 | } |
350 | 350 | ||
351 | /* | 351 | /* |
352 | * Finds all pages for buffer in question and builds it's page list. | 352 | * Finds all pages for buffer in question and builds it's page list. |
353 | */ | 353 | */ |
354 | STATIC int | 354 | STATIC int |
355 | _xfs_buf_lookup_pages( | 355 | _xfs_buf_lookup_pages( |
356 | xfs_buf_t *bp, | 356 | xfs_buf_t *bp, |
357 | uint flags) | 357 | uint flags) |
358 | { | 358 | { |
359 | struct address_space *mapping = bp->b_target->bt_mapping; | 359 | struct address_space *mapping = bp->b_target->bt_mapping; |
360 | size_t blocksize = bp->b_target->bt_bsize; | 360 | size_t blocksize = bp->b_target->bt_bsize; |
361 | size_t size = bp->b_count_desired; | 361 | size_t size = bp->b_count_desired; |
362 | size_t nbytes, offset; | 362 | size_t nbytes, offset; |
363 | gfp_t gfp_mask = xb_to_gfp(flags); | 363 | gfp_t gfp_mask = xb_to_gfp(flags); |
364 | unsigned short page_count, i; | 364 | unsigned short page_count, i; |
365 | pgoff_t first; | 365 | pgoff_t first; |
366 | xfs_off_t end; | 366 | xfs_off_t end; |
367 | int error; | 367 | int error; |
368 | 368 | ||
369 | end = bp->b_file_offset + bp->b_buffer_length; | 369 | end = bp->b_file_offset + bp->b_buffer_length; |
370 | page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset); | 370 | page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset); |
371 | 371 | ||
372 | error = _xfs_buf_get_pages(bp, page_count, flags); | 372 | error = _xfs_buf_get_pages(bp, page_count, flags); |
373 | if (unlikely(error)) | 373 | if (unlikely(error)) |
374 | return error; | 374 | return error; |
375 | bp->b_flags |= _XBF_PAGE_CACHE; | 375 | bp->b_flags |= _XBF_PAGE_CACHE; |
376 | 376 | ||
377 | offset = bp->b_offset; | 377 | offset = bp->b_offset; |
378 | first = bp->b_file_offset >> PAGE_CACHE_SHIFT; | 378 | first = bp->b_file_offset >> PAGE_CACHE_SHIFT; |
379 | 379 | ||
380 | for (i = 0; i < bp->b_page_count; i++) { | 380 | for (i = 0; i < bp->b_page_count; i++) { |
381 | struct page *page; | 381 | struct page *page; |
382 | uint retries = 0; | 382 | uint retries = 0; |
383 | 383 | ||
384 | retry: | 384 | retry: |
385 | page = find_or_create_page(mapping, first + i, gfp_mask); | 385 | page = find_or_create_page(mapping, first + i, gfp_mask); |
386 | if (unlikely(page == NULL)) { | 386 | if (unlikely(page == NULL)) { |
387 | if (flags & XBF_READ_AHEAD) { | 387 | if (flags & XBF_READ_AHEAD) { |
388 | bp->b_page_count = i; | 388 | bp->b_page_count = i; |
389 | for (i = 0; i < bp->b_page_count; i++) | 389 | for (i = 0; i < bp->b_page_count; i++) |
390 | unlock_page(bp->b_pages[i]); | 390 | unlock_page(bp->b_pages[i]); |
391 | return -ENOMEM; | 391 | return -ENOMEM; |
392 | } | 392 | } |
393 | 393 | ||
394 | /* | 394 | /* |
395 | * This could deadlock. | 395 | * This could deadlock. |
396 | * | 396 | * |
397 | * But until all the XFS lowlevel code is revamped to | 397 | * But until all the XFS lowlevel code is revamped to |
398 | * handle buffer allocation failures we can't do much. | 398 | * handle buffer allocation failures we can't do much. |
399 | */ | 399 | */ |
400 | if (!(++retries % 100)) | 400 | if (!(++retries % 100)) |
401 | printk(KERN_ERR | 401 | printk(KERN_ERR |
402 | "XFS: possible memory allocation " | 402 | "XFS: possible memory allocation " |
403 | "deadlock in %s (mode:0x%x)\n", | 403 | "deadlock in %s (mode:0x%x)\n", |
404 | __func__, gfp_mask); | 404 | __func__, gfp_mask); |
405 | 405 | ||
406 | XFS_STATS_INC(xb_page_retries); | 406 | XFS_STATS_INC(xb_page_retries); |
407 | xfsbufd_wakeup(0, gfp_mask); | 407 | xfsbufd_wakeup(0, gfp_mask); |
408 | congestion_wait(WRITE, HZ/50); | 408 | congestion_wait(WRITE, HZ/50); |
409 | goto retry; | 409 | goto retry; |
410 | } | 410 | } |
411 | 411 | ||
412 | XFS_STATS_INC(xb_page_found); | 412 | XFS_STATS_INC(xb_page_found); |
413 | 413 | ||
414 | nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); | 414 | nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); |
415 | size -= nbytes; | 415 | size -= nbytes; |
416 | 416 | ||
417 | ASSERT(!PagePrivate(page)); | 417 | ASSERT(!PagePrivate(page)); |
418 | if (!PageUptodate(page)) { | 418 | if (!PageUptodate(page)) { |
419 | page_count--; | 419 | page_count--; |
420 | if (blocksize >= PAGE_CACHE_SIZE) { | 420 | if (blocksize >= PAGE_CACHE_SIZE) { |
421 | if (flags & XBF_READ) | 421 | if (flags & XBF_READ) |
422 | bp->b_flags |= _XBF_PAGE_LOCKED; | 422 | bp->b_flags |= _XBF_PAGE_LOCKED; |
423 | } else if (!PagePrivate(page)) { | 423 | } else if (!PagePrivate(page)) { |
424 | if (test_page_region(page, offset, nbytes)) | 424 | if (test_page_region(page, offset, nbytes)) |
425 | page_count++; | 425 | page_count++; |
426 | } | 426 | } |
427 | } | 427 | } |
428 | 428 | ||
429 | bp->b_pages[i] = page; | 429 | bp->b_pages[i] = page; |
430 | offset = 0; | 430 | offset = 0; |
431 | } | 431 | } |
432 | 432 | ||
433 | if (!(bp->b_flags & _XBF_PAGE_LOCKED)) { | 433 | if (!(bp->b_flags & _XBF_PAGE_LOCKED)) { |
434 | for (i = 0; i < bp->b_page_count; i++) | 434 | for (i = 0; i < bp->b_page_count; i++) |
435 | unlock_page(bp->b_pages[i]); | 435 | unlock_page(bp->b_pages[i]); |
436 | } | 436 | } |
437 | 437 | ||
438 | if (page_count == bp->b_page_count) | 438 | if (page_count == bp->b_page_count) |
439 | bp->b_flags |= XBF_DONE; | 439 | bp->b_flags |= XBF_DONE; |
440 | 440 | ||
441 | XB_TRACE(bp, "lookup_pages", (long)page_count); | 441 | XB_TRACE(bp, "lookup_pages", (long)page_count); |
442 | return error; | 442 | return error; |
443 | } | 443 | } |
444 | 444 | ||
445 | /* | 445 | /* |
446 | * Map buffer into kernel address-space if nessecary. | 446 | * Map buffer into kernel address-space if nessecary. |
447 | */ | 447 | */ |
448 | STATIC int | 448 | STATIC int |
449 | _xfs_buf_map_pages( | 449 | _xfs_buf_map_pages( |
450 | xfs_buf_t *bp, | 450 | xfs_buf_t *bp, |
451 | uint flags) | 451 | uint flags) |
452 | { | 452 | { |
453 | /* A single page buffer is always mappable */ | 453 | /* A single page buffer is always mappable */ |
454 | if (bp->b_page_count == 1) { | 454 | if (bp->b_page_count == 1) { |
455 | bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; | 455 | bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; |
456 | bp->b_flags |= XBF_MAPPED; | 456 | bp->b_flags |= XBF_MAPPED; |
457 | } else if (flags & XBF_MAPPED) { | 457 | } else if (flags & XBF_MAPPED) { |
458 | if (as_list_len > 64) | 458 | if (as_list_len > 64) |
459 | purge_addresses(); | 459 | purge_addresses(); |
460 | bp->b_addr = vmap(bp->b_pages, bp->b_page_count, | 460 | bp->b_addr = vmap(bp->b_pages, bp->b_page_count, |
461 | VM_MAP, PAGE_KERNEL); | 461 | VM_MAP, PAGE_KERNEL); |
462 | if (unlikely(bp->b_addr == NULL)) | 462 | if (unlikely(bp->b_addr == NULL)) |
463 | return -ENOMEM; | 463 | return -ENOMEM; |
464 | bp->b_addr += bp->b_offset; | 464 | bp->b_addr += bp->b_offset; |
465 | bp->b_flags |= XBF_MAPPED; | 465 | bp->b_flags |= XBF_MAPPED; |
466 | } | 466 | } |
467 | 467 | ||
468 | return 0; | 468 | return 0; |
469 | } | 469 | } |
470 | 470 | ||
471 | /* | 471 | /* |
472 | * Finding and Reading Buffers | 472 | * Finding and Reading Buffers |
473 | */ | 473 | */ |
474 | 474 | ||
475 | /* | 475 | /* |
476 | * Look up, and creates if absent, a lockable buffer for | 476 | * Look up, and creates if absent, a lockable buffer for |
477 | * a given range of an inode. The buffer is returned | 477 | * a given range of an inode. The buffer is returned |
478 | * locked. If other overlapping buffers exist, they are | 478 | * locked. If other overlapping buffers exist, they are |
479 | * released before the new buffer is created and locked, | 479 | * released before the new buffer is created and locked, |
480 | * which may imply that this call will block until those buffers | 480 | * which may imply that this call will block until those buffers |
481 | * are unlocked. No I/O is implied by this call. | 481 | * are unlocked. No I/O is implied by this call. |
482 | */ | 482 | */ |
483 | xfs_buf_t * | 483 | xfs_buf_t * |
484 | _xfs_buf_find( | 484 | _xfs_buf_find( |
485 | xfs_buftarg_t *btp, /* block device target */ | 485 | xfs_buftarg_t *btp, /* block device target */ |
486 | xfs_off_t ioff, /* starting offset of range */ | 486 | xfs_off_t ioff, /* starting offset of range */ |
487 | size_t isize, /* length of range */ | 487 | size_t isize, /* length of range */ |
488 | xfs_buf_flags_t flags, | 488 | xfs_buf_flags_t flags, |
489 | xfs_buf_t *new_bp) | 489 | xfs_buf_t *new_bp) |
490 | { | 490 | { |
491 | xfs_off_t range_base; | 491 | xfs_off_t range_base; |
492 | size_t range_length; | 492 | size_t range_length; |
493 | xfs_bufhash_t *hash; | 493 | xfs_bufhash_t *hash; |
494 | xfs_buf_t *bp, *n; | 494 | xfs_buf_t *bp, *n; |
495 | 495 | ||
496 | range_base = (ioff << BBSHIFT); | 496 | range_base = (ioff << BBSHIFT); |
497 | range_length = (isize << BBSHIFT); | 497 | range_length = (isize << BBSHIFT); |
498 | 498 | ||
499 | /* Check for IOs smaller than the sector size / not sector aligned */ | 499 | /* Check for IOs smaller than the sector size / not sector aligned */ |
500 | ASSERT(!(range_length < (1 << btp->bt_sshift))); | 500 | ASSERT(!(range_length < (1 << btp->bt_sshift))); |
501 | ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); | 501 | ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); |
502 | 502 | ||
503 | hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)]; | 503 | hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)]; |
504 | 504 | ||
505 | spin_lock(&hash->bh_lock); | 505 | spin_lock(&hash->bh_lock); |
506 | 506 | ||
507 | list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { | 507 | list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { |
508 | ASSERT(btp == bp->b_target); | 508 | ASSERT(btp == bp->b_target); |
509 | if (bp->b_file_offset == range_base && | 509 | if (bp->b_file_offset == range_base && |
510 | bp->b_buffer_length == range_length) { | 510 | bp->b_buffer_length == range_length) { |
511 | /* | 511 | /* |
512 | * If we look at something, bring it to the | 512 | * If we look at something, bring it to the |
513 | * front of the list for next time. | 513 | * front of the list for next time. |
514 | */ | 514 | */ |
515 | atomic_inc(&bp->b_hold); | 515 | atomic_inc(&bp->b_hold); |
516 | list_move(&bp->b_hash_list, &hash->bh_list); | 516 | list_move(&bp->b_hash_list, &hash->bh_list); |
517 | goto found; | 517 | goto found; |
518 | } | 518 | } |
519 | } | 519 | } |
520 | 520 | ||
521 | /* No match found */ | 521 | /* No match found */ |
522 | if (new_bp) { | 522 | if (new_bp) { |
523 | _xfs_buf_initialize(new_bp, btp, range_base, | 523 | _xfs_buf_initialize(new_bp, btp, range_base, |
524 | range_length, flags); | 524 | range_length, flags); |
525 | new_bp->b_hash = hash; | 525 | new_bp->b_hash = hash; |
526 | list_add(&new_bp->b_hash_list, &hash->bh_list); | 526 | list_add(&new_bp->b_hash_list, &hash->bh_list); |
527 | } else { | 527 | } else { |
528 | XFS_STATS_INC(xb_miss_locked); | 528 | XFS_STATS_INC(xb_miss_locked); |
529 | } | 529 | } |
530 | 530 | ||
531 | spin_unlock(&hash->bh_lock); | 531 | spin_unlock(&hash->bh_lock); |
532 | return new_bp; | 532 | return new_bp; |
533 | 533 | ||
534 | found: | 534 | found: |
535 | spin_unlock(&hash->bh_lock); | 535 | spin_unlock(&hash->bh_lock); |
536 | 536 | ||
537 | /* Attempt to get the semaphore without sleeping, | 537 | /* Attempt to get the semaphore without sleeping, |
538 | * if this does not work then we need to drop the | 538 | * if this does not work then we need to drop the |
539 | * spinlock and do a hard attempt on the semaphore. | 539 | * spinlock and do a hard attempt on the semaphore. |
540 | */ | 540 | */ |
541 | if (down_trylock(&bp->b_sema)) { | 541 | if (down_trylock(&bp->b_sema)) { |
542 | if (!(flags & XBF_TRYLOCK)) { | 542 | if (!(flags & XBF_TRYLOCK)) { |
543 | /* wait for buffer ownership */ | 543 | /* wait for buffer ownership */ |
544 | XB_TRACE(bp, "get_lock", 0); | 544 | XB_TRACE(bp, "get_lock", 0); |
545 | xfs_buf_lock(bp); | 545 | xfs_buf_lock(bp); |
546 | XFS_STATS_INC(xb_get_locked_waited); | 546 | XFS_STATS_INC(xb_get_locked_waited); |
547 | } else { | 547 | } else { |
548 | /* We asked for a trylock and failed, no need | 548 | /* We asked for a trylock and failed, no need |
549 | * to look at file offset and length here, we | 549 | * to look at file offset and length here, we |
550 | * know that this buffer at least overlaps our | 550 | * know that this buffer at least overlaps our |
551 | * buffer and is locked, therefore our buffer | 551 | * buffer and is locked, therefore our buffer |
552 | * either does not exist, or is this buffer. | 552 | * either does not exist, or is this buffer. |
553 | */ | 553 | */ |
554 | xfs_buf_rele(bp); | 554 | xfs_buf_rele(bp); |
555 | XFS_STATS_INC(xb_busy_locked); | 555 | XFS_STATS_INC(xb_busy_locked); |
556 | return NULL; | 556 | return NULL; |
557 | } | 557 | } |
558 | } else { | 558 | } else { |
559 | /* trylock worked */ | 559 | /* trylock worked */ |
560 | XB_SET_OWNER(bp); | 560 | XB_SET_OWNER(bp); |
561 | } | 561 | } |
562 | 562 | ||
563 | if (bp->b_flags & XBF_STALE) { | 563 | if (bp->b_flags & XBF_STALE) { |
564 | ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); | 564 | ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); |
565 | bp->b_flags &= XBF_MAPPED; | 565 | bp->b_flags &= XBF_MAPPED; |
566 | } | 566 | } |
567 | XB_TRACE(bp, "got_lock", 0); | 567 | XB_TRACE(bp, "got_lock", 0); |
568 | XFS_STATS_INC(xb_get_locked); | 568 | XFS_STATS_INC(xb_get_locked); |
569 | return bp; | 569 | return bp; |
570 | } | 570 | } |
571 | 571 | ||
572 | /* | 572 | /* |
573 | * Assembles a buffer covering the specified range. | 573 | * Assembles a buffer covering the specified range. |
574 | * Storage in memory for all portions of the buffer will be allocated, | 574 | * Storage in memory for all portions of the buffer will be allocated, |
575 | * although backing storage may not be. | 575 | * although backing storage may not be. |
576 | */ | 576 | */ |
577 | xfs_buf_t * | 577 | xfs_buf_t * |
578 | xfs_buf_get_flags( | 578 | xfs_buf_get_flags( |
579 | xfs_buftarg_t *target,/* target for buffer */ | 579 | xfs_buftarg_t *target,/* target for buffer */ |
580 | xfs_off_t ioff, /* starting offset of range */ | 580 | xfs_off_t ioff, /* starting offset of range */ |
581 | size_t isize, /* length of range */ | 581 | size_t isize, /* length of range */ |
582 | xfs_buf_flags_t flags) | 582 | xfs_buf_flags_t flags) |
583 | { | 583 | { |
584 | xfs_buf_t *bp, *new_bp; | 584 | xfs_buf_t *bp, *new_bp; |
585 | int error = 0, i; | 585 | int error = 0, i; |
586 | 586 | ||
587 | new_bp = xfs_buf_allocate(flags); | 587 | new_bp = xfs_buf_allocate(flags); |
588 | if (unlikely(!new_bp)) | 588 | if (unlikely(!new_bp)) |
589 | return NULL; | 589 | return NULL; |
590 | 590 | ||
591 | bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); | 591 | bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); |
592 | if (bp == new_bp) { | 592 | if (bp == new_bp) { |
593 | error = _xfs_buf_lookup_pages(bp, flags); | 593 | error = _xfs_buf_lookup_pages(bp, flags); |
594 | if (error) | 594 | if (error) |
595 | goto no_buffer; | 595 | goto no_buffer; |
596 | } else { | 596 | } else { |
597 | xfs_buf_deallocate(new_bp); | 597 | xfs_buf_deallocate(new_bp); |
598 | if (unlikely(bp == NULL)) | 598 | if (unlikely(bp == NULL)) |
599 | return NULL; | 599 | return NULL; |
600 | } | 600 | } |
601 | 601 | ||
602 | for (i = 0; i < bp->b_page_count; i++) | 602 | for (i = 0; i < bp->b_page_count; i++) |
603 | mark_page_accessed(bp->b_pages[i]); | 603 | mark_page_accessed(bp->b_pages[i]); |
604 | 604 | ||
605 | if (!(bp->b_flags & XBF_MAPPED)) { | 605 | if (!(bp->b_flags & XBF_MAPPED)) { |
606 | error = _xfs_buf_map_pages(bp, flags); | 606 | error = _xfs_buf_map_pages(bp, flags); |
607 | if (unlikely(error)) { | 607 | if (unlikely(error)) { |
608 | printk(KERN_WARNING "%s: failed to map pages\n", | 608 | printk(KERN_WARNING "%s: failed to map pages\n", |
609 | __func__); | 609 | __func__); |
610 | goto no_buffer; | 610 | goto no_buffer; |
611 | } | 611 | } |
612 | } | 612 | } |
613 | 613 | ||
614 | XFS_STATS_INC(xb_get); | 614 | XFS_STATS_INC(xb_get); |
615 | 615 | ||
616 | /* | 616 | /* |
617 | * Always fill in the block number now, the mapped cases can do | 617 | * Always fill in the block number now, the mapped cases can do |
618 | * their own overlay of this later. | 618 | * their own overlay of this later. |
619 | */ | 619 | */ |
620 | bp->b_bn = ioff; | 620 | bp->b_bn = ioff; |
621 | bp->b_count_desired = bp->b_buffer_length; | 621 | bp->b_count_desired = bp->b_buffer_length; |
622 | 622 | ||
623 | XB_TRACE(bp, "get", (unsigned long)flags); | 623 | XB_TRACE(bp, "get", (unsigned long)flags); |
624 | return bp; | 624 | return bp; |
625 | 625 | ||
626 | no_buffer: | 626 | no_buffer: |
627 | if (flags & (XBF_LOCK | XBF_TRYLOCK)) | 627 | if (flags & (XBF_LOCK | XBF_TRYLOCK)) |
628 | xfs_buf_unlock(bp); | 628 | xfs_buf_unlock(bp); |
629 | xfs_buf_rele(bp); | 629 | xfs_buf_rele(bp); |
630 | return NULL; | 630 | return NULL; |
631 | } | 631 | } |
632 | 632 | ||
633 | STATIC int | 633 | STATIC int |
634 | _xfs_buf_read( | 634 | _xfs_buf_read( |
635 | xfs_buf_t *bp, | 635 | xfs_buf_t *bp, |
636 | xfs_buf_flags_t flags) | 636 | xfs_buf_flags_t flags) |
637 | { | 637 | { |
638 | int status; | 638 | int status; |
639 | 639 | ||
640 | XB_TRACE(bp, "_xfs_buf_read", (unsigned long)flags); | 640 | XB_TRACE(bp, "_xfs_buf_read", (unsigned long)flags); |
641 | 641 | ||
642 | ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); | 642 | ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); |
643 | ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); | 643 | ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); |
644 | 644 | ||
645 | bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ | 645 | bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ |
646 | XBF_READ_AHEAD | _XBF_RUN_QUEUES); | 646 | XBF_READ_AHEAD | _XBF_RUN_QUEUES); |
647 | bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \ | 647 | bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \ |
648 | XBF_READ_AHEAD | _XBF_RUN_QUEUES); | 648 | XBF_READ_AHEAD | _XBF_RUN_QUEUES); |
649 | 649 | ||
650 | status = xfs_buf_iorequest(bp); | 650 | status = xfs_buf_iorequest(bp); |
651 | if (!status && !(flags & XBF_ASYNC)) | 651 | if (!status && !(flags & XBF_ASYNC)) |
652 | status = xfs_buf_iowait(bp); | 652 | status = xfs_buf_iowait(bp); |
653 | return status; | 653 | return status; |
654 | } | 654 | } |
655 | 655 | ||
656 | xfs_buf_t * | 656 | xfs_buf_t * |
657 | xfs_buf_read_flags( | 657 | xfs_buf_read_flags( |
658 | xfs_buftarg_t *target, | 658 | xfs_buftarg_t *target, |
659 | xfs_off_t ioff, | 659 | xfs_off_t ioff, |
660 | size_t isize, | 660 | size_t isize, |
661 | xfs_buf_flags_t flags) | 661 | xfs_buf_flags_t flags) |
662 | { | 662 | { |
663 | xfs_buf_t *bp; | 663 | xfs_buf_t *bp; |
664 | 664 | ||
665 | flags |= XBF_READ; | 665 | flags |= XBF_READ; |
666 | 666 | ||
667 | bp = xfs_buf_get_flags(target, ioff, isize, flags); | 667 | bp = xfs_buf_get_flags(target, ioff, isize, flags); |
668 | if (bp) { | 668 | if (bp) { |
669 | if (!XFS_BUF_ISDONE(bp)) { | 669 | if (!XFS_BUF_ISDONE(bp)) { |
670 | XB_TRACE(bp, "read", (unsigned long)flags); | 670 | XB_TRACE(bp, "read", (unsigned long)flags); |
671 | XFS_STATS_INC(xb_get_read); | 671 | XFS_STATS_INC(xb_get_read); |
672 | _xfs_buf_read(bp, flags); | 672 | _xfs_buf_read(bp, flags); |
673 | } else if (flags & XBF_ASYNC) { | 673 | } else if (flags & XBF_ASYNC) { |
674 | XB_TRACE(bp, "read_async", (unsigned long)flags); | 674 | XB_TRACE(bp, "read_async", (unsigned long)flags); |
675 | /* | 675 | /* |
676 | * Read ahead call which is already satisfied, | 676 | * Read ahead call which is already satisfied, |
677 | * drop the buffer | 677 | * drop the buffer |
678 | */ | 678 | */ |
679 | goto no_buffer; | 679 | goto no_buffer; |
680 | } else { | 680 | } else { |
681 | XB_TRACE(bp, "read_done", (unsigned long)flags); | 681 | XB_TRACE(bp, "read_done", (unsigned long)flags); |
682 | /* We do not want read in the flags */ | 682 | /* We do not want read in the flags */ |
683 | bp->b_flags &= ~XBF_READ; | 683 | bp->b_flags &= ~XBF_READ; |
684 | } | 684 | } |
685 | } | 685 | } |
686 | 686 | ||
687 | return bp; | 687 | return bp; |
688 | 688 | ||
689 | no_buffer: | 689 | no_buffer: |
690 | if (flags & (XBF_LOCK | XBF_TRYLOCK)) | 690 | if (flags & (XBF_LOCK | XBF_TRYLOCK)) |
691 | xfs_buf_unlock(bp); | 691 | xfs_buf_unlock(bp); |
692 | xfs_buf_rele(bp); | 692 | xfs_buf_rele(bp); |
693 | return NULL; | 693 | return NULL; |
694 | } | 694 | } |
695 | 695 | ||
696 | /* | 696 | /* |
697 | * If we are not low on memory then do the readahead in a deadlock | 697 | * If we are not low on memory then do the readahead in a deadlock |
698 | * safe manner. | 698 | * safe manner. |
699 | */ | 699 | */ |
700 | void | 700 | void |
701 | xfs_buf_readahead( | 701 | xfs_buf_readahead( |
702 | xfs_buftarg_t *target, | 702 | xfs_buftarg_t *target, |
703 | xfs_off_t ioff, | 703 | xfs_off_t ioff, |
704 | size_t isize, | 704 | size_t isize, |
705 | xfs_buf_flags_t flags) | 705 | xfs_buf_flags_t flags) |
706 | { | 706 | { |
707 | struct backing_dev_info *bdi; | 707 | struct backing_dev_info *bdi; |
708 | 708 | ||
709 | bdi = target->bt_mapping->backing_dev_info; | 709 | bdi = target->bt_mapping->backing_dev_info; |
710 | if (bdi_read_congested(bdi)) | 710 | if (bdi_read_congested(bdi)) |
711 | return; | 711 | return; |
712 | 712 | ||
713 | flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); | 713 | flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); |
714 | xfs_buf_read_flags(target, ioff, isize, flags); | 714 | xfs_buf_read_flags(target, ioff, isize, flags); |
715 | } | 715 | } |
716 | 716 | ||
717 | xfs_buf_t * | 717 | xfs_buf_t * |
718 | xfs_buf_get_empty( | 718 | xfs_buf_get_empty( |
719 | size_t len, | 719 | size_t len, |
720 | xfs_buftarg_t *target) | 720 | xfs_buftarg_t *target) |
721 | { | 721 | { |
722 | xfs_buf_t *bp; | 722 | xfs_buf_t *bp; |
723 | 723 | ||
724 | bp = xfs_buf_allocate(0); | 724 | bp = xfs_buf_allocate(0); |
725 | if (bp) | 725 | if (bp) |
726 | _xfs_buf_initialize(bp, target, 0, len, 0); | 726 | _xfs_buf_initialize(bp, target, 0, len, 0); |
727 | return bp; | 727 | return bp; |
728 | } | 728 | } |
729 | 729 | ||
730 | static inline struct page * | 730 | static inline struct page * |
731 | mem_to_page( | 731 | mem_to_page( |
732 | void *addr) | 732 | void *addr) |
733 | { | 733 | { |
734 | if ((!is_vmalloc_addr(addr))) { | 734 | if ((!is_vmalloc_addr(addr))) { |
735 | return virt_to_page(addr); | 735 | return virt_to_page(addr); |
736 | } else { | 736 | } else { |
737 | return vmalloc_to_page(addr); | 737 | return vmalloc_to_page(addr); |
738 | } | 738 | } |
739 | } | 739 | } |
740 | 740 | ||
741 | int | 741 | int |
742 | xfs_buf_associate_memory( | 742 | xfs_buf_associate_memory( |
743 | xfs_buf_t *bp, | 743 | xfs_buf_t *bp, |
744 | void *mem, | 744 | void *mem, |
745 | size_t len) | 745 | size_t len) |
746 | { | 746 | { |
747 | int rval; | 747 | int rval; |
748 | int i = 0; | 748 | int i = 0; |
749 | unsigned long pageaddr; | 749 | unsigned long pageaddr; |
750 | unsigned long offset; | 750 | unsigned long offset; |
751 | size_t buflen; | 751 | size_t buflen; |
752 | int page_count; | 752 | int page_count; |
753 | 753 | ||
754 | pageaddr = (unsigned long)mem & PAGE_CACHE_MASK; | 754 | pageaddr = (unsigned long)mem & PAGE_CACHE_MASK; |
755 | offset = (unsigned long)mem - pageaddr; | 755 | offset = (unsigned long)mem - pageaddr; |
756 | buflen = PAGE_CACHE_ALIGN(len + offset); | 756 | buflen = PAGE_CACHE_ALIGN(len + offset); |
757 | page_count = buflen >> PAGE_CACHE_SHIFT; | 757 | page_count = buflen >> PAGE_CACHE_SHIFT; |
758 | 758 | ||
759 | /* Free any previous set of page pointers */ | 759 | /* Free any previous set of page pointers */ |
760 | if (bp->b_pages) | 760 | if (bp->b_pages) |
761 | _xfs_buf_free_pages(bp); | 761 | _xfs_buf_free_pages(bp); |
762 | 762 | ||
763 | bp->b_pages = NULL; | 763 | bp->b_pages = NULL; |
764 | bp->b_addr = mem; | 764 | bp->b_addr = mem; |
765 | 765 | ||
766 | rval = _xfs_buf_get_pages(bp, page_count, 0); | 766 | rval = _xfs_buf_get_pages(bp, page_count, 0); |
767 | if (rval) | 767 | if (rval) |
768 | return rval; | 768 | return rval; |
769 | 769 | ||
770 | bp->b_offset = offset; | 770 | bp->b_offset = offset; |
771 | 771 | ||
772 | for (i = 0; i < bp->b_page_count; i++) { | 772 | for (i = 0; i < bp->b_page_count; i++) { |
773 | bp->b_pages[i] = mem_to_page((void *)pageaddr); | 773 | bp->b_pages[i] = mem_to_page((void *)pageaddr); |
774 | pageaddr += PAGE_CACHE_SIZE; | 774 | pageaddr += PAGE_CACHE_SIZE; |
775 | } | 775 | } |
776 | 776 | ||
777 | bp->b_count_desired = len; | 777 | bp->b_count_desired = len; |
778 | bp->b_buffer_length = buflen; | 778 | bp->b_buffer_length = buflen; |
779 | bp->b_flags |= XBF_MAPPED; | 779 | bp->b_flags |= XBF_MAPPED; |
780 | bp->b_flags &= ~_XBF_PAGE_LOCKED; | 780 | bp->b_flags &= ~_XBF_PAGE_LOCKED; |
781 | 781 | ||
782 | return 0; | 782 | return 0; |
783 | } | 783 | } |
784 | 784 | ||
785 | xfs_buf_t * | 785 | xfs_buf_t * |
786 | xfs_buf_get_noaddr( | 786 | xfs_buf_get_noaddr( |
787 | size_t len, | 787 | size_t len, |
788 | xfs_buftarg_t *target) | 788 | xfs_buftarg_t *target) |
789 | { | 789 | { |
790 | unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; | 790 | unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; |
791 | int error, i; | 791 | int error, i; |
792 | xfs_buf_t *bp; | 792 | xfs_buf_t *bp; |
793 | 793 | ||
794 | bp = xfs_buf_allocate(0); | 794 | bp = xfs_buf_allocate(0); |
795 | if (unlikely(bp == NULL)) | 795 | if (unlikely(bp == NULL)) |
796 | goto fail; | 796 | goto fail; |
797 | _xfs_buf_initialize(bp, target, 0, len, 0); | 797 | _xfs_buf_initialize(bp, target, 0, len, 0); |
798 | 798 | ||
799 | error = _xfs_buf_get_pages(bp, page_count, 0); | 799 | error = _xfs_buf_get_pages(bp, page_count, 0); |
800 | if (error) | 800 | if (error) |
801 | goto fail_free_buf; | 801 | goto fail_free_buf; |
802 | 802 | ||
803 | for (i = 0; i < page_count; i++) { | 803 | for (i = 0; i < page_count; i++) { |
804 | bp->b_pages[i] = alloc_page(GFP_KERNEL); | 804 | bp->b_pages[i] = alloc_page(GFP_KERNEL); |
805 | if (!bp->b_pages[i]) | 805 | if (!bp->b_pages[i]) |
806 | goto fail_free_mem; | 806 | goto fail_free_mem; |
807 | } | 807 | } |
808 | bp->b_flags |= _XBF_PAGES; | 808 | bp->b_flags |= _XBF_PAGES; |
809 | 809 | ||
810 | error = _xfs_buf_map_pages(bp, XBF_MAPPED); | 810 | error = _xfs_buf_map_pages(bp, XBF_MAPPED); |
811 | if (unlikely(error)) { | 811 | if (unlikely(error)) { |
812 | printk(KERN_WARNING "%s: failed to map pages\n", | 812 | printk(KERN_WARNING "%s: failed to map pages\n", |
813 | __func__); | 813 | __func__); |
814 | goto fail_free_mem; | 814 | goto fail_free_mem; |
815 | } | 815 | } |
816 | 816 | ||
817 | xfs_buf_unlock(bp); | 817 | xfs_buf_unlock(bp); |
818 | 818 | ||
819 | XB_TRACE(bp, "no_daddr", len); | 819 | XB_TRACE(bp, "no_daddr", len); |
820 | return bp; | 820 | return bp; |
821 | 821 | ||
822 | fail_free_mem: | 822 | fail_free_mem: |
823 | while (--i >= 0) | 823 | while (--i >= 0) |
824 | __free_page(bp->b_pages[i]); | 824 | __free_page(bp->b_pages[i]); |
825 | _xfs_buf_free_pages(bp); | 825 | _xfs_buf_free_pages(bp); |
826 | fail_free_buf: | 826 | fail_free_buf: |
827 | xfs_buf_deallocate(bp); | 827 | xfs_buf_deallocate(bp); |
828 | fail: | 828 | fail: |
829 | return NULL; | 829 | return NULL; |
830 | } | 830 | } |
831 | 831 | ||
832 | /* | 832 | /* |
833 | * Increment reference count on buffer, to hold the buffer concurrently | 833 | * Increment reference count on buffer, to hold the buffer concurrently |
834 | * with another thread which may release (free) the buffer asynchronously. | 834 | * with another thread which may release (free) the buffer asynchronously. |
835 | * Must hold the buffer already to call this function. | 835 | * Must hold the buffer already to call this function. |
836 | */ | 836 | */ |
837 | void | 837 | void |
838 | xfs_buf_hold( | 838 | xfs_buf_hold( |
839 | xfs_buf_t *bp) | 839 | xfs_buf_t *bp) |
840 | { | 840 | { |
841 | atomic_inc(&bp->b_hold); | 841 | atomic_inc(&bp->b_hold); |
842 | XB_TRACE(bp, "hold", 0); | 842 | XB_TRACE(bp, "hold", 0); |
843 | } | 843 | } |
844 | 844 | ||
845 | /* | 845 | /* |
846 | * Releases a hold on the specified buffer. If the | 846 | * Releases a hold on the specified buffer. If the |
847 | * the hold count is 1, calls xfs_buf_free. | 847 | * the hold count is 1, calls xfs_buf_free. |
848 | */ | 848 | */ |
849 | void | 849 | void |
850 | xfs_buf_rele( | 850 | xfs_buf_rele( |
851 | xfs_buf_t *bp) | 851 | xfs_buf_t *bp) |
852 | { | 852 | { |
853 | xfs_bufhash_t *hash = bp->b_hash; | 853 | xfs_bufhash_t *hash = bp->b_hash; |
854 | 854 | ||
855 | XB_TRACE(bp, "rele", bp->b_relse); | 855 | XB_TRACE(bp, "rele", bp->b_relse); |
856 | 856 | ||
857 | if (unlikely(!hash)) { | 857 | if (unlikely(!hash)) { |
858 | ASSERT(!bp->b_relse); | 858 | ASSERT(!bp->b_relse); |
859 | if (atomic_dec_and_test(&bp->b_hold)) | 859 | if (atomic_dec_and_test(&bp->b_hold)) |
860 | xfs_buf_free(bp); | 860 | xfs_buf_free(bp); |
861 | return; | 861 | return; |
862 | } | 862 | } |
863 | 863 | ||
864 | ASSERT(atomic_read(&bp->b_hold) > 0); | 864 | ASSERT(atomic_read(&bp->b_hold) > 0); |
865 | if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) { | 865 | if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) { |
866 | if (bp->b_relse) { | 866 | if (bp->b_relse) { |
867 | atomic_inc(&bp->b_hold); | 867 | atomic_inc(&bp->b_hold); |
868 | spin_unlock(&hash->bh_lock); | 868 | spin_unlock(&hash->bh_lock); |
869 | (*(bp->b_relse)) (bp); | 869 | (*(bp->b_relse)) (bp); |
870 | } else if (bp->b_flags & XBF_FS_MANAGED) { | 870 | } else if (bp->b_flags & XBF_FS_MANAGED) { |
871 | spin_unlock(&hash->bh_lock); | 871 | spin_unlock(&hash->bh_lock); |
872 | } else { | 872 | } else { |
873 | ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); | 873 | ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); |
874 | list_del_init(&bp->b_hash_list); | 874 | list_del_init(&bp->b_hash_list); |
875 | spin_unlock(&hash->bh_lock); | 875 | spin_unlock(&hash->bh_lock); |
876 | xfs_buf_free(bp); | 876 | xfs_buf_free(bp); |
877 | } | 877 | } |
878 | } | 878 | } |
879 | } | 879 | } |
880 | 880 | ||
881 | 881 | ||
882 | /* | 882 | /* |
883 | * Mutual exclusion on buffers. Locking model: | 883 | * Mutual exclusion on buffers. Locking model: |
884 | * | 884 | * |
885 | * Buffers associated with inodes for which buffer locking | 885 | * Buffers associated with inodes for which buffer locking |
886 | * is not enabled are not protected by semaphores, and are | 886 | * is not enabled are not protected by semaphores, and are |
887 | * assumed to be exclusively owned by the caller. There is a | 887 | * assumed to be exclusively owned by the caller. There is a |
888 | * spinlock in the buffer, used by the caller when concurrent | 888 | * spinlock in the buffer, used by the caller when concurrent |
889 | * access is possible. | 889 | * access is possible. |
890 | */ | 890 | */ |
891 | 891 | ||
892 | /* | 892 | /* |
893 | * Locks a buffer object, if it is not already locked. | 893 | * Locks a buffer object, if it is not already locked. |
894 | * Note that this in no way locks the underlying pages, so it is only | 894 | * Note that this in no way locks the underlying pages, so it is only |
895 | * useful for synchronizing concurrent use of buffer objects, not for | 895 | * useful for synchronizing concurrent use of buffer objects, not for |
896 | * synchronizing independent access to the underlying pages. | 896 | * synchronizing independent access to the underlying pages. |
897 | */ | 897 | */ |
898 | int | 898 | int |
899 | xfs_buf_cond_lock( | 899 | xfs_buf_cond_lock( |
900 | xfs_buf_t *bp) | 900 | xfs_buf_t *bp) |
901 | { | 901 | { |
902 | int locked; | 902 | int locked; |
903 | 903 | ||
904 | locked = down_trylock(&bp->b_sema) == 0; | 904 | locked = down_trylock(&bp->b_sema) == 0; |
905 | if (locked) { | 905 | if (locked) { |
906 | XB_SET_OWNER(bp); | 906 | XB_SET_OWNER(bp); |
907 | } | 907 | } |
908 | XB_TRACE(bp, "cond_lock", (long)locked); | 908 | XB_TRACE(bp, "cond_lock", (long)locked); |
909 | return locked ? 0 : -EBUSY; | 909 | return locked ? 0 : -EBUSY; |
910 | } | 910 | } |
911 | 911 | ||
912 | #if defined(DEBUG) || defined(XFS_BLI_TRACE) | 912 | #if defined(DEBUG) || defined(XFS_BLI_TRACE) |
913 | int | 913 | int |
914 | xfs_buf_lock_value( | 914 | xfs_buf_lock_value( |
915 | xfs_buf_t *bp) | 915 | xfs_buf_t *bp) |
916 | { | 916 | { |
917 | return bp->b_sema.count; | 917 | return bp->b_sema.count; |
918 | } | 918 | } |
919 | #endif | 919 | #endif |
920 | 920 | ||
921 | /* | 921 | /* |
922 | * Locks a buffer object. | 922 | * Locks a buffer object. |
923 | * Note that this in no way locks the underlying pages, so it is only | 923 | * Note that this in no way locks the underlying pages, so it is only |
924 | * useful for synchronizing concurrent use of buffer objects, not for | 924 | * useful for synchronizing concurrent use of buffer objects, not for |
925 | * synchronizing independent access to the underlying pages. | 925 | * synchronizing independent access to the underlying pages. |
926 | */ | 926 | */ |
927 | void | 927 | void |
928 | xfs_buf_lock( | 928 | xfs_buf_lock( |
929 | xfs_buf_t *bp) | 929 | xfs_buf_t *bp) |
930 | { | 930 | { |
931 | XB_TRACE(bp, "lock", 0); | 931 | XB_TRACE(bp, "lock", 0); |
932 | if (atomic_read(&bp->b_io_remaining)) | 932 | if (atomic_read(&bp->b_io_remaining)) |
933 | blk_run_address_space(bp->b_target->bt_mapping); | 933 | blk_run_address_space(bp->b_target->bt_mapping); |
934 | down(&bp->b_sema); | 934 | down(&bp->b_sema); |
935 | XB_SET_OWNER(bp); | 935 | XB_SET_OWNER(bp); |
936 | XB_TRACE(bp, "locked", 0); | 936 | XB_TRACE(bp, "locked", 0); |
937 | } | 937 | } |
938 | 938 | ||
939 | /* | 939 | /* |
940 | * Releases the lock on the buffer object. | 940 | * Releases the lock on the buffer object. |
941 | * If the buffer is marked delwri but is not queued, do so before we | 941 | * If the buffer is marked delwri but is not queued, do so before we |
942 | * unlock the buffer as we need to set flags correctly. We also need to | 942 | * unlock the buffer as we need to set flags correctly. We also need to |
943 | * take a reference for the delwri queue because the unlocker is going to | 943 | * take a reference for the delwri queue because the unlocker is going to |
944 | * drop theirs and they don't know we just queued it. | 944 | * drop theirs and they don't know we just queued it. |
945 | */ | 945 | */ |
946 | void | 946 | void |
947 | xfs_buf_unlock( | 947 | xfs_buf_unlock( |
948 | xfs_buf_t *bp) | 948 | xfs_buf_t *bp) |
949 | { | 949 | { |
950 | if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) { | 950 | if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) { |
951 | atomic_inc(&bp->b_hold); | 951 | atomic_inc(&bp->b_hold); |
952 | bp->b_flags |= XBF_ASYNC; | 952 | bp->b_flags |= XBF_ASYNC; |
953 | xfs_buf_delwri_queue(bp, 0); | 953 | xfs_buf_delwri_queue(bp, 0); |
954 | } | 954 | } |
955 | 955 | ||
956 | XB_CLEAR_OWNER(bp); | 956 | XB_CLEAR_OWNER(bp); |
957 | up(&bp->b_sema); | 957 | up(&bp->b_sema); |
958 | XB_TRACE(bp, "unlock", 0); | 958 | XB_TRACE(bp, "unlock", 0); |
959 | } | 959 | } |
960 | 960 | ||
961 | 961 | ||
962 | /* | 962 | /* |
963 | * Pinning Buffer Storage in Memory | 963 | * Pinning Buffer Storage in Memory |
964 | * Ensure that no attempt to force a buffer to disk will succeed. | 964 | * Ensure that no attempt to force a buffer to disk will succeed. |
965 | */ | 965 | */ |
966 | void | 966 | void |
967 | xfs_buf_pin( | 967 | xfs_buf_pin( |
968 | xfs_buf_t *bp) | 968 | xfs_buf_t *bp) |
969 | { | 969 | { |
970 | atomic_inc(&bp->b_pin_count); | 970 | atomic_inc(&bp->b_pin_count); |
971 | XB_TRACE(bp, "pin", (long)bp->b_pin_count.counter); | 971 | XB_TRACE(bp, "pin", (long)bp->b_pin_count.counter); |
972 | } | 972 | } |
973 | 973 | ||
974 | void | 974 | void |
975 | xfs_buf_unpin( | 975 | xfs_buf_unpin( |
976 | xfs_buf_t *bp) | 976 | xfs_buf_t *bp) |
977 | { | 977 | { |
978 | if (atomic_dec_and_test(&bp->b_pin_count)) | 978 | if (atomic_dec_and_test(&bp->b_pin_count)) |
979 | wake_up_all(&bp->b_waiters); | 979 | wake_up_all(&bp->b_waiters); |
980 | XB_TRACE(bp, "unpin", (long)bp->b_pin_count.counter); | 980 | XB_TRACE(bp, "unpin", (long)bp->b_pin_count.counter); |
981 | } | 981 | } |
982 | 982 | ||
983 | int | 983 | int |
984 | xfs_buf_ispin( | 984 | xfs_buf_ispin( |
985 | xfs_buf_t *bp) | 985 | xfs_buf_t *bp) |
986 | { | 986 | { |
987 | return atomic_read(&bp->b_pin_count); | 987 | return atomic_read(&bp->b_pin_count); |
988 | } | 988 | } |
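
A pin holds the buffer's contents in memory without holding the lock; a writer blocks in xfs_buf_wait_unpin() below until the count drains. A sketch of the pairing, with the surrounding caller code assumed rather than taken from this diff:

        xfs_buf_pin(bp);        /* b_pin_count++: writeback must wait */
        /* e.g. while the buffer's changes sit in the in-core log */
        xfs_buf_unpin(bp);      /* wakes b_waiters when count hits 0 */
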
989 | 989 | ||
990 | STATIC void | 990 | STATIC void |
991 | xfs_buf_wait_unpin( | 991 | xfs_buf_wait_unpin( |
992 | xfs_buf_t *bp) | 992 | xfs_buf_t *bp) |
993 | { | 993 | { |
994 | DECLARE_WAITQUEUE (wait, current); | 994 | DECLARE_WAITQUEUE (wait, current); |
995 | 995 | ||
996 | if (atomic_read(&bp->b_pin_count) == 0) | 996 | if (atomic_read(&bp->b_pin_count) == 0) |
997 | return; | 997 | return; |
998 | 998 | ||
999 | add_wait_queue(&bp->b_waiters, &wait); | 999 | add_wait_queue(&bp->b_waiters, &wait); |
1000 | for (;;) { | 1000 | for (;;) { |
1001 | set_current_state(TASK_UNINTERRUPTIBLE); | 1001 | set_current_state(TASK_UNINTERRUPTIBLE); |
1002 | if (atomic_read(&bp->b_pin_count) == 0) | 1002 | if (atomic_read(&bp->b_pin_count) == 0) |
1003 | break; | 1003 | break; |
1004 | if (atomic_read(&bp->b_io_remaining)) | 1004 | if (atomic_read(&bp->b_io_remaining)) |
1005 | blk_run_address_space(bp->b_target->bt_mapping); | 1005 | blk_run_address_space(bp->b_target->bt_mapping); |
1006 | schedule(); | 1006 | schedule(); |
1007 | } | 1007 | } |
1008 | remove_wait_queue(&bp->b_waiters, &wait); | 1008 | remove_wait_queue(&bp->b_waiters, &wait); |
1009 | set_current_state(TASK_RUNNING); | 1009 | set_current_state(TASK_RUNNING); |
1010 | } | 1010 | } |
1011 | 1011 | ||
1012 | /* | 1012 | /* |
1013 | * Buffer Utility Routines | 1013 | * Buffer Utility Routines |
1014 | */ | 1014 | */ |
1015 | 1015 | ||
1016 | STATIC void | 1016 | STATIC void |
1017 | xfs_buf_iodone_work( | 1017 | xfs_buf_iodone_work( |
1018 | struct work_struct *work) | 1018 | struct work_struct *work) |
1019 | { | 1019 | { |
1020 | xfs_buf_t *bp = | 1020 | xfs_buf_t *bp = |
1021 | container_of(work, xfs_buf_t, b_iodone_work); | 1021 | container_of(work, xfs_buf_t, b_iodone_work); |
1022 | 1022 | ||
1023 | /* | 1023 | /* |
1024 | * We can get an EOPNOTSUPP for ordered writes. Here we clear the | 1024 | * We can get an EOPNOTSUPP for ordered writes. Here we clear the |
1025 | * ordered flag and reissue them. Because we can't tell the higher | 1025 | * ordered flag and reissue them. Because we can't tell the higher |
1026 | * layers directly that they should not issue ordered I/O anymore, they | 1026 | * layers directly that they should not issue ordered I/O anymore, they |
1027 | * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion. | 1027 | * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion. |
1028 | */ | 1028 | */ |
1029 | if ((bp->b_error == EOPNOTSUPP) && | 1029 | if ((bp->b_error == EOPNOTSUPP) && |
1030 | (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { | 1030 | (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { |
1031 | XB_TRACE(bp, "ordered_retry", bp->b_iodone); | 1031 | XB_TRACE(bp, "ordered_retry", bp->b_iodone); |
1032 | bp->b_flags &= ~XBF_ORDERED; | 1032 | bp->b_flags &= ~XBF_ORDERED; |
1033 | bp->b_flags |= _XFS_BARRIER_FAILED; | 1033 | bp->b_flags |= _XFS_BARRIER_FAILED; |
1034 | xfs_buf_iorequest(bp); | 1034 | xfs_buf_iorequest(bp); |
1035 | } else if (bp->b_iodone) | 1035 | } else if (bp->b_iodone) |
1036 | (*(bp->b_iodone))(bp); | 1036 | (*(bp->b_iodone))(bp); |
1037 | else if (bp->b_flags & XBF_ASYNC) | 1037 | else if (bp->b_flags & XBF_ASYNC) |
1038 | xfs_buf_relse(bp); | 1038 | xfs_buf_relse(bp); |
1039 | } | 1039 | } |
1040 | 1040 | ||
1041 | void | 1041 | void |
1042 | xfs_buf_ioend( | 1042 | xfs_buf_ioend( |
1043 | xfs_buf_t *bp, | 1043 | xfs_buf_t *bp, |
1044 | int schedule) | 1044 | int schedule) |
1045 | { | 1045 | { |
1046 | bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); | 1046 | bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); |
1047 | if (bp->b_error == 0) | 1047 | if (bp->b_error == 0) |
1048 | bp->b_flags |= XBF_DONE; | 1048 | bp->b_flags |= XBF_DONE; |
1049 | 1049 | ||
1050 | XB_TRACE(bp, "iodone", bp->b_iodone); | 1050 | XB_TRACE(bp, "iodone", bp->b_iodone); |
1051 | 1051 | ||
1052 | if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) { | 1052 | if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) { |
1053 | if (schedule) { | 1053 | if (schedule) { |
1054 | INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work); | 1054 | INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work); |
1055 | queue_work(xfslogd_workqueue, &bp->b_iodone_work); | 1055 | queue_work(xfslogd_workqueue, &bp->b_iodone_work); |
1056 | } else { | 1056 | } else { |
1057 | xfs_buf_iodone_work(&bp->b_iodone_work); | 1057 | xfs_buf_iodone_work(&bp->b_iodone_work); |
1058 | } | 1058 | } |
1059 | } else { | 1059 | } else { |
1060 | complete(&bp->b_iowait); | 1060 | complete(&bp->b_iowait); |
1061 | } | 1061 | } |
1062 | } | 1062 | } |
1063 | 1063 | ||
1064 | void | 1064 | void |
1065 | xfs_buf_ioerror( | 1065 | xfs_buf_ioerror( |
1066 | xfs_buf_t *bp, | 1066 | xfs_buf_t *bp, |
1067 | int error) | 1067 | int error) |
1068 | { | 1068 | { |
1069 | ASSERT(error >= 0 && error <= 0xffff); | 1069 | ASSERT(error >= 0 && error <= 0xffff); |
1070 | bp->b_error = (unsigned short)error; | 1070 | bp->b_error = (unsigned short)error; |
1071 | XB_TRACE(bp, "ioerror", (unsigned long)error); | 1071 | XB_TRACE(bp, "ioerror", (unsigned long)error); |
1072 | } | 1072 | } |
1073 | 1073 | ||
1074 | int | 1074 | int |
1075 | xfs_bawrite( | 1075 | xfs_bawrite( |
1076 | void *mp, | 1076 | void *mp, |
1077 | struct xfs_buf *bp) | 1077 | struct xfs_buf *bp) |
1078 | { | 1078 | { |
1079 | XB_TRACE(bp, "bawrite", 0); | 1079 | XB_TRACE(bp, "bawrite", 0); |
1080 | 1080 | ||
1081 | ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); | 1081 | ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); |
1082 | 1082 | ||
1083 | xfs_buf_delwri_dequeue(bp); | 1083 | xfs_buf_delwri_dequeue(bp); |
1084 | 1084 | ||
1085 | bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD); | 1085 | bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD); |
1086 | bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES); | 1086 | bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES); |
1087 | 1087 | ||
1088 | bp->b_fspriv3 = mp; | 1088 | bp->b_mount = mp; |
1089 | bp->b_strat = xfs_bdstrat_cb; | 1089 | bp->b_strat = xfs_bdstrat_cb; |
1090 | return xfs_bdstrat_cb(bp); | 1090 | return xfs_bdstrat_cb(bp); |
1091 | } | 1091 | } |
1092 | 1092 | ||
1093 | void | 1093 | void |
1094 | xfs_bdwrite( | 1094 | xfs_bdwrite( |
1095 | void *mp, | 1095 | void *mp, |
1096 | struct xfs_buf *bp) | 1096 | struct xfs_buf *bp) |
1097 | { | 1097 | { |
1098 | XB_TRACE(bp, "bdwrite", 0); | 1098 | XB_TRACE(bp, "bdwrite", 0); |
1099 | 1099 | ||
1100 | bp->b_strat = xfs_bdstrat_cb; | 1100 | bp->b_strat = xfs_bdstrat_cb; |
1101 | bp->b_fspriv3 = mp; | 1101 | bp->b_mount = mp; |
1102 | 1102 | ||
1103 | bp->b_flags &= ~XBF_READ; | 1103 | bp->b_flags &= ~XBF_READ; |
1104 | bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); | 1104 | bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); |
1105 | 1105 | ||
1106 | xfs_buf_delwri_queue(bp, 1); | 1106 | xfs_buf_delwri_queue(bp, 1); |
1107 | } | 1107 | } |
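
Both write helpers above now stash the mount pointer in the typed b_mount field (the point of this commit) before routing the buffer through xfs_bdstrat_cb. A hedged caller sketch; xfs_force_shutdown() and SHUTDOWN_META_IO_ERROR are assumed from contemporary XFS headers outside this diff, and mp is the caller's struct xfs_mount:

        int error;

        error = xfs_bawrite(mp, bp);    /* async write; sets bp->b_mount */
        if (error)
                xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
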
1108 | 1108 | ||
1109 | STATIC_INLINE void | 1109 | STATIC_INLINE void |
1110 | _xfs_buf_ioend( | 1110 | _xfs_buf_ioend( |
1111 | xfs_buf_t *bp, | 1111 | xfs_buf_t *bp, |
1112 | int schedule) | 1112 | int schedule) |
1113 | { | 1113 | { |
1114 | if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { | 1114 | if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { |
1115 | bp->b_flags &= ~_XBF_PAGE_LOCKED; | 1115 | bp->b_flags &= ~_XBF_PAGE_LOCKED; |
1116 | xfs_buf_ioend(bp, schedule); | 1116 | xfs_buf_ioend(bp, schedule); |
1117 | } | 1117 | } |
1118 | } | 1118 | } |
1119 | 1119 | ||
1120 | STATIC void | 1120 | STATIC void |
1121 | xfs_buf_bio_end_io( | 1121 | xfs_buf_bio_end_io( |
1122 | struct bio *bio, | 1122 | struct bio *bio, |
1123 | int error) | 1123 | int error) |
1124 | { | 1124 | { |
1125 | xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; | 1125 | xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; |
1126 | unsigned int blocksize = bp->b_target->bt_bsize; | 1126 | unsigned int blocksize = bp->b_target->bt_bsize; |
1127 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | 1127 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; |
1128 | 1128 | ||
1129 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | 1129 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) |
1130 | bp->b_error = EIO; | 1130 | bp->b_error = EIO; |
1131 | 1131 | ||
1132 | do { | 1132 | do { |
1133 | struct page *page = bvec->bv_page; | 1133 | struct page *page = bvec->bv_page; |
1134 | 1134 | ||
1135 | ASSERT(!PagePrivate(page)); | 1135 | ASSERT(!PagePrivate(page)); |
1136 | if (unlikely(bp->b_error)) { | 1136 | if (unlikely(bp->b_error)) { |
1137 | if (bp->b_flags & XBF_READ) | 1137 | if (bp->b_flags & XBF_READ) |
1138 | ClearPageUptodate(page); | 1138 | ClearPageUptodate(page); |
1139 | } else if (blocksize >= PAGE_CACHE_SIZE) { | 1139 | } else if (blocksize >= PAGE_CACHE_SIZE) { |
1140 | SetPageUptodate(page); | 1140 | SetPageUptodate(page); |
1141 | } else if (!PagePrivate(page) && | 1141 | } else if (!PagePrivate(page) && |
1142 | (bp->b_flags & _XBF_PAGE_CACHE)) { | 1142 | (bp->b_flags & _XBF_PAGE_CACHE)) { |
1143 | set_page_region(page, bvec->bv_offset, bvec->bv_len); | 1143 | set_page_region(page, bvec->bv_offset, bvec->bv_len); |
1144 | } | 1144 | } |
1145 | 1145 | ||
1146 | if (--bvec >= bio->bi_io_vec) | 1146 | if (--bvec >= bio->bi_io_vec) |
1147 | prefetchw(&bvec->bv_page->flags); | 1147 | prefetchw(&bvec->bv_page->flags); |
1148 | 1148 | ||
1149 | if (bp->b_flags & _XBF_PAGE_LOCKED) | 1149 | if (bp->b_flags & _XBF_PAGE_LOCKED) |
1150 | unlock_page(page); | 1150 | unlock_page(page); |
1151 | } while (bvec >= bio->bi_io_vec); | 1151 | } while (bvec >= bio->bi_io_vec); |
1152 | 1152 | ||
1153 | _xfs_buf_ioend(bp, 1); | 1153 | _xfs_buf_ioend(bp, 1); |
1154 | bio_put(bio); | 1154 | bio_put(bio); |
1155 | } | 1155 | } |
1156 | 1156 | ||
1157 | STATIC void | 1157 | STATIC void |
1158 | _xfs_buf_ioapply( | 1158 | _xfs_buf_ioapply( |
1159 | xfs_buf_t *bp) | 1159 | xfs_buf_t *bp) |
1160 | { | 1160 | { |
1161 | int rw, map_i, total_nr_pages, nr_pages; | 1161 | int rw, map_i, total_nr_pages, nr_pages; |
1162 | struct bio *bio; | 1162 | struct bio *bio; |
1163 | int offset = bp->b_offset; | 1163 | int offset = bp->b_offset; |
1164 | int size = bp->b_count_desired; | 1164 | int size = bp->b_count_desired; |
1165 | sector_t sector = bp->b_bn; | 1165 | sector_t sector = bp->b_bn; |
1166 | unsigned int blocksize = bp->b_target->bt_bsize; | 1166 | unsigned int blocksize = bp->b_target->bt_bsize; |
1167 | 1167 | ||
1168 | total_nr_pages = bp->b_page_count; | 1168 | total_nr_pages = bp->b_page_count; |
1169 | map_i = 0; | 1169 | map_i = 0; |
1170 | 1170 | ||
1171 | if (bp->b_flags & XBF_ORDERED) { | 1171 | if (bp->b_flags & XBF_ORDERED) { |
1172 | ASSERT(!(bp->b_flags & XBF_READ)); | 1172 | ASSERT(!(bp->b_flags & XBF_READ)); |
1173 | rw = WRITE_BARRIER; | 1173 | rw = WRITE_BARRIER; |
1174 | } else if (bp->b_flags & _XBF_RUN_QUEUES) { | 1174 | } else if (bp->b_flags & _XBF_RUN_QUEUES) { |
1175 | ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); | 1175 | ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); |
1176 | bp->b_flags &= ~_XBF_RUN_QUEUES; | 1176 | bp->b_flags &= ~_XBF_RUN_QUEUES; |
1177 | rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC; | 1177 | rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC; |
1178 | } else { | 1178 | } else { |
1179 | rw = (bp->b_flags & XBF_WRITE) ? WRITE : | 1179 | rw = (bp->b_flags & XBF_WRITE) ? WRITE : |
1180 | (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; | 1180 | (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; |
1181 | } | 1181 | } |
1182 | 1182 | ||
1183 | /* Special code path for reading a sub-page-size buffer in -- | 1183 | /* Special code path for reading a sub-page-size buffer in -- |
1184 | * we populate the whole page, and hence the other metadata | 1184 | * we populate the whole page, and hence the other metadata |
1185 | * in the same page. This optimization is only valid when the | 1185 | * in the same page. This optimization is only valid when the |
1186 | * filesystem block size is not smaller than the page size. | 1186 | * filesystem block size is not smaller than the page size. |
1187 | */ | 1187 | */ |
1188 | if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && | 1188 | if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && |
1189 | ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) == | 1189 | ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) == |
1190 | (XBF_READ|_XBF_PAGE_LOCKED)) && | 1190 | (XBF_READ|_XBF_PAGE_LOCKED)) && |
1191 | (blocksize >= PAGE_CACHE_SIZE)) { | 1191 | (blocksize >= PAGE_CACHE_SIZE)) { |
1192 | bio = bio_alloc(GFP_NOIO, 1); | 1192 | bio = bio_alloc(GFP_NOIO, 1); |
1193 | 1193 | ||
1194 | bio->bi_bdev = bp->b_target->bt_bdev; | 1194 | bio->bi_bdev = bp->b_target->bt_bdev; |
1195 | bio->bi_sector = sector - (offset >> BBSHIFT); | 1195 | bio->bi_sector = sector - (offset >> BBSHIFT); |
1196 | bio->bi_end_io = xfs_buf_bio_end_io; | 1196 | bio->bi_end_io = xfs_buf_bio_end_io; |
1197 | bio->bi_private = bp; | 1197 | bio->bi_private = bp; |
1198 | 1198 | ||
1199 | bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0); | 1199 | bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0); |
1200 | size = 0; | 1200 | size = 0; |
1201 | 1201 | ||
1202 | atomic_inc(&bp->b_io_remaining); | 1202 | atomic_inc(&bp->b_io_remaining); |
1203 | 1203 | ||
1204 | goto submit_io; | 1204 | goto submit_io; |
1205 | } | 1205 | } |
1206 | 1206 | ||
1207 | next_chunk: | 1207 | next_chunk: |
1208 | atomic_inc(&bp->b_io_remaining); | 1208 | atomic_inc(&bp->b_io_remaining); |
1209 | nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); | 1209 | nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); |
1210 | if (nr_pages > total_nr_pages) | 1210 | if (nr_pages > total_nr_pages) |
1211 | nr_pages = total_nr_pages; | 1211 | nr_pages = total_nr_pages; |
1212 | 1212 | ||
1213 | bio = bio_alloc(GFP_NOIO, nr_pages); | 1213 | bio = bio_alloc(GFP_NOIO, nr_pages); |
1214 | bio->bi_bdev = bp->b_target->bt_bdev; | 1214 | bio->bi_bdev = bp->b_target->bt_bdev; |
1215 | bio->bi_sector = sector; | 1215 | bio->bi_sector = sector; |
1216 | bio->bi_end_io = xfs_buf_bio_end_io; | 1216 | bio->bi_end_io = xfs_buf_bio_end_io; |
1217 | bio->bi_private = bp; | 1217 | bio->bi_private = bp; |
1218 | 1218 | ||
1219 | for (; size && nr_pages; nr_pages--, map_i++) { | 1219 | for (; size && nr_pages; nr_pages--, map_i++) { |
1220 | int rbytes, nbytes = PAGE_CACHE_SIZE - offset; | 1220 | int rbytes, nbytes = PAGE_CACHE_SIZE - offset; |
1221 | 1221 | ||
1222 | if (nbytes > size) | 1222 | if (nbytes > size) |
1223 | nbytes = size; | 1223 | nbytes = size; |
1224 | 1224 | ||
1225 | rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset); | 1225 | rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset); |
1226 | if (rbytes < nbytes) | 1226 | if (rbytes < nbytes) |
1227 | break; | 1227 | break; |
1228 | 1228 | ||
1229 | offset = 0; | 1229 | offset = 0; |
1230 | sector += nbytes >> BBSHIFT; | 1230 | sector += nbytes >> BBSHIFT; |
1231 | size -= nbytes; | 1231 | size -= nbytes; |
1232 | total_nr_pages--; | 1232 | total_nr_pages--; |
1233 | } | 1233 | } |
1234 | 1234 | ||
1235 | submit_io: | 1235 | submit_io: |
1236 | if (likely(bio->bi_size)) { | 1236 | if (likely(bio->bi_size)) { |
1237 | submit_bio(rw, bio); | 1237 | submit_bio(rw, bio); |
1238 | if (size) | 1238 | if (size) |
1239 | goto next_chunk; | 1239 | goto next_chunk; |
1240 | } else { | 1240 | } else { |
1241 | bio_put(bio); | 1241 | bio_put(bio); |
1242 | xfs_buf_ioerror(bp, EIO); | 1242 | xfs_buf_ioerror(bp, EIO); |
1243 | } | 1243 | } |
1244 | } | 1244 | } |
1245 | 1245 | ||
1246 | int | 1246 | int |
1247 | xfs_buf_iorequest( | 1247 | xfs_buf_iorequest( |
1248 | xfs_buf_t *bp) | 1248 | xfs_buf_t *bp) |
1249 | { | 1249 | { |
1250 | XB_TRACE(bp, "iorequest", 0); | 1250 | XB_TRACE(bp, "iorequest", 0); |
1251 | 1251 | ||
1252 | if (bp->b_flags & XBF_DELWRI) { | 1252 | if (bp->b_flags & XBF_DELWRI) { |
1253 | xfs_buf_delwri_queue(bp, 1); | 1253 | xfs_buf_delwri_queue(bp, 1); |
1254 | return 0; | 1254 | return 0; |
1255 | } | 1255 | } |
1256 | 1256 | ||
1257 | if (bp->b_flags & XBF_WRITE) { | 1257 | if (bp->b_flags & XBF_WRITE) { |
1258 | xfs_buf_wait_unpin(bp); | 1258 | xfs_buf_wait_unpin(bp); |
1259 | } | 1259 | } |
1260 | 1260 | ||
1261 | xfs_buf_hold(bp); | 1261 | xfs_buf_hold(bp); |
1262 | 1262 | ||
1263 | /* Set the count to 1 initially; this will stop an I/O | 1263 | /* Set the count to 1 initially; this will stop an I/O |
1264 | * completion callout which happens before we have started | 1264 | * completion callout which happens before we have started |
1265 | * all the I/O from calling xfs_buf_ioend too early. | 1265 | * all the I/O from calling xfs_buf_ioend too early. |
1266 | */ | 1266 | */ |
1267 | atomic_set(&bp->b_io_remaining, 1); | 1267 | atomic_set(&bp->b_io_remaining, 1); |
1268 | _xfs_buf_ioapply(bp); | 1268 | _xfs_buf_ioapply(bp); |
1269 | _xfs_buf_ioend(bp, 0); | 1269 | _xfs_buf_ioend(bp, 0); |
1270 | 1270 | ||
1271 | xfs_buf_rele(bp); | 1271 | xfs_buf_rele(bp); |
1272 | return 0; | 1272 | return 0; |
1273 | } | 1273 | } |
1274 | 1274 | ||
1275 | /* | 1275 | /* |
1276 | * Waits for I/O to complete on the buffer supplied. | 1276 | * Waits for I/O to complete on the buffer supplied. |
1277 | * It returns immediately if no I/O is pending. | 1277 | * It returns immediately if no I/O is pending. |
1278 | * It returns the I/O error code, if any, or 0 if there was no error. | 1278 | * It returns the I/O error code, if any, or 0 if there was no error. |
1279 | */ | 1279 | */ |
1280 | int | 1280 | int |
1281 | xfs_buf_iowait( | 1281 | xfs_buf_iowait( |
1282 | xfs_buf_t *bp) | 1282 | xfs_buf_t *bp) |
1283 | { | 1283 | { |
1284 | XB_TRACE(bp, "iowait", 0); | 1284 | XB_TRACE(bp, "iowait", 0); |
1285 | if (atomic_read(&bp->b_io_remaining)) | 1285 | if (atomic_read(&bp->b_io_remaining)) |
1286 | blk_run_address_space(bp->b_target->bt_mapping); | 1286 | blk_run_address_space(bp->b_target->bt_mapping); |
1287 | wait_for_completion(&bp->b_iowait); | 1287 | wait_for_completion(&bp->b_iowait); |
1288 | XB_TRACE(bp, "iowaited", (long)bp->b_error); | 1288 | XB_TRACE(bp, "iowaited", (long)bp->b_error); |
1289 | return bp->b_error; | 1289 | return bp->b_error; |
1290 | } | 1290 | } |
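
The request/wait pair above gives the synchronous I/O path. A sketch, assuming a locked buffer already set up with pages and a block number by caller code not shown here:

        int error;

        bp->b_flags |= XBF_READ;        /* read into the buffer's pages */
        xfs_buf_iorequest(bp);          /* build and submit the bio(s) */
        error = xfs_buf_iowait(bp);     /* sleeps on b_iowait; 0 or errno */
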
1291 | 1291 | ||
1292 | xfs_caddr_t | 1292 | xfs_caddr_t |
1293 | xfs_buf_offset( | 1293 | xfs_buf_offset( |
1294 | xfs_buf_t *bp, | 1294 | xfs_buf_t *bp, |
1295 | size_t offset) | 1295 | size_t offset) |
1296 | { | 1296 | { |
1297 | struct page *page; | 1297 | struct page *page; |
1298 | 1298 | ||
1299 | if (bp->b_flags & XBF_MAPPED) | 1299 | if (bp->b_flags & XBF_MAPPED) |
1300 | return XFS_BUF_PTR(bp) + offset; | 1300 | return XFS_BUF_PTR(bp) + offset; |
1301 | 1301 | ||
1302 | offset += bp->b_offset; | 1302 | offset += bp->b_offset; |
1303 | page = bp->b_pages[offset >> PAGE_CACHE_SHIFT]; | 1303 | page = bp->b_pages[offset >> PAGE_CACHE_SHIFT]; |
1304 | return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1)); | 1304 | return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1)); |
1305 | } | 1305 | } |
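
Worked example for the unmapped case (assuming PAGE_CACHE_SIZE == 4096): with bp->b_offset == 512 and offset == 7680, the sum 8192 selects b_pages[8192 >> 12] == b_pages[2], and 8192 & 4095 == 0 yields the address at the very start of that page.
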
1306 | 1306 | ||
1307 | /* | 1307 | /* |
1308 | * Move data into or out of a buffer. | 1308 | * Move data into or out of a buffer. |
1309 | */ | 1309 | */ |
1310 | void | 1310 | void |
1311 | xfs_buf_iomove( | 1311 | xfs_buf_iomove( |
1312 | xfs_buf_t *bp, /* buffer to process */ | 1312 | xfs_buf_t *bp, /* buffer to process */ |
1313 | size_t boff, /* starting buffer offset */ | 1313 | size_t boff, /* starting buffer offset */ |
1314 | size_t bsize, /* length to copy */ | 1314 | size_t bsize, /* length to copy */ |
1315 | caddr_t data, /* data address */ | 1315 | caddr_t data, /* data address */ |
1316 | xfs_buf_rw_t mode) /* read/write/zero flag */ | 1316 | xfs_buf_rw_t mode) /* read/write/zero flag */ |
1317 | { | 1317 | { |
1318 | size_t bend, cpoff, csize; | 1318 | size_t bend, cpoff, csize; |
1319 | struct page *page; | 1319 | struct page *page; |
1320 | 1320 | ||
1321 | bend = boff + bsize; | 1321 | bend = boff + bsize; |
1322 | while (boff < bend) { | 1322 | while (boff < bend) { |
1323 | page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; | 1323 | page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; |
1324 | cpoff = xfs_buf_poff(boff + bp->b_offset); | 1324 | cpoff = xfs_buf_poff(boff + bp->b_offset); |
1325 | csize = min_t(size_t, | 1325 | csize = min_t(size_t, |
1326 | PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff); | 1326 | PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff); |
1327 | 1327 | ||
1328 | ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); | 1328 | ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); |
1329 | 1329 | ||
1330 | switch (mode) { | 1330 | switch (mode) { |
1331 | case XBRW_ZERO: | 1331 | case XBRW_ZERO: |
1332 | memset(page_address(page) + cpoff, 0, csize); | 1332 | memset(page_address(page) + cpoff, 0, csize); |
1333 | break; | 1333 | break; |
1334 | case XBRW_READ: | 1334 | case XBRW_READ: |
1335 | memcpy(data, page_address(page) + cpoff, csize); | 1335 | memcpy(data, page_address(page) + cpoff, csize); |
1336 | break; | 1336 | break; |
1337 | case XBRW_WRITE: | 1337 | case XBRW_WRITE: |
1338 | memcpy(page_address(page) + cpoff, data, csize); | 1338 | memcpy(page_address(page) + cpoff, data, csize); |
1339 | } | 1339 | } |
1340 | 1340 | ||
1341 | boff += csize; | 1341 | boff += csize; |
1342 | data += csize; | 1342 | data += csize; |
1343 | } | 1343 | } |
1344 | } | 1344 | } |
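
Because iomove walks b_pages directly, it works on unmapped buffers too. A sketch zeroing a tail region; note the XBRW_ZERO case above never dereferences the data pointer, so NULL is safe here:

        /* zero the final 512 bytes of the buffer (illustrative only) */
        xfs_buf_iomove(bp, bp->b_count_desired - 512, 512, NULL, XBRW_ZERO);
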
1345 | 1345 | ||
1346 | /* | 1346 | /* |
1347 | * Handling of buffer targets (buftargs). | 1347 | * Handling of buffer targets (buftargs). |
1348 | */ | 1348 | */ |
1349 | 1349 | ||
1350 | /* | 1350 | /* |
1351 | * Wait for any bufs with callbacks that have been submitted but | 1351 | * Wait for any bufs with callbacks that have been submitted but |
1352 | * have not yet returned... walk the hash list for the target. | 1352 | * have not yet returned... walk the hash list for the target. |
1353 | */ | 1353 | */ |
1354 | void | 1354 | void |
1355 | xfs_wait_buftarg( | 1355 | xfs_wait_buftarg( |
1356 | xfs_buftarg_t *btp) | 1356 | xfs_buftarg_t *btp) |
1357 | { | 1357 | { |
1358 | xfs_buf_t *bp, *n; | 1358 | xfs_buf_t *bp, *n; |
1359 | xfs_bufhash_t *hash; | 1359 | xfs_bufhash_t *hash; |
1360 | uint i; | 1360 | uint i; |
1361 | 1361 | ||
1362 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { | 1362 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { |
1363 | hash = &btp->bt_hash[i]; | 1363 | hash = &btp->bt_hash[i]; |
1364 | again: | 1364 | again: |
1365 | spin_lock(&hash->bh_lock); | 1365 | spin_lock(&hash->bh_lock); |
1366 | list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { | 1366 | list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { |
1367 | ASSERT(btp == bp->b_target); | 1367 | ASSERT(btp == bp->b_target); |
1368 | if (!(bp->b_flags & XBF_FS_MANAGED)) { | 1368 | if (!(bp->b_flags & XBF_FS_MANAGED)) { |
1369 | spin_unlock(&hash->bh_lock); | 1369 | spin_unlock(&hash->bh_lock); |
1370 | /* | 1370 | /* |
1371 | * Catch superblock reference count leaks | 1371 | * Catch superblock reference count leaks |
1372 | * immediately | 1372 | * immediately |
1373 | */ | 1373 | */ |
1374 | BUG_ON(bp->b_bn == 0); | 1374 | BUG_ON(bp->b_bn == 0); |
1375 | delay(100); | 1375 | delay(100); |
1376 | goto again; | 1376 | goto again; |
1377 | } | 1377 | } |
1378 | } | 1378 | } |
1379 | spin_unlock(&hash->bh_lock); | 1379 | spin_unlock(&hash->bh_lock); |
1380 | } | 1380 | } |
1381 | } | 1381 | } |
1382 | 1382 | ||
1383 | /* | 1383 | /* |
1384 | * Allocate buffer hash table for a given target. | 1384 | * Allocate buffer hash table for a given target. |
1385 | * For devices containing metadata (i.e. not the log/realtime devices) | 1385 | * For devices containing metadata (i.e. not the log/realtime devices) |
1386 | * we need to allocate a much larger hash table. | 1386 | * we need to allocate a much larger hash table. |
1387 | */ | 1387 | */ |
1388 | STATIC void | 1388 | STATIC void |
1389 | xfs_alloc_bufhash( | 1389 | xfs_alloc_bufhash( |
1390 | xfs_buftarg_t *btp, | 1390 | xfs_buftarg_t *btp, |
1391 | int external) | 1391 | int external) |
1392 | { | 1392 | { |
1393 | unsigned int i; | 1393 | unsigned int i; |
1394 | 1394 | ||
1395 | btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ | 1395 | btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ |
1396 | btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; | 1396 | btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; |
1397 | btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) * | 1397 | btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) * |
1398 | sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE); | 1398 | sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE); |
1399 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { | 1399 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { |
1400 | spin_lock_init(&btp->bt_hash[i].bh_lock); | 1400 | spin_lock_init(&btp->bt_hash[i].bh_lock); |
1401 | INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); | 1401 | INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); |
1402 | } | 1402 | } |
1403 | } | 1403 | } |
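
The sizing works out as follows: external targets (log/realtime) get bt_hashshift == 3, i.e. 1 << 3 == 8 buckets with bt_hashmask == 0x7, while the data device gets 1 << 8 == 256 buckets with mask 0xff, so a bucket is selected by ANDing a hash value with bt_hashmask.
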
1404 | 1404 | ||
1405 | STATIC void | 1405 | STATIC void |
1406 | xfs_free_bufhash( | 1406 | xfs_free_bufhash( |
1407 | xfs_buftarg_t *btp) | 1407 | xfs_buftarg_t *btp) |
1408 | { | 1408 | { |
1409 | kmem_free(btp->bt_hash); | 1409 | kmem_free(btp->bt_hash); |
1410 | btp->bt_hash = NULL; | 1410 | btp->bt_hash = NULL; |
1411 | } | 1411 | } |
1412 | 1412 | ||
1413 | /* | 1413 | /* |
1414 | * buftarg list for delwrite queue processing | 1414 | * buftarg list for delwrite queue processing |
1415 | */ | 1415 | */ |
1416 | static LIST_HEAD(xfs_buftarg_list); | 1416 | static LIST_HEAD(xfs_buftarg_list); |
1417 | static DEFINE_SPINLOCK(xfs_buftarg_lock); | 1417 | static DEFINE_SPINLOCK(xfs_buftarg_lock); |
1418 | 1418 | ||
1419 | STATIC void | 1419 | STATIC void |
1420 | xfs_register_buftarg( | 1420 | xfs_register_buftarg( |
1421 | xfs_buftarg_t *btp) | 1421 | xfs_buftarg_t *btp) |
1422 | { | 1422 | { |
1423 | spin_lock(&xfs_buftarg_lock); | 1423 | spin_lock(&xfs_buftarg_lock); |
1424 | list_add(&btp->bt_list, &xfs_buftarg_list); | 1424 | list_add(&btp->bt_list, &xfs_buftarg_list); |
1425 | spin_unlock(&xfs_buftarg_lock); | 1425 | spin_unlock(&xfs_buftarg_lock); |
1426 | } | 1426 | } |
1427 | 1427 | ||
1428 | STATIC void | 1428 | STATIC void |
1429 | xfs_unregister_buftarg( | 1429 | xfs_unregister_buftarg( |
1430 | xfs_buftarg_t *btp) | 1430 | xfs_buftarg_t *btp) |
1431 | { | 1431 | { |
1432 | spin_lock(&xfs_buftarg_lock); | 1432 | spin_lock(&xfs_buftarg_lock); |
1433 | list_del(&btp->bt_list); | 1433 | list_del(&btp->bt_list); |
1434 | spin_unlock(&xfs_buftarg_lock); | 1434 | spin_unlock(&xfs_buftarg_lock); |
1435 | } | 1435 | } |
1436 | 1436 | ||
1437 | void | 1437 | void |
1438 | xfs_free_buftarg( | 1438 | xfs_free_buftarg( |
1439 | xfs_buftarg_t *btp) | 1439 | xfs_buftarg_t *btp) |
1440 | { | 1440 | { |
1441 | xfs_flush_buftarg(btp, 1); | 1441 | xfs_flush_buftarg(btp, 1); |
1442 | xfs_blkdev_issue_flush(btp); | 1442 | xfs_blkdev_issue_flush(btp); |
1443 | xfs_free_bufhash(btp); | 1443 | xfs_free_bufhash(btp); |
1444 | iput(btp->bt_mapping->host); | 1444 | iput(btp->bt_mapping->host); |
1445 | 1445 | ||
1446 | /* Unregister the buftarg first so that we don't get a | 1446 | /* Unregister the buftarg first so that we don't get a |
1447 | * wakeup finding a non-existent task | 1447 | * wakeup finding a non-existent task |
1448 | */ | 1448 | */ |
1449 | xfs_unregister_buftarg(btp); | 1449 | xfs_unregister_buftarg(btp); |
1450 | kthread_stop(btp->bt_task); | 1450 | kthread_stop(btp->bt_task); |
1451 | 1451 | ||
1452 | kmem_free(btp); | 1452 | kmem_free(btp); |
1453 | } | 1453 | } |
1454 | 1454 | ||
1455 | STATIC int | 1455 | STATIC int |
1456 | xfs_setsize_buftarg_flags( | 1456 | xfs_setsize_buftarg_flags( |
1457 | xfs_buftarg_t *btp, | 1457 | xfs_buftarg_t *btp, |
1458 | unsigned int blocksize, | 1458 | unsigned int blocksize, |
1459 | unsigned int sectorsize, | 1459 | unsigned int sectorsize, |
1460 | int verbose) | 1460 | int verbose) |
1461 | { | 1461 | { |
1462 | btp->bt_bsize = blocksize; | 1462 | btp->bt_bsize = blocksize; |
1463 | btp->bt_sshift = ffs(sectorsize) - 1; | 1463 | btp->bt_sshift = ffs(sectorsize) - 1; |
1464 | btp->bt_smask = sectorsize - 1; | 1464 | btp->bt_smask = sectorsize - 1; |
1465 | 1465 | ||
1466 | if (set_blocksize(btp->bt_bdev, sectorsize)) { | 1466 | if (set_blocksize(btp->bt_bdev, sectorsize)) { |
1467 | printk(KERN_WARNING | 1467 | printk(KERN_WARNING |
1468 | "XFS: Cannot set_blocksize to %u on device %s\n", | 1468 | "XFS: Cannot set_blocksize to %u on device %s\n", |
1469 | sectorsize, XFS_BUFTARG_NAME(btp)); | 1469 | sectorsize, XFS_BUFTARG_NAME(btp)); |
1470 | return EINVAL; | 1470 | return EINVAL; |
1471 | } | 1471 | } |
1472 | 1472 | ||
1473 | if (verbose && | 1473 | if (verbose && |
1474 | (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) { | 1474 | (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) { |
1475 | printk(KERN_WARNING | 1475 | printk(KERN_WARNING |
1476 | "XFS: %u byte sectors in use on device %s. " | 1476 | "XFS: %u byte sectors in use on device %s. " |
1477 | "This is suboptimal; %u or greater is ideal.\n", | 1477 | "This is suboptimal; %u or greater is ideal.\n", |
1478 | sectorsize, XFS_BUFTARG_NAME(btp), | 1478 | sectorsize, XFS_BUFTARG_NAME(btp), |
1479 | (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG); | 1479 | (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG); |
1480 | } | 1480 | } |
1481 | 1481 | ||
1482 | return 0; | 1482 | return 0; |
1483 | } | 1483 | } |
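
Worked example for the fields set above: sectorsize == 512 gives bt_sshift == ffs(512) - 1 == 9 and bt_smask == 0x1ff, so an offset is sector-aligned exactly when (offset & btp->bt_smask) == 0.
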
1484 | 1484 | ||
1485 | /* | 1485 | /* |
1486 | * When allocating the initial buffer target we have not yet | 1486 | * When allocating the initial buffer target we have not yet |
1487 | * read in the superblock, so we don't know what sized sectors | 1487 | * read in the superblock, so we don't know what sized sectors |
1488 | * are being used at this early stage. Play safe. | 1488 | * are being used at this early stage. Play safe. |
1489 | */ | 1489 | */ |
1490 | STATIC int | 1490 | STATIC int |
1491 | xfs_setsize_buftarg_early( | 1491 | xfs_setsize_buftarg_early( |
1492 | xfs_buftarg_t *btp, | 1492 | xfs_buftarg_t *btp, |
1493 | struct block_device *bdev) | 1493 | struct block_device *bdev) |
1494 | { | 1494 | { |
1495 | return xfs_setsize_buftarg_flags(btp, | 1495 | return xfs_setsize_buftarg_flags(btp, |
1496 | PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0); | 1496 | PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0); |
1497 | } | 1497 | } |
1498 | 1498 | ||
1499 | int | 1499 | int |
1500 | xfs_setsize_buftarg( | 1500 | xfs_setsize_buftarg( |
1501 | xfs_buftarg_t *btp, | 1501 | xfs_buftarg_t *btp, |
1502 | unsigned int blocksize, | 1502 | unsigned int blocksize, |
1503 | unsigned int sectorsize) | 1503 | unsigned int sectorsize) |
1504 | { | 1504 | { |
1505 | return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1); | 1505 | return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1); |
1506 | } | 1506 | } |
1507 | 1507 | ||
1508 | STATIC int | 1508 | STATIC int |
1509 | xfs_mapping_buftarg( | 1509 | xfs_mapping_buftarg( |
1510 | xfs_buftarg_t *btp, | 1510 | xfs_buftarg_t *btp, |
1511 | struct block_device *bdev) | 1511 | struct block_device *bdev) |
1512 | { | 1512 | { |
1513 | struct backing_dev_info *bdi; | 1513 | struct backing_dev_info *bdi; |
1514 | struct inode *inode; | 1514 | struct inode *inode; |
1515 | struct address_space *mapping; | 1515 | struct address_space *mapping; |
1516 | static const struct address_space_operations mapping_aops = { | 1516 | static const struct address_space_operations mapping_aops = { |
1517 | .sync_page = block_sync_page, | 1517 | .sync_page = block_sync_page, |
1518 | .migratepage = fail_migrate_page, | 1518 | .migratepage = fail_migrate_page, |
1519 | }; | 1519 | }; |
1520 | 1520 | ||
1521 | inode = new_inode(bdev->bd_inode->i_sb); | 1521 | inode = new_inode(bdev->bd_inode->i_sb); |
1522 | if (!inode) { | 1522 | if (!inode) { |
1523 | printk(KERN_WARNING | 1523 | printk(KERN_WARNING |
1524 | "XFS: Cannot allocate mapping inode for device %s\n", | 1524 | "XFS: Cannot allocate mapping inode for device %s\n", |
1525 | XFS_BUFTARG_NAME(btp)); | 1525 | XFS_BUFTARG_NAME(btp)); |
1526 | return ENOMEM; | 1526 | return ENOMEM; |
1527 | } | 1527 | } |
1528 | inode->i_mode = S_IFBLK; | 1528 | inode->i_mode = S_IFBLK; |
1529 | inode->i_bdev = bdev; | 1529 | inode->i_bdev = bdev; |
1530 | inode->i_rdev = bdev->bd_dev; | 1530 | inode->i_rdev = bdev->bd_dev; |
1531 | bdi = blk_get_backing_dev_info(bdev); | 1531 | bdi = blk_get_backing_dev_info(bdev); |
1532 | if (!bdi) | 1532 | if (!bdi) |
1533 | bdi = &default_backing_dev_info; | 1533 | bdi = &default_backing_dev_info; |
1534 | mapping = &inode->i_data; | 1534 | mapping = &inode->i_data; |
1535 | mapping->a_ops = &mapping_aops; | 1535 | mapping->a_ops = &mapping_aops; |
1536 | mapping->backing_dev_info = bdi; | 1536 | mapping->backing_dev_info = bdi; |
1537 | mapping_set_gfp_mask(mapping, GFP_NOFS); | 1537 | mapping_set_gfp_mask(mapping, GFP_NOFS); |
1538 | btp->bt_mapping = mapping; | 1538 | btp->bt_mapping = mapping; |
1539 | return 0; | 1539 | return 0; |
1540 | } | 1540 | } |
1541 | 1541 | ||
1542 | STATIC int | 1542 | STATIC int |
1543 | xfs_alloc_delwrite_queue( | 1543 | xfs_alloc_delwrite_queue( |
1544 | xfs_buftarg_t *btp) | 1544 | xfs_buftarg_t *btp) |
1545 | { | 1545 | { |
1546 | int error = 0; | 1546 | int error = 0; |
1547 | 1547 | ||
1548 | INIT_LIST_HEAD(&btp->bt_list); | 1548 | INIT_LIST_HEAD(&btp->bt_list); |
1549 | INIT_LIST_HEAD(&btp->bt_delwrite_queue); | 1549 | INIT_LIST_HEAD(&btp->bt_delwrite_queue); |
1550 | spin_lock_init(&btp->bt_delwrite_lock); | 1550 | spin_lock_init(&btp->bt_delwrite_lock); |
1551 | btp->bt_flags = 0; | 1551 | btp->bt_flags = 0; |
1552 | btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd"); | 1552 | btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd"); |
1553 | if (IS_ERR(btp->bt_task)) { | 1553 | if (IS_ERR(btp->bt_task)) { |
1554 | error = PTR_ERR(btp->bt_task); | 1554 | error = PTR_ERR(btp->bt_task); |
1555 | goto out_error; | 1555 | goto out_error; |
1556 | } | 1556 | } |
1557 | xfs_register_buftarg(btp); | 1557 | xfs_register_buftarg(btp); |
1558 | out_error: | 1558 | out_error: |
1559 | return error; | 1559 | return error; |
1560 | } | 1560 | } |
1561 | 1561 | ||
1562 | xfs_buftarg_t * | 1562 | xfs_buftarg_t * |
1563 | xfs_alloc_buftarg( | 1563 | xfs_alloc_buftarg( |
1564 | struct block_device *bdev, | 1564 | struct block_device *bdev, |
1565 | int external) | 1565 | int external) |
1566 | { | 1566 | { |
1567 | xfs_buftarg_t *btp; | 1567 | xfs_buftarg_t *btp; |
1568 | 1568 | ||
1569 | btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); | 1569 | btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); |
1570 | 1570 | ||
1571 | btp->bt_dev = bdev->bd_dev; | 1571 | btp->bt_dev = bdev->bd_dev; |
1572 | btp->bt_bdev = bdev; | 1572 | btp->bt_bdev = bdev; |
1573 | if (xfs_setsize_buftarg_early(btp, bdev)) | 1573 | if (xfs_setsize_buftarg_early(btp, bdev)) |
1574 | goto error; | 1574 | goto error; |
1575 | if (xfs_mapping_buftarg(btp, bdev)) | 1575 | if (xfs_mapping_buftarg(btp, bdev)) |
1576 | goto error; | 1576 | goto error; |
1577 | if (xfs_alloc_delwrite_queue(btp)) | 1577 | if (xfs_alloc_delwrite_queue(btp)) |
1578 | goto error; | 1578 | goto error; |
1579 | xfs_alloc_bufhash(btp, external); | 1579 | xfs_alloc_bufhash(btp, external); |
1580 | return btp; | 1580 | return btp; |
1581 | 1581 | ||
1582 | error: | 1582 | error: |
1583 | kmem_free(btp); | 1583 | kmem_free(btp); |
1584 | return NULL; | 1584 | return NULL; |
1585 | } | 1585 | } |
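
A sketch of the typical mount-time pairing for this allocator; the caller code is assumed, not part of this diff, and bdev is an already-opened struct block_device:

        btp = xfs_alloc_buftarg(bdev, 0);       /* 0: data device, 256-bucket hash */
        if (!btp)
                return ENOMEM;
        /* issue metadata I/O against btp while mounted */
        xfs_free_buftarg(btp);                  /* flush delwri queue, tear down */
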
1586 | 1586 | ||
1587 | 1587 | ||
1588 | /* | 1588 | /* |
1589 | * Delayed write buffer handling | 1589 | * Delayed write buffer handling |
1590 | */ | 1590 | */ |
1591 | STATIC void | 1591 | STATIC void |
1592 | xfs_buf_delwri_queue( | 1592 | xfs_buf_delwri_queue( |
1593 | xfs_buf_t *bp, | 1593 | xfs_buf_t *bp, |
1594 | int unlock) | 1594 | int unlock) |
1595 | { | 1595 | { |
1596 | struct list_head *dwq = &bp->b_target->bt_delwrite_queue; | 1596 | struct list_head *dwq = &bp->b_target->bt_delwrite_queue; |
1597 | spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; | 1597 | spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; |
1598 | 1598 | ||
1599 | XB_TRACE(bp, "delwri_q", (long)unlock); | 1599 | XB_TRACE(bp, "delwri_q", (long)unlock); |
1600 | ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC)); | 1600 | ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC)); |
1601 | 1601 | ||
1602 | spin_lock(dwlk); | 1602 | spin_lock(dwlk); |
1603 | /* If already in the queue, dequeue and place at tail */ | 1603 | /* If already in the queue, dequeue and place at tail */ |
1604 | if (!list_empty(&bp->b_list)) { | 1604 | if (!list_empty(&bp->b_list)) { |
1605 | ASSERT(bp->b_flags & _XBF_DELWRI_Q); | 1605 | ASSERT(bp->b_flags & _XBF_DELWRI_Q); |
1606 | if (unlock) | 1606 | if (unlock) |
1607 | atomic_dec(&bp->b_hold); | 1607 | atomic_dec(&bp->b_hold); |
1608 | list_del(&bp->b_list); | 1608 | list_del(&bp->b_list); |
1609 | } | 1609 | } |
1610 | 1610 | ||
1611 | bp->b_flags |= _XBF_DELWRI_Q; | 1611 | bp->b_flags |= _XBF_DELWRI_Q; |
1612 | list_add_tail(&bp->b_list, dwq); | 1612 | list_add_tail(&bp->b_list, dwq); |
1613 | bp->b_queuetime = jiffies; | 1613 | bp->b_queuetime = jiffies; |
1614 | spin_unlock(dwlk); | 1614 | spin_unlock(dwlk); |
1615 | 1615 | ||
1616 | if (unlock) | 1616 | if (unlock) |
1617 | xfs_buf_unlock(bp); | 1617 | xfs_buf_unlock(bp); |
1618 | } | 1618 | } |
1619 | 1619 | ||
1620 | void | 1620 | void |
1621 | xfs_buf_delwri_dequeue( | 1621 | xfs_buf_delwri_dequeue( |
1622 | xfs_buf_t *bp) | 1622 | xfs_buf_t *bp) |
1623 | { | 1623 | { |
1624 | spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; | 1624 | spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; |
1625 | int dequeued = 0; | 1625 | int dequeued = 0; |
1626 | 1626 | ||
1627 | spin_lock(dwlk); | 1627 | spin_lock(dwlk); |
1628 | if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) { | 1628 | if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) { |
1629 | ASSERT(bp->b_flags & _XBF_DELWRI_Q); | 1629 | ASSERT(bp->b_flags & _XBF_DELWRI_Q); |
1630 | list_del_init(&bp->b_list); | 1630 | list_del_init(&bp->b_list); |
1631 | dequeued = 1; | 1631 | dequeued = 1; |
1632 | } | 1632 | } |
1633 | bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); | 1633 | bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); |
1634 | spin_unlock(dwlk); | 1634 | spin_unlock(dwlk); |
1635 | 1635 | ||
1636 | if (dequeued) | 1636 | if (dequeued) |
1637 | xfs_buf_rele(bp); | 1637 | xfs_buf_rele(bp); |
1638 | 1638 | ||
1639 | XB_TRACE(bp, "delwri_dq", (long)dequeued); | 1639 | XB_TRACE(bp, "delwri_dq", (long)dequeued); |
1640 | } | 1640 | } |
1641 | 1641 | ||
1642 | STATIC void | 1642 | STATIC void |
1643 | xfs_buf_runall_queues( | 1643 | xfs_buf_runall_queues( |
1644 | struct workqueue_struct *queue) | 1644 | struct workqueue_struct *queue) |
1645 | { | 1645 | { |
1646 | flush_workqueue(queue); | 1646 | flush_workqueue(queue); |
1647 | } | 1647 | } |
1648 | 1648 | ||
1649 | STATIC int | 1649 | STATIC int |
1650 | xfsbufd_wakeup( | 1650 | xfsbufd_wakeup( |
1651 | int priority, | 1651 | int priority, |
1652 | gfp_t mask) | 1652 | gfp_t mask) |
1653 | { | 1653 | { |
1654 | xfs_buftarg_t *btp; | 1654 | xfs_buftarg_t *btp; |
1655 | 1655 | ||
1656 | spin_lock(&xfs_buftarg_lock); | 1656 | spin_lock(&xfs_buftarg_lock); |
1657 | list_for_each_entry(btp, &xfs_buftarg_list, bt_list) { | 1657 | list_for_each_entry(btp, &xfs_buftarg_list, bt_list) { |
1658 | if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags)) | 1658 | if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags)) |
1659 | continue; | 1659 | continue; |
1660 | set_bit(XBT_FORCE_FLUSH, &btp->bt_flags); | 1660 | set_bit(XBT_FORCE_FLUSH, &btp->bt_flags); |
1661 | wake_up_process(btp->bt_task); | 1661 | wake_up_process(btp->bt_task); |
1662 | } | 1662 | } |
1663 | spin_unlock(&xfs_buftarg_lock); | 1663 | spin_unlock(&xfs_buftarg_lock); |
1664 | return 0; | 1664 | return 0; |
1665 | } | 1665 | } |
1666 | 1666 | ||
1667 | /* | 1667 | /* |
1668 | * Move as many buffers as specified to the supplied list | 1668 | * Move as many buffers as specified to the supplied list |
1669 | * indicating whether we skipped any buffers to prevent deadlocks. | 1669 | * indicating whether we skipped any buffers to prevent deadlocks. |
1670 | */ | 1670 | */ |
1671 | STATIC int | 1671 | STATIC int |
1672 | xfs_buf_delwri_split( | 1672 | xfs_buf_delwri_split( |
1673 | xfs_buftarg_t *target, | 1673 | xfs_buftarg_t *target, |
1674 | struct list_head *list, | 1674 | struct list_head *list, |
1675 | unsigned long age) | 1675 | unsigned long age) |
1676 | { | 1676 | { |
1677 | xfs_buf_t *bp, *n; | 1677 | xfs_buf_t *bp, *n; |
1678 | struct list_head *dwq = &target->bt_delwrite_queue; | 1678 | struct list_head *dwq = &target->bt_delwrite_queue; |
1679 | spinlock_t *dwlk = &target->bt_delwrite_lock; | 1679 | spinlock_t *dwlk = &target->bt_delwrite_lock; |
1680 | int skipped = 0; | 1680 | int skipped = 0; |
1681 | int force; | 1681 | int force; |
1682 | 1682 | ||
1683 | force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags); | 1683 | force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags); |
1684 | INIT_LIST_HEAD(list); | 1684 | INIT_LIST_HEAD(list); |
1685 | spin_lock(dwlk); | 1685 | spin_lock(dwlk); |
1686 | list_for_each_entry_safe(bp, n, dwq, b_list) { | 1686 | list_for_each_entry_safe(bp, n, dwq, b_list) { |
1687 | XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp)); | 1687 | XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp)); |
1688 | ASSERT(bp->b_flags & XBF_DELWRI); | 1688 | ASSERT(bp->b_flags & XBF_DELWRI); |
1689 | 1689 | ||
1690 | if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) { | 1690 | if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) { |
1691 | if (!force && | 1691 | if (!force && |
1692 | time_before(jiffies, bp->b_queuetime + age)) { | 1692 | time_before(jiffies, bp->b_queuetime + age)) { |
1693 | xfs_buf_unlock(bp); | 1693 | xfs_buf_unlock(bp); |
1694 | break; | 1694 | break; |
1695 | } | 1695 | } |
1696 | 1696 | ||
1697 | bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q| | 1697 | bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q| |
1698 | _XBF_RUN_QUEUES); | 1698 | _XBF_RUN_QUEUES); |
1699 | bp->b_flags |= XBF_WRITE; | 1699 | bp->b_flags |= XBF_WRITE; |
1700 | list_move_tail(&bp->b_list, list); | 1700 | list_move_tail(&bp->b_list, list); |
1701 | } else | 1701 | } else |
1702 | skipped++; | 1702 | skipped++; |
1703 | } | 1703 | } |
1704 | spin_unlock(dwlk); | 1704 | spin_unlock(dwlk); |
1705 | 1705 | ||
1706 | return skipped; | 1706 | return skipped; |
1707 | 1707 | ||
1708 | } | 1708 | } |
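
For scale (assuming the usual xfs_buf_age_centisecs default of 1500, which lives outside this diff): age == 1500 * msecs_to_jiffies(10), i.e. 15 seconds' worth of jiffies, so without XBT_FORCE_FLUSH set a buffer must sit on the delwri queue for about 15 seconds before the split moves it to the write list.
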
1709 | 1709 | ||
1710 | STATIC int | 1710 | STATIC int |
1711 | xfsbufd( | 1711 | xfsbufd( |
1712 | void *data) | 1712 | void *data) |
1713 | { | 1713 | { |
1714 | struct list_head tmp; | 1714 | struct list_head tmp; |
1715 | xfs_buftarg_t *target = (xfs_buftarg_t *)data; | 1715 | xfs_buftarg_t *target = (xfs_buftarg_t *)data; |
1716 | int count; | 1716 | int count; |
1717 | xfs_buf_t *bp; | 1717 | xfs_buf_t *bp; |
1718 | 1718 | ||
1719 | current->flags |= PF_MEMALLOC; | 1719 | current->flags |= PF_MEMALLOC; |
1720 | 1720 | ||
1721 | set_freezable(); | 1721 | set_freezable(); |
1722 | 1722 | ||
1723 | do { | 1723 | do { |
1724 | if (unlikely(freezing(current))) { | 1724 | if (unlikely(freezing(current))) { |
1725 | set_bit(XBT_FORCE_SLEEP, &target->bt_flags); | 1725 | set_bit(XBT_FORCE_SLEEP, &target->bt_flags); |
1726 | refrigerator(); | 1726 | refrigerator(); |
1727 | } else { | 1727 | } else { |
1728 | clear_bit(XBT_FORCE_SLEEP, &target->bt_flags); | 1728 | clear_bit(XBT_FORCE_SLEEP, &target->bt_flags); |
1729 | } | 1729 | } |
1730 | 1730 | ||
1731 | schedule_timeout_interruptible( | 1731 | schedule_timeout_interruptible( |
1732 | xfs_buf_timer_centisecs * msecs_to_jiffies(10)); | 1732 | xfs_buf_timer_centisecs * msecs_to_jiffies(10)); |
1733 | 1733 | ||
1734 | xfs_buf_delwri_split(target, &tmp, | 1734 | xfs_buf_delwri_split(target, &tmp, |
1735 | xfs_buf_age_centisecs * msecs_to_jiffies(10)); | 1735 | xfs_buf_age_centisecs * msecs_to_jiffies(10)); |
1736 | 1736 | ||
1737 | count = 0; | 1737 | count = 0; |
1738 | while (!list_empty(&tmp)) { | 1738 | while (!list_empty(&tmp)) { |
1739 | bp = list_entry(tmp.next, xfs_buf_t, b_list); | 1739 | bp = list_entry(tmp.next, xfs_buf_t, b_list); |
1740 | ASSERT(target == bp->b_target); | 1740 | ASSERT(target == bp->b_target); |
1741 | 1741 | ||
1742 | list_del_init(&bp->b_list); | 1742 | list_del_init(&bp->b_list); |
1743 | xfs_buf_iostrategy(bp); | 1743 | xfs_buf_iostrategy(bp); |
1744 | count++; | 1744 | count++; |
1745 | } | 1745 | } |
1746 | 1746 | ||
1747 | if (as_list_len > 0) | 1747 | if (as_list_len > 0) |
1748 | purge_addresses(); | 1748 | purge_addresses(); |
1749 | if (count) | 1749 | if (count) |
1750 | blk_run_address_space(target->bt_mapping); | 1750 | blk_run_address_space(target->bt_mapping); |
1751 | 1751 | ||
1752 | } while (!kthread_should_stop()); | 1752 | } while (!kthread_should_stop()); |
1753 | 1753 | ||
1754 | return 0; | 1754 | return 0; |
1755 | } | 1755 | } |
1756 | 1756 | ||
1757 | /* | 1757 | /* |
1758 | * Go through all incore buffers, and release buffers if they belong to | 1758 | * Go through all incore buffers, and release buffers if they belong to |
1759 | * the given device. This is used in filesystem error handling to | 1759 | * the given device. This is used in filesystem error handling to |
1760 | * preserve the consistency of its metadata. | 1760 | * preserve the consistency of its metadata. |
1761 | */ | 1761 | */ |
1762 | int | 1762 | int |
1763 | xfs_flush_buftarg( | 1763 | xfs_flush_buftarg( |
1764 | xfs_buftarg_t *target, | 1764 | xfs_buftarg_t *target, |
1765 | int wait) | 1765 | int wait) |
1766 | { | 1766 | { |
1767 | struct list_head tmp; | 1767 | struct list_head tmp; |
1768 | xfs_buf_t *bp, *n; | 1768 | xfs_buf_t *bp, *n; |
1769 | int pincount = 0; | 1769 | int pincount = 0; |
1770 | 1770 | ||
1771 | xfs_buf_runall_queues(xfsdatad_workqueue); | 1771 | xfs_buf_runall_queues(xfsdatad_workqueue); |
1772 | xfs_buf_runall_queues(xfslogd_workqueue); | 1772 | xfs_buf_runall_queues(xfslogd_workqueue); |
1773 | 1773 | ||
1774 | set_bit(XBT_FORCE_FLUSH, &target->bt_flags); | 1774 | set_bit(XBT_FORCE_FLUSH, &target->bt_flags); |
1775 | pincount = xfs_buf_delwri_split(target, &tmp, 0); | 1775 | pincount = xfs_buf_delwri_split(target, &tmp, 0); |
1776 | 1776 | ||
1777 | /* | 1777 | /* |
1778 | * Dropped the delayed write list lock, now walk the temporary list | 1778 | * Dropped the delayed write list lock, now walk the temporary list |
1779 | */ | 1779 | */ |
1780 | list_for_each_entry_safe(bp, n, &tmp, b_list) { | 1780 | list_for_each_entry_safe(bp, n, &tmp, b_list) { |
1781 | ASSERT(target == bp->b_target); | 1781 | ASSERT(target == bp->b_target); |
1782 | if (wait) | 1782 | if (wait) |
1783 | bp->b_flags &= ~XBF_ASYNC; | 1783 | bp->b_flags &= ~XBF_ASYNC; |
1784 | else | 1784 | else |
1785 | list_del_init(&bp->b_list); | 1785 | list_del_init(&bp->b_list); |
1786 | 1786 | ||
1787 | xfs_buf_iostrategy(bp); | 1787 | xfs_buf_iostrategy(bp); |
1788 | } | 1788 | } |
1789 | 1789 | ||
1790 | if (wait) | 1790 | if (wait) |
1791 | blk_run_address_space(target->bt_mapping); | 1791 | blk_run_address_space(target->bt_mapping); |
1792 | 1792 | ||
1793 | /* | 1793 | /* |
1794 | * Remaining list items must be flushed before returning | 1794 | * Remaining list items must be flushed before returning |
1795 | */ | 1795 | */ |
1796 | while (!list_empty(&tmp)) { | 1796 | while (!list_empty(&tmp)) { |
1797 | bp = list_entry(tmp.next, xfs_buf_t, b_list); | 1797 | bp = list_entry(tmp.next, xfs_buf_t, b_list); |
1798 | 1798 | ||
1799 | list_del_init(&bp->b_list); | 1799 | list_del_init(&bp->b_list); |
1800 | xfs_iowait(bp); | 1800 | xfs_iowait(bp); |
1801 | xfs_buf_relse(bp); | 1801 | xfs_buf_relse(bp); |
1802 | } | 1802 | } |
1803 | 1803 | ||
1804 | return pincount; | 1804 | return pincount; |
1805 | } | 1805 | } |
1806 | 1806 | ||
1807 | int __init | 1807 | int __init |
1808 | xfs_buf_init(void) | 1808 | xfs_buf_init(void) |
1809 | { | 1809 | { |
1810 | #ifdef XFS_BUF_TRACE | 1810 | #ifdef XFS_BUF_TRACE |
1811 | xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_NOFS); | 1811 | xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_NOFS); |
1812 | #endif | 1812 | #endif |
1813 | 1813 | ||
1814 | xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", | 1814 | xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", |
1815 | KM_ZONE_HWALIGN, NULL); | 1815 | KM_ZONE_HWALIGN, NULL); |
1816 | if (!xfs_buf_zone) | 1816 | if (!xfs_buf_zone) |
1817 | goto out_free_trace_buf; | 1817 | goto out_free_trace_buf; |
1818 | 1818 | ||
1819 | xfslogd_workqueue = create_workqueue("xfslogd"); | 1819 | xfslogd_workqueue = create_workqueue("xfslogd"); |
1820 | if (!xfslogd_workqueue) | 1820 | if (!xfslogd_workqueue) |
1821 | goto out_free_buf_zone; | 1821 | goto out_free_buf_zone; |
1822 | 1822 | ||
1823 | xfsdatad_workqueue = create_workqueue("xfsdatad"); | 1823 | xfsdatad_workqueue = create_workqueue("xfsdatad"); |
1824 | if (!xfsdatad_workqueue) | 1824 | if (!xfsdatad_workqueue) |
1825 | goto out_destroy_xfslogd_workqueue; | 1825 | goto out_destroy_xfslogd_workqueue; |
1826 | 1826 | ||
1827 | register_shrinker(&xfs_buf_shake); | 1827 | register_shrinker(&xfs_buf_shake); |
1828 | return 0; | 1828 | return 0; |
1829 | 1829 | ||
1830 | out_destroy_xfslogd_workqueue: | 1830 | out_destroy_xfslogd_workqueue: |
1831 | destroy_workqueue(xfslogd_workqueue); | 1831 | destroy_workqueue(xfslogd_workqueue); |
1832 | out_free_buf_zone: | 1832 | out_free_buf_zone: |
1833 | kmem_zone_destroy(xfs_buf_zone); | 1833 | kmem_zone_destroy(xfs_buf_zone); |
1834 | out_free_trace_buf: | 1834 | out_free_trace_buf: |
1835 | #ifdef XFS_BUF_TRACE | 1835 | #ifdef XFS_BUF_TRACE |
1836 | ktrace_free(xfs_buf_trace_buf); | 1836 | ktrace_free(xfs_buf_trace_buf); |
1837 | #endif | 1837 | #endif |
1838 | return -ENOMEM; | 1838 | return -ENOMEM; |
1839 | } | 1839 | } |
1840 | 1840 | ||
1841 | void | 1841 | void |
1842 | xfs_buf_terminate(void) | 1842 | xfs_buf_terminate(void) |
1843 | { | 1843 | { |
1844 | unregister_shrinker(&xfs_buf_shake); | 1844 | unregister_shrinker(&xfs_buf_shake); |
1845 | destroy_workqueue(xfsdatad_workqueue); | 1845 | destroy_workqueue(xfsdatad_workqueue); |
1846 | destroy_workqueue(xfslogd_workqueue); | 1846 | destroy_workqueue(xfslogd_workqueue); |
1847 | kmem_zone_destroy(xfs_buf_zone); | 1847 | kmem_zone_destroy(xfs_buf_zone); |
1848 | #ifdef XFS_BUF_TRACE | 1848 | #ifdef XFS_BUF_TRACE |
1849 | ktrace_free(xfs_buf_trace_buf); | 1849 | ktrace_free(xfs_buf_trace_buf); |
1850 | #endif | 1850 | #endif |
1851 | } | 1851 | } |
1852 | 1852 | ||
1853 | #ifdef CONFIG_KDB_MODULES | 1853 | #ifdef CONFIG_KDB_MODULES |
1854 | struct list_head * | 1854 | struct list_head * |
1855 | xfs_get_buftarg_list(void) | 1855 | xfs_get_buftarg_list(void) |
1856 | { | 1856 | { |
1857 | return &xfs_buftarg_list; | 1857 | return &xfs_buftarg_list; |
1858 | } | 1858 | } |
1859 | #endif | 1859 | #endif |
1860 | 1860 |
fs/xfs/linux-2.6/xfs_buf.h
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | 2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #ifndef __XFS_BUF_H__ | 18 | #ifndef __XFS_BUF_H__ |
19 | #define __XFS_BUF_H__ | 19 | #define __XFS_BUF_H__ |
20 | 20 | ||
21 | #include <linux/list.h> | 21 | #include <linux/list.h> |
22 | #include <linux/types.h> | 22 | #include <linux/types.h> |
23 | #include <linux/spinlock.h> | 23 | #include <linux/spinlock.h> |
24 | #include <asm/system.h> | 24 | #include <asm/system.h> |
25 | #include <linux/mm.h> | 25 | #include <linux/mm.h> |
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/buffer_head.h> | 27 | #include <linux/buffer_head.h> |
28 | #include <linux/uio.h> | 28 | #include <linux/uio.h> |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * Base types | 31 | * Base types |
32 | */ | 32 | */ |
33 | 33 | ||
34 | #define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) | 34 | #define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) |
35 | 35 | ||
36 | #define xfs_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE) | 36 | #define xfs_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE) |
37 | #define xfs_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) | 37 | #define xfs_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) |
38 | #define xfs_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT) | 38 | #define xfs_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT) |
39 | #define xfs_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK) | 39 | #define xfs_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK) |
40 | 40 | ||
41 | typedef enum { | 41 | typedef enum { |
42 | XBRW_READ = 1, /* transfer into target memory */ | 42 | XBRW_READ = 1, /* transfer into target memory */ |
43 | XBRW_WRITE = 2, /* transfer from target memory */ | 43 | XBRW_WRITE = 2, /* transfer from target memory */ |
44 | XBRW_ZERO = 3, /* Zero target memory */ | 44 | XBRW_ZERO = 3, /* Zero target memory */ |
45 | } xfs_buf_rw_t; | 45 | } xfs_buf_rw_t; |
46 | 46 | ||
47 | typedef enum { | 47 | typedef enum { |
48 | XBF_READ = (1 << 0), /* buffer intended for reading from device */ | 48 | XBF_READ = (1 << 0), /* buffer intended for reading from device */ |
49 | XBF_WRITE = (1 << 1), /* buffer intended for writing to device */ | 49 | XBF_WRITE = (1 << 1), /* buffer intended for writing to device */ |
50 | XBF_MAPPED = (1 << 2), /* buffer mapped (b_addr valid) */ | 50 | XBF_MAPPED = (1 << 2), /* buffer mapped (b_addr valid) */ |
51 | XBF_ASYNC = (1 << 4), /* initiator will not wait for completion */ | 51 | XBF_ASYNC = (1 << 4), /* initiator will not wait for completion */ |
52 | XBF_DONE = (1 << 5), /* all pages in the buffer uptodate */ | 52 | XBF_DONE = (1 << 5), /* all pages in the buffer uptodate */ |
53 | XBF_DELWRI = (1 << 6), /* buffer has dirty pages */ | 53 | XBF_DELWRI = (1 << 6), /* buffer has dirty pages */ |
54 | XBF_STALE = (1 << 7), /* buffer has been staled, do not find it */ | 54 | XBF_STALE = (1 << 7), /* buffer has been staled, do not find it */ |
55 | XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ | 55 | XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ |
56 | XBF_ORDERED = (1 << 11), /* use ordered writes */ | 56 | XBF_ORDERED = (1 << 11), /* use ordered writes */ |
57 | XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ | 57 | XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ |
58 | 58 | ||
59 | /* flags used only as arguments to access routines */ | 59 | /* flags used only as arguments to access routines */ |
60 | XBF_LOCK = (1 << 14), /* lock requested */ | 60 | XBF_LOCK = (1 << 14), /* lock requested */ |
61 | XBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */ | 61 | XBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */ |
62 | XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */ | 62 | XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */ |
63 | 63 | ||
64 | /* flags used only internally */ | 64 | /* flags used only internally */ |
65 | _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ | 65 | _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ |
66 | _XBF_PAGES = (1 << 18), /* backed by refcounted pages */ | 66 | _XBF_PAGES = (1 << 18), /* backed by refcounted pages */ |
67 | _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ | 67 | _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ |
68 | _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ | 68 | _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ |
69 | 69 | ||
70 | /* | 70 | /* |
71 | * Special flag for supporting metadata blocks smaller than a FSB. | 71 | * Special flag for supporting metadata blocks smaller than a FSB. |
72 | * | 72 | * |
73 | * In this case we can have multiple xfs_buf_t on a single page and | 73 | * In this case we can have multiple xfs_buf_t on a single page and |
74 | * need to lock out concurrent xfs_buf_t readers as they only | 74 | * need to lock out concurrent xfs_buf_t readers as they only |
75 | * serialise access to the buffer. | 75 | * serialise access to the buffer. |
76 | * | 76 | * |
77 | * In the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation | 77 | * In the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation |
78 | * between reads of the page. Hence we can have one thread read the | 78 | * between reads of the page. Hence we can have one thread read the |
79 | * page and modify it, but then race with another thread that thinks | 79 | * page and modify it, but then race with another thread that thinks |
80 | * the page is not up-to-date and hence reads it again. | 80 | * the page is not up-to-date and hence reads it again. |
81 | * | 81 | * |
82 | * The result is that the first modification to the page is lost. | 82 | * The result is that the first modification to the page is lost. |
83 | * This sort of AGF/AGI reading race can happen when unlinking inodes | 83 | * This sort of AGF/AGI reading race can happen when unlinking inodes |
84 | * that require truncation and results in the AGI unlinked list | 84 | * that require truncation and results in the AGI unlinked list |
85 | * modifications being lost. | 85 | * modifications being lost. |
86 | */ | 86 | */ |
87 | _XBF_PAGE_LOCKED = (1 << 22), | 87 | _XBF_PAGE_LOCKED = (1 << 22), |
88 | 88 | ||
89 | /* | 89 | /* |
90 | * If we try a barrier write, but it fails we have to communicate | 90 | * If we try a barrier write, but it fails we have to communicate |
91 | * this to the upper layers. Unfortunately b_error gets overwritten | 91 | * this to the upper layers. Unfortunately b_error gets overwritten |
92 | * when the buffer is re-issued so we have to add another flag to | 92 | * when the buffer is re-issued so we have to add another flag to |
93 | * keep this information. | 93 | * keep this information. |
94 | */ | 94 | */ |
95 | _XFS_BARRIER_FAILED = (1 << 23), | 95 | _XFS_BARRIER_FAILED = (1 << 23), |
96 | } xfs_buf_flags_t; | 96 | } xfs_buf_flags_t; |
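The flag values above follow the standard C bitmask-enum pattern: each flag owns a single bit, so callers can OR flags together, test them individually, and clear one without disturbing the rest. A minimal, self-contained userspace sketch of that pattern (the DEMO_* names are illustrative only and merely mirror the layout of xfs_buf_flags_t):

#include <stdio.h>

/* Illustrative subset of the bit layout used by xfs_buf_flags_t. */
typedef enum {
	DEMO_READ   = (1 << 0),
	DEMO_WRITE  = (1 << 1),
	DEMO_MAPPED = (1 << 2),
	DEMO_ASYNC  = (1 << 4),
} demo_buf_flags_t;

int main(void)
{
	int flags = DEMO_READ | DEMO_MAPPED;	/* combine flags */

	if (flags & DEMO_READ)			/* test one bit */
		printf("buffer is a read buffer\n");

	flags &= ~DEMO_MAPPED;			/* clear one bit */
	flags |= DEMO_ASYNC;			/* set another */
	printf("flags = 0x%x\n", flags);
	return 0;
}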
97 | 97 | ||
98 | typedef enum { | 98 | typedef enum { |
99 | XBT_FORCE_SLEEP = 0, | 99 | XBT_FORCE_SLEEP = 0, |
100 | XBT_FORCE_FLUSH = 1, | 100 | XBT_FORCE_FLUSH = 1, |
101 | } xfs_buftarg_flags_t; | 101 | } xfs_buftarg_flags_t; |
102 | 102 | ||
103 | typedef struct xfs_bufhash { | 103 | typedef struct xfs_bufhash { |
104 | struct list_head bh_list; | 104 | struct list_head bh_list; |
105 | spinlock_t bh_lock; | 105 | spinlock_t bh_lock; |
106 | } xfs_bufhash_t; | 106 | } xfs_bufhash_t; |
107 | 107 | ||
108 | typedef struct xfs_buftarg { | 108 | typedef struct xfs_buftarg { |
109 | dev_t bt_dev; | 109 | dev_t bt_dev; |
110 | struct block_device *bt_bdev; | 110 | struct block_device *bt_bdev; |
111 | struct address_space *bt_mapping; | 111 | struct address_space *bt_mapping; |
112 | unsigned int bt_bsize; | 112 | unsigned int bt_bsize; |
113 | unsigned int bt_sshift; | 113 | unsigned int bt_sshift; |
114 | size_t bt_smask; | 114 | size_t bt_smask; |
115 | 115 | ||
116 | /* per device buffer hash table */ | 116 | /* per device buffer hash table */ |
117 | uint bt_hashmask; | 117 | uint bt_hashmask; |
118 | uint bt_hashshift; | 118 | uint bt_hashshift; |
119 | xfs_bufhash_t *bt_hash; | 119 | xfs_bufhash_t *bt_hash; |
120 | 120 | ||
121 | /* per device delwri queue */ | 121 | /* per device delwri queue */ |
122 | struct task_struct *bt_task; | 122 | struct task_struct *bt_task; |
123 | struct list_head bt_list; | 123 | struct list_head bt_list; |
124 | struct list_head bt_delwrite_queue; | 124 | struct list_head bt_delwrite_queue; |
125 | spinlock_t bt_delwrite_lock; | 125 | spinlock_t bt_delwrite_lock; |
126 | unsigned long bt_flags; | 126 | unsigned long bt_flags; |
127 | } xfs_buftarg_t; | 127 | } xfs_buftarg_t; |
128 | 128 | ||
129 | /* | 129 | /* |
130 | * xfs_buf_t: Buffer structure for pagecache-based buffers | 130 | * xfs_buf_t: Buffer structure for pagecache-based buffers |
131 | * | 131 | * |
132 | * This buffer structure is used by the pagecache buffer management routines | 132 | * This buffer structure is used by the pagecache buffer management routines |
133 | * to refer to an assembly of pages forming a logical buffer. | 133 | * to refer to an assembly of pages forming a logical buffer. |
134 | * | 134 | * |
135 | * The buffer structure is used on a temporary basis only, and discarded when | 135 | * The buffer structure is used on a temporary basis only, and discarded when |
136 | * released. The real data storage is recorded in the pagecache. Buffers are | 136 | * released. The real data storage is recorded in the pagecache. Buffers are |
137 | * hashed to the block device on which the file system resides. | 137 | * hashed to the block device on which the file system resides. |
138 | */ | 138 | */ |
139 | 139 | ||
140 | struct xfs_buf; | 140 | struct xfs_buf; |
141 | typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); | 141 | typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); |
142 | typedef void (*xfs_buf_relse_t)(struct xfs_buf *); | 142 | typedef void (*xfs_buf_relse_t)(struct xfs_buf *); |
143 | typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *); | 143 | typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *); |
144 | 144 | ||
145 | #define XB_PAGES 2 | 145 | #define XB_PAGES 2 |
146 | 146 | ||
147 | typedef struct xfs_buf { | 147 | typedef struct xfs_buf { |
148 | struct semaphore b_sema; /* semaphore for lockables */ | 148 | struct semaphore b_sema; /* semaphore for lockables */ |
149 | unsigned long b_queuetime; /* time buffer was queued */ | 149 | unsigned long b_queuetime; /* time buffer was queued */ |
150 | atomic_t b_pin_count; /* pin count */ | 150 | atomic_t b_pin_count; /* pin count */ |
151 | wait_queue_head_t b_waiters; /* unpin waiters */ | 151 | wait_queue_head_t b_waiters; /* unpin waiters */ |
152 | struct list_head b_list; | 152 | struct list_head b_list; |
153 | xfs_buf_flags_t b_flags; /* status flags */ | 153 | xfs_buf_flags_t b_flags; /* status flags */ |
154 | struct list_head b_hash_list; /* hash table list */ | 154 | struct list_head b_hash_list; /* hash table list */ |
155 | xfs_bufhash_t *b_hash; /* hash table list start */ | 155 | xfs_bufhash_t *b_hash; /* hash table list start */ |
156 | xfs_buftarg_t *b_target; /* buffer target (device) */ | 156 | xfs_buftarg_t *b_target; /* buffer target (device) */ |
157 | atomic_t b_hold; /* reference count */ | 157 | atomic_t b_hold; /* reference count */ |
158 | xfs_daddr_t b_bn; /* block number for I/O */ | 158 | xfs_daddr_t b_bn; /* block number for I/O */ |
159 | xfs_off_t b_file_offset; /* offset in file */ | 159 | xfs_off_t b_file_offset; /* offset in file */ |
160 | size_t b_buffer_length;/* size of buffer in bytes */ | 160 | size_t b_buffer_length;/* size of buffer in bytes */ |
161 | size_t b_count_desired;/* desired transfer size */ | 161 | size_t b_count_desired;/* desired transfer size */ |
162 | void *b_addr; /* virtual address of buffer */ | 162 | void *b_addr; /* virtual address of buffer */ |
163 | struct work_struct b_iodone_work; | 163 | struct work_struct b_iodone_work; |
164 | atomic_t b_io_remaining; /* #outstanding I/O requests */ | 164 | atomic_t b_io_remaining; /* #outstanding I/O requests */ |
165 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ | 165 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ |
166 | xfs_buf_relse_t b_relse; /* releasing function */ | 166 | xfs_buf_relse_t b_relse; /* releasing function */ |
167 | xfs_buf_bdstrat_t b_strat; /* pre-write function */ | 167 | xfs_buf_bdstrat_t b_strat; /* pre-write function */ |
168 | struct completion b_iowait; /* queue for I/O waiters */ | 168 | struct completion b_iowait; /* queue for I/O waiters */ |
169 | void *b_fspriv; | 169 | void *b_fspriv; |
170 | void *b_fspriv2; | 170 | void *b_fspriv2; |
171 | void *b_fspriv3; | 171 | struct xfs_mount *b_mount; |
172 | unsigned short b_error; /* error code on I/O */ | 172 | unsigned short b_error; /* error code on I/O */ |
173 | unsigned int b_page_count; /* size of page array */ | 173 | unsigned int b_page_count; /* size of page array */ |
174 | unsigned int b_offset; /* page offset in first page */ | 174 | unsigned int b_offset; /* page offset in first page */ |
175 | struct page **b_pages; /* array of page pointers */ | 175 | struct page **b_pages; /* array of page pointers */ |
176 | struct page *b_page_array[XB_PAGES]; /* inline pages */ | 176 | struct page *b_page_array[XB_PAGES]; /* inline pages */ |
177 | #ifdef XFS_BUF_LOCK_TRACKING | 177 | #ifdef XFS_BUF_LOCK_TRACKING |
178 | int b_last_holder; | 178 | int b_last_holder; |
179 | #endif | 179 | #endif |
180 | } xfs_buf_t; | 180 | } xfs_buf_t; |
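The new b_mount field above takes over from the untyped b_fspriv3 slot, whose XFS_BUF_FSPRIVATE3/XFS_BUF_SET_FSPRIVATE3 casting macros are dropped further down in this header. A before/after sketch of what that buys a caller (bp is assumed to be a valid xfs_buf_t; kernel context, so this is a sketch rather than a runnable program):

	xfs_mount_t	*mp;

	/* Before: the mount pointer hides behind a void * slot and
	 * must be cast in and out through an accessor macro. */
	mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
	XFS_BUF_SET_FSPRIVATE3(bp, mp);

	/* After: a properly typed field, so the compiler checks the
	 * assignment and no macro or cast is needed. */
	mp = bp->b_mount;
	bp->b_mount = mp;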
181 | 181 | ||
182 | 182 | ||
183 | /* Finding and Reading Buffers */ | 183 | /* Finding and Reading Buffers */ |
184 | extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t, | 184 | extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t, |
185 | xfs_buf_flags_t, xfs_buf_t *); | 185 | xfs_buf_flags_t, xfs_buf_t *); |
186 | #define xfs_incore(buftarg,blkno,len,lockit) \ | 186 | #define xfs_incore(buftarg,blkno,len,lockit) \ |
187 | _xfs_buf_find(buftarg, blkno, len, lockit, NULL) | 187 | _xfs_buf_find(buftarg, blkno, len, lockit, NULL) |
188 | 188 | ||
189 | extern xfs_buf_t *xfs_buf_get_flags(xfs_buftarg_t *, xfs_off_t, size_t, | 189 | extern xfs_buf_t *xfs_buf_get_flags(xfs_buftarg_t *, xfs_off_t, size_t, |
190 | xfs_buf_flags_t); | 190 | xfs_buf_flags_t); |
191 | #define xfs_buf_get(target, blkno, len, flags) \ | 191 | #define xfs_buf_get(target, blkno, len, flags) \ |
192 | xfs_buf_get_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED) | 192 | xfs_buf_get_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED) |
193 | 193 | ||
194 | extern xfs_buf_t *xfs_buf_read_flags(xfs_buftarg_t *, xfs_off_t, size_t, | 194 | extern xfs_buf_t *xfs_buf_read_flags(xfs_buftarg_t *, xfs_off_t, size_t, |
195 | xfs_buf_flags_t); | 195 | xfs_buf_flags_t); |
196 | #define xfs_buf_read(target, blkno, len, flags) \ | 196 | #define xfs_buf_read(target, blkno, len, flags) \ |
197 | xfs_buf_read_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED) | 197 | xfs_buf_read_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED) |
198 | 198 | ||
199 | extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); | 199 | extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); |
200 | extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *); | 200 | extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *); |
201 | extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); | 201 | extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); |
202 | extern void xfs_buf_hold(xfs_buf_t *); | 202 | extern void xfs_buf_hold(xfs_buf_t *); |
203 | extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t, | 203 | extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t, |
204 | xfs_buf_flags_t); | 204 | xfs_buf_flags_t); |
205 | 205 | ||
206 | /* Releasing Buffers */ | 206 | /* Releasing Buffers */ |
207 | extern void xfs_buf_free(xfs_buf_t *); | 207 | extern void xfs_buf_free(xfs_buf_t *); |
208 | extern void xfs_buf_rele(xfs_buf_t *); | 208 | extern void xfs_buf_rele(xfs_buf_t *); |
209 | 209 | ||
210 | /* Locking and Unlocking Buffers */ | 210 | /* Locking and Unlocking Buffers */ |
211 | extern int xfs_buf_cond_lock(xfs_buf_t *); | 211 | extern int xfs_buf_cond_lock(xfs_buf_t *); |
212 | extern int xfs_buf_lock_value(xfs_buf_t *); | 212 | extern int xfs_buf_lock_value(xfs_buf_t *); |
213 | extern void xfs_buf_lock(xfs_buf_t *); | 213 | extern void xfs_buf_lock(xfs_buf_t *); |
214 | extern void xfs_buf_unlock(xfs_buf_t *); | 214 | extern void xfs_buf_unlock(xfs_buf_t *); |
215 | 215 | ||
216 | /* Buffer Read and Write Routines */ | 216 | /* Buffer Read and Write Routines */ |
217 | extern int xfs_bawrite(void *mp, xfs_buf_t *bp); | 217 | extern int xfs_bawrite(void *mp, xfs_buf_t *bp); |
218 | extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); | 218 | extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); |
219 | extern void xfs_buf_ioend(xfs_buf_t *, int); | 219 | extern void xfs_buf_ioend(xfs_buf_t *, int); |
220 | extern void xfs_buf_ioerror(xfs_buf_t *, int); | 220 | extern void xfs_buf_ioerror(xfs_buf_t *, int); |
221 | extern int xfs_buf_iorequest(xfs_buf_t *); | 221 | extern int xfs_buf_iorequest(xfs_buf_t *); |
222 | extern int xfs_buf_iowait(xfs_buf_t *); | 222 | extern int xfs_buf_iowait(xfs_buf_t *); |
223 | extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t, | 223 | extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t, |
224 | xfs_buf_rw_t); | 224 | xfs_buf_rw_t); |
225 | 225 | ||
226 | static inline int xfs_buf_iostrategy(xfs_buf_t *bp) | 226 | static inline int xfs_buf_iostrategy(xfs_buf_t *bp) |
227 | { | 227 | { |
228 | return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp); | 228 | return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp); |
229 | } | 229 | } |
230 | 230 | ||
231 | static inline int xfs_buf_geterror(xfs_buf_t *bp) | 231 | static inline int xfs_buf_geterror(xfs_buf_t *bp) |
232 | { | 232 | { |
233 | return bp ? bp->b_error : ENOMEM; | 233 | return bp ? bp->b_error : ENOMEM; |
234 | } | 234 | } |
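The two inlines above are small but worth noting: xfs_buf_iostrategy() is a strategy-pattern dispatch (run the installed b_strat pre-write hook if there is one, else the default xfs_buf_iorequest()), and xfs_buf_geterror() is deliberately NULL-safe so callers can pass a buffer pointer that may not exist. A self-contained sketch of the same two idioms (demo types and names only, not the XFS API):

#include <stdio.h>
#include <stddef.h>
#include <errno.h>

struct demo_buf {
	int (*strat)(struct demo_buf *);	/* optional override */
	int error;
};

static int default_io(struct demo_buf *bp)
{
	printf("default I/O path\n");
	return 0;
}

/* Dispatch: use the override when installed, else the default path. */
static int demo_iostrategy(struct demo_buf *bp)
{
	return bp->strat ? bp->strat(bp) : default_io(bp);
}

/* NULL-safe accessor: a missing buffer reads as an allocation failure. */
static int demo_geterror(struct demo_buf *bp)
{
	return bp ? bp->error : ENOMEM;
}

int main(void)
{
	struct demo_buf b = { .strat = NULL, .error = 0 };

	demo_iostrategy(&b);
	printf("error on NULL buffer: %d\n", demo_geterror(NULL));
	return 0;
}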
235 | 235 | ||
236 | /* Buffer Utility Routines */ | 236 | /* Buffer Utility Routines */ |
237 | extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); | 237 | extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); |
238 | 238 | ||
239 | /* Pinning Buffer Storage in Memory */ | 239 | /* Pinning Buffer Storage in Memory */ |
240 | extern void xfs_buf_pin(xfs_buf_t *); | 240 | extern void xfs_buf_pin(xfs_buf_t *); |
241 | extern void xfs_buf_unpin(xfs_buf_t *); | 241 | extern void xfs_buf_unpin(xfs_buf_t *); |
242 | extern int xfs_buf_ispin(xfs_buf_t *); | 242 | extern int xfs_buf_ispin(xfs_buf_t *); |
243 | 243 | ||
244 | /* Delayed Write Buffer Routines */ | 244 | /* Delayed Write Buffer Routines */ |
245 | extern void xfs_buf_delwri_dequeue(xfs_buf_t *); | 245 | extern void xfs_buf_delwri_dequeue(xfs_buf_t *); |
246 | 246 | ||
247 | /* Buffer Daemon Setup Routines */ | 247 | /* Buffer Daemon Setup Routines */ |
248 | extern int xfs_buf_init(void); | 248 | extern int xfs_buf_init(void); |
249 | extern void xfs_buf_terminate(void); | 249 | extern void xfs_buf_terminate(void); |
250 | 250 | ||
251 | #ifdef XFS_BUF_TRACE | 251 | #ifdef XFS_BUF_TRACE |
252 | extern ktrace_t *xfs_buf_trace_buf; | 252 | extern ktrace_t *xfs_buf_trace_buf; |
253 | extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *); | 253 | extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *); |
254 | #else | 254 | #else |
255 | #define xfs_buf_trace(bp,id,ptr,ra) do { } while (0) | 255 | #define xfs_buf_trace(bp,id,ptr,ra) do { } while (0) |
256 | #endif | 256 | #endif |
257 | 257 | ||
258 | #define xfs_buf_target_name(target) \ | 258 | #define xfs_buf_target_name(target) \ |
259 | ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; }) | 259 | ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; }) |
260 | 260 | ||
261 | 261 | ||
262 | #define XFS_B_ASYNC XBF_ASYNC | 262 | #define XFS_B_ASYNC XBF_ASYNC |
263 | #define XFS_B_DELWRI XBF_DELWRI | 263 | #define XFS_B_DELWRI XBF_DELWRI |
264 | #define XFS_B_READ XBF_READ | 264 | #define XFS_B_READ XBF_READ |
265 | #define XFS_B_WRITE XBF_WRITE | 265 | #define XFS_B_WRITE XBF_WRITE |
266 | #define XFS_B_STALE XBF_STALE | 266 | #define XFS_B_STALE XBF_STALE |
267 | 267 | ||
268 | #define XFS_BUF_TRYLOCK XBF_TRYLOCK | 268 | #define XFS_BUF_TRYLOCK XBF_TRYLOCK |
269 | #define XFS_INCORE_TRYLOCK XBF_TRYLOCK | 269 | #define XFS_INCORE_TRYLOCK XBF_TRYLOCK |
270 | #define XFS_BUF_LOCK XBF_LOCK | 270 | #define XFS_BUF_LOCK XBF_LOCK |
271 | #define XFS_BUF_MAPPED XBF_MAPPED | 271 | #define XFS_BUF_MAPPED XBF_MAPPED |
272 | 272 | ||
273 | #define BUF_BUSY XBF_DONT_BLOCK | 273 | #define BUF_BUSY XBF_DONT_BLOCK |
274 | 274 | ||
275 | #define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) | 275 | #define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) |
276 | #define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ | 276 | #define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ |
277 | ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) | 277 | ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) |
278 | 278 | ||
279 | #define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE) | 279 | #define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE) |
280 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE) | 280 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE) |
281 | #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XFS_B_STALE) | 281 | #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XFS_B_STALE) |
282 | #define XFS_BUF_SUPER_STALE(bp) do { \ | 282 | #define XFS_BUF_SUPER_STALE(bp) do { \ |
283 | XFS_BUF_STALE(bp); \ | 283 | XFS_BUF_STALE(bp); \ |
284 | xfs_buf_delwri_dequeue(bp); \ | 284 | xfs_buf_delwri_dequeue(bp); \ |
285 | XFS_BUF_DONE(bp); \ | 285 | XFS_BUF_DONE(bp); \ |
286 | } while (0) | 286 | } while (0) |
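XFS_BUF_SUPER_STALE() wraps its three statements in do { } while (0), the standard C idiom that makes a multi-statement macro expand to exactly one statement, so it stays correct under an unbraced if/else. A runnable sketch of why the wrapper matters (demo macro, not the XFS one):

#include <stdio.h>

/* Without the do/while(0) wrapper, only the first statement below
 * would be guarded by an unbraced if; with it, the whole macro body
 * behaves as a single statement. */
#define MARK_STALE(x, y) do {	\
	(x) = 1;		\
	(y) = 0;		\
} while (0)

int main(void)
{
	int stale = 0, dirty = 1, cond = 0;

	if (cond)
		MARK_STALE(stale, dirty);	/* expands safely */

	printf("stale=%d dirty=%d\n", stale, dirty);
	return 0;
}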
287 | 287 | ||
288 | #define XFS_BUF_MANAGE XBF_FS_MANAGED | 288 | #define XFS_BUF_MANAGE XBF_FS_MANAGED |
289 | #define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED) | 289 | #define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED) |
290 | 290 | ||
291 | #define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) | 291 | #define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) |
292 | #define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) | 292 | #define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) |
293 | #define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) | 293 | #define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) |
294 | 294 | ||
295 | #define XFS_BUF_ERROR(bp,no) xfs_buf_ioerror(bp,no) | 295 | #define XFS_BUF_ERROR(bp,no) xfs_buf_ioerror(bp,no) |
296 | #define XFS_BUF_GETERROR(bp) xfs_buf_geterror(bp) | 296 | #define XFS_BUF_GETERROR(bp) xfs_buf_geterror(bp) |
297 | #define XFS_BUF_ISERROR(bp) (xfs_buf_geterror(bp) ? 1 : 0) | 297 | #define XFS_BUF_ISERROR(bp) (xfs_buf_geterror(bp) ? 1 : 0) |
298 | 298 | ||
299 | #define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE) | 299 | #define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE) |
300 | #define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE) | 300 | #define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE) |
301 | #define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE) | 301 | #define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE) |
302 | 302 | ||
303 | #define XFS_BUF_BUSY(bp) do { } while (0) | 303 | #define XFS_BUF_BUSY(bp) do { } while (0) |
304 | #define XFS_BUF_UNBUSY(bp) do { } while (0) | 304 | #define XFS_BUF_UNBUSY(bp) do { } while (0) |
305 | #define XFS_BUF_ISBUSY(bp) (1) | 305 | #define XFS_BUF_ISBUSY(bp) (1) |
306 | 306 | ||
307 | #define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC) | 307 | #define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC) |
308 | #define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC) | 308 | #define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC) |
309 | #define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC) | 309 | #define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC) |
310 | 310 | ||
311 | #define XFS_BUF_ORDERED(bp) ((bp)->b_flags |= XBF_ORDERED) | 311 | #define XFS_BUF_ORDERED(bp) ((bp)->b_flags |= XBF_ORDERED) |
312 | #define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED) | 312 | #define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED) |
313 | #define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED) | 313 | #define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED) |
314 | 314 | ||
315 | #define XFS_BUF_SHUT(bp) do { } while (0) | 315 | #define XFS_BUF_SHUT(bp) do { } while (0) |
316 | #define XFS_BUF_UNSHUT(bp) do { } while (0) | 316 | #define XFS_BUF_UNSHUT(bp) do { } while (0) |
317 | #define XFS_BUF_ISSHUT(bp) (0) | 317 | #define XFS_BUF_ISSHUT(bp) (0) |
318 | 318 | ||
319 | #define XFS_BUF_HOLD(bp) xfs_buf_hold(bp) | 319 | #define XFS_BUF_HOLD(bp) xfs_buf_hold(bp) |
320 | #define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) | 320 | #define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) |
321 | #define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) | 321 | #define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) |
322 | #define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ) | 322 | #define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ) |
323 | 323 | ||
324 | #define XFS_BUF_WRITE(bp) ((bp)->b_flags |= XBF_WRITE) | 324 | #define XFS_BUF_WRITE(bp) ((bp)->b_flags |= XBF_WRITE) |
325 | #define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) | 325 | #define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) |
326 | #define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) | 326 | #define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) |
327 | 327 | ||
328 | #define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone) | 328 | #define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone) |
329 | #define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func)) | 329 | #define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func)) |
330 | #define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL) | 330 | #define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL) |
331 | #define XFS_BUF_SET_BDSTRAT_FUNC(bp, func) ((bp)->b_strat = (func)) | 331 | #define XFS_BUF_SET_BDSTRAT_FUNC(bp, func) ((bp)->b_strat = (func)) |
332 | #define XFS_BUF_CLR_BDSTRAT_FUNC(bp) ((bp)->b_strat = NULL) | 332 | #define XFS_BUF_CLR_BDSTRAT_FUNC(bp) ((bp)->b_strat = NULL) |
333 | 333 | ||
334 | #define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv) | 334 | #define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv) |
335 | #define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val)) | 335 | #define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val)) |
336 | #define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) | 336 | #define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) |
337 | #define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) | 337 | #define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) |
338 | #define XFS_BUF_FSPRIVATE3(bp, type) ((type)(bp)->b_fspriv3) | ||
339 | #define XFS_BUF_SET_FSPRIVATE3(bp, val) ((bp)->b_fspriv3 = (void*)(val)) | ||
340 | #define XFS_BUF_SET_START(bp) do { } while (0) | 338 | #define XFS_BUF_SET_START(bp) do { } while (0) |
341 | #define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func)) | 339 | #define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func)) |
342 | 340 | ||
343 | #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) | 341 | #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) |
344 | #define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt) | 342 | #define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt) |
345 | #define XFS_BUF_ADDR(bp) ((bp)->b_bn) | 343 | #define XFS_BUF_ADDR(bp) ((bp)->b_bn) |
346 | #define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno)) | 344 | #define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno)) |
347 | #define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset) | 345 | #define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset) |
348 | #define XFS_BUF_SET_OFFSET(bp, off) ((bp)->b_file_offset = (off)) | 346 | #define XFS_BUF_SET_OFFSET(bp, off) ((bp)->b_file_offset = (off)) |
349 | #define XFS_BUF_COUNT(bp) ((bp)->b_count_desired) | 347 | #define XFS_BUF_COUNT(bp) ((bp)->b_count_desired) |
350 | #define XFS_BUF_SET_COUNT(bp, cnt) ((bp)->b_count_desired = (cnt)) | 348 | #define XFS_BUF_SET_COUNT(bp, cnt) ((bp)->b_count_desired = (cnt)) |
351 | #define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) | 349 | #define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) |
352 | #define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) | 350 | #define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) |
353 | 351 | ||
354 | #define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0) | 352 | #define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0) |
355 | #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) | 353 | #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) |
356 | #define XFS_BUF_SET_REF(bp, ref) do { } while (0) | 354 | #define XFS_BUF_SET_REF(bp, ref) do { } while (0) |
357 | 355 | ||
358 | #define XFS_BUF_ISPINNED(bp) xfs_buf_ispin(bp) | 356 | #define XFS_BUF_ISPINNED(bp) xfs_buf_ispin(bp) |
359 | 357 | ||
360 | #define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp) | 358 | #define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp) |
361 | #define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0) | 359 | #define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0) |
362 | #define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp) | 360 | #define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp) |
363 | #define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp) | 361 | #define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp) |
364 | #define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait); | 362 | #define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait); |
365 | 363 | ||
366 | #define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target)) | 364 | #define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target)) |
367 | #define XFS_BUF_TARGET(bp) ((bp)->b_target) | 365 | #define XFS_BUF_TARGET(bp) ((bp)->b_target) |
368 | #define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target) | 366 | #define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target) |
369 | 367 | ||
370 | static inline void xfs_buf_relse(xfs_buf_t *bp) | 368 | static inline void xfs_buf_relse(xfs_buf_t *bp) |
371 | { | 369 | { |
372 | if (!bp->b_relse) | 370 | if (!bp->b_relse) |
373 | xfs_buf_unlock(bp); | 371 | xfs_buf_unlock(bp); |
374 | xfs_buf_rele(bp); | 372 | xfs_buf_rele(bp); |
375 | } | 373 | } |
376 | 374 | ||
377 | #define xfs_bpin(bp) xfs_buf_pin(bp) | 375 | #define xfs_bpin(bp) xfs_buf_pin(bp) |
378 | #define xfs_bunpin(bp) xfs_buf_unpin(bp) | 376 | #define xfs_bunpin(bp) xfs_buf_unpin(bp) |
379 | 377 | ||
380 | #define xfs_buftrace(id, bp) \ | 378 | #define xfs_buftrace(id, bp) \ |
381 | xfs_buf_trace(bp, id, NULL, (void *)__builtin_return_address(0)) | 379 | xfs_buf_trace(bp, id, NULL, (void *)__builtin_return_address(0)) |
382 | 380 | ||
383 | #define xfs_biodone(bp) xfs_buf_ioend(bp, 0) | 381 | #define xfs_biodone(bp) xfs_buf_ioend(bp, 0) |
384 | 382 | ||
385 | #define xfs_biomove(bp, off, len, data, rw) \ | 383 | #define xfs_biomove(bp, off, len, data, rw) \ |
386 | xfs_buf_iomove((bp), (off), (len), (data), \ | 384 | xfs_buf_iomove((bp), (off), (len), (data), \ |
387 | ((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ) | 385 | ((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ) |
388 | 386 | ||
389 | #define xfs_biozero(bp, off, len) \ | 387 | #define xfs_biozero(bp, off, len) \ |
390 | xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) | 388 | xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) |
391 | 389 | ||
392 | 390 | ||
393 | static inline int XFS_bwrite(xfs_buf_t *bp) | 391 | static inline int XFS_bwrite(xfs_buf_t *bp) |
394 | { | 392 | { |
395 | int iowait = (bp->b_flags & XBF_ASYNC) == 0; | 393 | int iowait = (bp->b_flags & XBF_ASYNC) == 0; |
396 | int error = 0; | 394 | int error = 0; |
397 | 395 | ||
398 | if (!iowait) | 396 | if (!iowait) |
399 | bp->b_flags |= _XBF_RUN_QUEUES; | 397 | bp->b_flags |= _XBF_RUN_QUEUES; |
400 | 398 | ||
401 | xfs_buf_delwri_dequeue(bp); | 399 | xfs_buf_delwri_dequeue(bp); |
402 | xfs_buf_iostrategy(bp); | 400 | xfs_buf_iostrategy(bp); |
403 | if (iowait) { | 401 | if (iowait) { |
404 | error = xfs_buf_iowait(bp); | 402 | error = xfs_buf_iowait(bp); |
405 | xfs_buf_relse(bp); | 403 | xfs_buf_relse(bp); |
406 | } | 404 | } |
407 | return error; | 405 | return error; |
408 | } | 406 | } |
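XFS_bwrite() above keys its behaviour off XBF_ASYNC: a synchronous caller issues the I/O, waits in xfs_buf_iowait(), and releases the buffer itself, while an async buffer is tagged _XBF_RUN_QUEUES and returns immediately, leaving the release to the completion path. A hedged caller's-eye sketch (bp and bp2 are assumed to be valid, locked buffers already set up for writing; illustrative only, not the canonical call sequence):

	int	error;

	/* Synchronous: XBF_ASYNC clear, so XFS_bwrite() waits for the
	 * I/O and releases the buffer before returning. */
	bp->b_flags &= ~XBF_ASYNC;
	error = XFS_bwrite(bp);

	/* Asynchronous: XFS_bwrite() pushes the device queue via
	 * _XBF_RUN_QUEUES and returns 0 at once; completion handling
	 * is assumed to release the buffer later. */
	bp2->b_flags |= XBF_ASYNC;
	(void)XFS_bwrite(bp2);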
409 | 407 | ||
410 | #define XFS_bdstrat(bp) xfs_buf_iorequest(bp) | 408 | #define XFS_bdstrat(bp) xfs_buf_iorequest(bp) |
411 | 409 | ||
412 | #define xfs_iowait(bp) xfs_buf_iowait(bp) | 410 | #define xfs_iowait(bp) xfs_buf_iowait(bp) |
413 | 411 | ||
414 | #define xfs_baread(target, rablkno, ralen) \ | 412 | #define xfs_baread(target, rablkno, ralen) \ |
415 | xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK) | 413 | xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK) |
416 | 414 | ||
417 | 415 | ||
418 | /* | 416 | /* |
419 | * Handling of buftargs. | 417 | * Handling of buftargs. |
420 | */ | 418 | */ |
421 | extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); | 419 | extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); |
422 | extern void xfs_free_buftarg(xfs_buftarg_t *); | 420 | extern void xfs_free_buftarg(xfs_buftarg_t *); |
423 | extern void xfs_wait_buftarg(xfs_buftarg_t *); | 421 | extern void xfs_wait_buftarg(xfs_buftarg_t *); |
424 | extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); | 422 | extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); |
425 | extern int xfs_flush_buftarg(xfs_buftarg_t *, int); | 423 | extern int xfs_flush_buftarg(xfs_buftarg_t *, int); |
426 | #ifdef CONFIG_KDB_MODULES | 424 | #ifdef CONFIG_KDB_MODULES |
427 | extern struct list_head *xfs_get_buftarg_list(void); | 425 | extern struct list_head *xfs_get_buftarg_list(void); |
428 | #endif | 426 | #endif |
429 | 427 | ||
430 | #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) | 428 | #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) |
431 | #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) | 429 | #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) |
432 | 430 | ||
433 | #define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1) | 431 | #define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1) |
434 | #define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1) | 432 | #define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1) |
435 | 433 | ||
436 | #endif /* __XFS_BUF_H__ */ | 434 | #endif /* __XFS_BUF_H__ */ |
437 | 435 |
fs/xfs/linux-2.6/xfs_lrw.c
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | 2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_bit.h" | 20 | #include "xfs_bit.h" |
21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | 22 | #include "xfs_inum.h" |
23 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | 25 | #include "xfs_ag.h" |
26 | #include "xfs_dir2.h" | 26 | #include "xfs_dir2.h" |
27 | #include "xfs_alloc.h" | 27 | #include "xfs_alloc.h" |
28 | #include "xfs_dmapi.h" | 28 | #include "xfs_dmapi.h" |
29 | #include "xfs_quota.h" | 29 | #include "xfs_quota.h" |
30 | #include "xfs_mount.h" | 30 | #include "xfs_mount.h" |
31 | #include "xfs_bmap_btree.h" | 31 | #include "xfs_bmap_btree.h" |
32 | #include "xfs_alloc_btree.h" | 32 | #include "xfs_alloc_btree.h" |
33 | #include "xfs_ialloc_btree.h" | 33 | #include "xfs_ialloc_btree.h" |
34 | #include "xfs_dir2_sf.h" | 34 | #include "xfs_dir2_sf.h" |
35 | #include "xfs_attr_sf.h" | 35 | #include "xfs_attr_sf.h" |
36 | #include "xfs_dinode.h" | 36 | #include "xfs_dinode.h" |
37 | #include "xfs_inode.h" | 37 | #include "xfs_inode.h" |
38 | #include "xfs_bmap.h" | 38 | #include "xfs_bmap.h" |
39 | #include "xfs_btree.h" | 39 | #include "xfs_btree.h" |
40 | #include "xfs_ialloc.h" | 40 | #include "xfs_ialloc.h" |
41 | #include "xfs_rtalloc.h" | 41 | #include "xfs_rtalloc.h" |
42 | #include "xfs_error.h" | 42 | #include "xfs_error.h" |
43 | #include "xfs_itable.h" | 43 | #include "xfs_itable.h" |
44 | #include "xfs_rw.h" | 44 | #include "xfs_rw.h" |
45 | #include "xfs_acl.h" | 45 | #include "xfs_acl.h" |
46 | #include "xfs_attr.h" | 46 | #include "xfs_attr.h" |
47 | #include "xfs_inode_item.h" | 47 | #include "xfs_inode_item.h" |
48 | #include "xfs_buf_item.h" | 48 | #include "xfs_buf_item.h" |
49 | #include "xfs_utils.h" | 49 | #include "xfs_utils.h" |
50 | #include "xfs_iomap.h" | 50 | #include "xfs_iomap.h" |
51 | #include "xfs_vnodeops.h" | 51 | #include "xfs_vnodeops.h" |
52 | 52 | ||
53 | #include <linux/capability.h> | 53 | #include <linux/capability.h> |
54 | #include <linux/writeback.h> | 54 | #include <linux/writeback.h> |
55 | 55 | ||
56 | 56 | ||
57 | #if defined(XFS_RW_TRACE) | 57 | #if defined(XFS_RW_TRACE) |
58 | void | 58 | void |
59 | xfs_rw_enter_trace( | 59 | xfs_rw_enter_trace( |
60 | int tag, | 60 | int tag, |
61 | xfs_inode_t *ip, | 61 | xfs_inode_t *ip, |
62 | void *data, | 62 | void *data, |
63 | size_t segs, | 63 | size_t segs, |
64 | loff_t offset, | 64 | loff_t offset, |
65 | int ioflags) | 65 | int ioflags) |
66 | { | 66 | { |
67 | if (ip->i_rwtrace == NULL) | 67 | if (ip->i_rwtrace == NULL) |
68 | return; | 68 | return; |
69 | ktrace_enter(ip->i_rwtrace, | 69 | ktrace_enter(ip->i_rwtrace, |
70 | (void *)(unsigned long)tag, | 70 | (void *)(unsigned long)tag, |
71 | (void *)ip, | 71 | (void *)ip, |
72 | (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)), | 72 | (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)), |
73 | (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)), | 73 | (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)), |
74 | (void *)data, | 74 | (void *)data, |
75 | (void *)((unsigned long)segs), | 75 | (void *)((unsigned long)segs), |
76 | (void *)((unsigned long)((offset >> 32) & 0xffffffff)), | 76 | (void *)((unsigned long)((offset >> 32) & 0xffffffff)), |
77 | (void *)((unsigned long)(offset & 0xffffffff)), | 77 | (void *)((unsigned long)(offset & 0xffffffff)), |
78 | (void *)((unsigned long)ioflags), | 78 | (void *)((unsigned long)ioflags), |
79 | (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)), | 79 | (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)), |
80 | (void *)((unsigned long)(ip->i_new_size & 0xffffffff)), | 80 | (void *)((unsigned long)(ip->i_new_size & 0xffffffff)), |
81 | (void *)((unsigned long)current_pid()), | 81 | (void *)((unsigned long)current_pid()), |
82 | (void *)NULL, | 82 | (void *)NULL, |
83 | (void *)NULL, | 83 | (void *)NULL, |
84 | (void *)NULL, | 84 | (void *)NULL, |
85 | (void *)NULL); | 85 | (void *)NULL); |
86 | } | 86 | } |
87 | 87 | ||
88 | void | 88 | void |
89 | xfs_inval_cached_trace( | 89 | xfs_inval_cached_trace( |
90 | xfs_inode_t *ip, | 90 | xfs_inode_t *ip, |
91 | xfs_off_t offset, | 91 | xfs_off_t offset, |
92 | xfs_off_t len, | 92 | xfs_off_t len, |
93 | xfs_off_t first, | 93 | xfs_off_t first, |
94 | xfs_off_t last) | 94 | xfs_off_t last) |
95 | { | 95 | { |
96 | 96 | ||
97 | if (ip->i_rwtrace == NULL) | 97 | if (ip->i_rwtrace == NULL) |
98 | return; | 98 | return; |
99 | ktrace_enter(ip->i_rwtrace, | 99 | ktrace_enter(ip->i_rwtrace, |
100 | (void *)(__psint_t)XFS_INVAL_CACHED, | 100 | (void *)(__psint_t)XFS_INVAL_CACHED, |
101 | (void *)ip, | 101 | (void *)ip, |
102 | (void *)((unsigned long)((offset >> 32) & 0xffffffff)), | 102 | (void *)((unsigned long)((offset >> 32) & 0xffffffff)), |
103 | (void *)((unsigned long)(offset & 0xffffffff)), | 103 | (void *)((unsigned long)(offset & 0xffffffff)), |
104 | (void *)((unsigned long)((len >> 32) & 0xffffffff)), | 104 | (void *)((unsigned long)((len >> 32) & 0xffffffff)), |
105 | (void *)((unsigned long)(len & 0xffffffff)), | 105 | (void *)((unsigned long)(len & 0xffffffff)), |
106 | (void *)((unsigned long)((first >> 32) & 0xffffffff)), | 106 | (void *)((unsigned long)((first >> 32) & 0xffffffff)), |
107 | (void *)((unsigned long)(first & 0xffffffff)), | 107 | (void *)((unsigned long)(first & 0xffffffff)), |
108 | (void *)((unsigned long)((last >> 32) & 0xffffffff)), | 108 | (void *)((unsigned long)((last >> 32) & 0xffffffff)), |
109 | (void *)((unsigned long)(last & 0xffffffff)), | 109 | (void *)((unsigned long)(last & 0xffffffff)), |
110 | (void *)((unsigned long)current_pid()), | 110 | (void *)((unsigned long)current_pid()), |
111 | (void *)NULL, | 111 | (void *)NULL, |
112 | (void *)NULL, | 112 | (void *)NULL, |
113 | (void *)NULL, | 113 | (void *)NULL, |
114 | (void *)NULL, | 114 | (void *)NULL, |
115 | (void *)NULL); | 115 | (void *)NULL); |
116 | } | 116 | } |
117 | #endif | 117 | #endif |
118 | 118 | ||
119 | /* | 119 | /* |
120 | * xfs_iozero | 120 | * xfs_iozero |
121 | * | 121 | * |
122 | * xfs_iozero clears the specified range of buffer supplied, | 122 | * xfs_iozero clears the specified range of buffer supplied, |
123 | * and marks all the affected blocks as valid and modified. If | 123 | * and marks all the affected blocks as valid and modified. If |
124 | * an affected block is not allocated, it will be allocated. If | 124 | * an affected block is not allocated, it will be allocated. If |
125 | * an affected block is not completely overwritten, and is not | 125 | * an affected block is not completely overwritten, and is not |
126 | * valid before the operation, it will be read from disk before | 126 | * valid before the operation, it will be read from disk before |
127 | * being partially zeroed. | 127 | * being partially zeroed. |
128 | */ | 128 | */ |
129 | STATIC int | 129 | STATIC int |
130 | xfs_iozero( | 130 | xfs_iozero( |
131 | struct xfs_inode *ip, /* inode */ | 131 | struct xfs_inode *ip, /* inode */ |
132 | loff_t pos, /* offset in file */ | 132 | loff_t pos, /* offset in file */ |
133 | size_t count) /* size of data to zero */ | 133 | size_t count) /* size of data to zero */ |
134 | { | 134 | { |
135 | struct page *page; | 135 | struct page *page; |
136 | struct address_space *mapping; | 136 | struct address_space *mapping; |
137 | int status; | 137 | int status; |
138 | 138 | ||
139 | mapping = VFS_I(ip)->i_mapping; | 139 | mapping = VFS_I(ip)->i_mapping; |
140 | do { | 140 | do { |
141 | unsigned offset, bytes; | 141 | unsigned offset, bytes; |
142 | void *fsdata; | 142 | void *fsdata; |
143 | 143 | ||
144 | offset = (pos & (PAGE_CACHE_SIZE - 1)); /* Within page */ | 144 | offset = (pos & (PAGE_CACHE_SIZE - 1)); /* Within page */ |
145 | bytes = PAGE_CACHE_SIZE - offset; | 145 | bytes = PAGE_CACHE_SIZE - offset; |
146 | if (bytes > count) | 146 | if (bytes > count) |
147 | bytes = count; | 147 | bytes = count; |
148 | 148 | ||
149 | status = pagecache_write_begin(NULL, mapping, pos, bytes, | 149 | status = pagecache_write_begin(NULL, mapping, pos, bytes, |
150 | AOP_FLAG_UNINTERRUPTIBLE, | 150 | AOP_FLAG_UNINTERRUPTIBLE, |
151 | &page, &fsdata); | 151 | &page, &fsdata); |
152 | if (status) | 152 | if (status) |
153 | break; | 153 | break; |
154 | 154 | ||
155 | zero_user(page, offset, bytes); | 155 | zero_user(page, offset, bytes); |
156 | 156 | ||
157 | status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, | 157 | status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, |
158 | page, fsdata); | 158 | page, fsdata); |
159 | WARN_ON(status <= 0); /* can't return less than zero! */ | 159 | WARN_ON(status <= 0); /* can't return less than zero! */ |
160 | pos += bytes; | 160 | pos += bytes; |
161 | count -= bytes; | 161 | count -= bytes; |
162 | status = 0; | 162 | status = 0; |
163 | } while (count); | 163 | } while (count); |
164 | 164 | ||
165 | return (-status); | 165 | return (-status); |
166 | } | 166 | } |
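The loop in xfs_iozero() chops an arbitrary (pos, count) range into page-sized pieces: offset is where pos lands within the current page, and bytes is how much of that page the range still covers. A runnable sketch of just that arithmetic, assuming a 4096-byte PAGE_CACHE_SIZE:

#include <stdio.h>

#define PAGE_SIZE_DEMO 4096UL

int main(void)
{
	unsigned long pos = 4090, count = 20;	/* spans a page boundary */

	while (count) {
		unsigned long offset = pos & (PAGE_SIZE_DEMO - 1);
		unsigned long bytes = PAGE_SIZE_DEMO - offset;

		if (bytes > count)
			bytes = count;

		printf("zero %lu bytes at page offset %lu (pos %lu)\n",
		       bytes, offset, pos);
		pos += bytes;
		count -= bytes;
	}
	return 0;
}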
167 | 167 | ||
168 | ssize_t /* bytes read, or (-) error */ | 168 | ssize_t /* bytes read, or (-) error */ |
169 | xfs_read( | 169 | xfs_read( |
170 | xfs_inode_t *ip, | 170 | xfs_inode_t *ip, |
171 | struct kiocb *iocb, | 171 | struct kiocb *iocb, |
172 | const struct iovec *iovp, | 172 | const struct iovec *iovp, |
173 | unsigned int segs, | 173 | unsigned int segs, |
174 | loff_t *offset, | 174 | loff_t *offset, |
175 | int ioflags) | 175 | int ioflags) |
176 | { | 176 | { |
177 | struct file *file = iocb->ki_filp; | 177 | struct file *file = iocb->ki_filp; |
178 | struct inode *inode = file->f_mapping->host; | 178 | struct inode *inode = file->f_mapping->host; |
179 | xfs_mount_t *mp = ip->i_mount; | 179 | xfs_mount_t *mp = ip->i_mount; |
180 | size_t size = 0; | 180 | size_t size = 0; |
181 | ssize_t ret = 0; | 181 | ssize_t ret = 0; |
182 | xfs_fsize_t n; | 182 | xfs_fsize_t n; |
183 | unsigned long seg; | 183 | unsigned long seg; |
184 | 184 | ||
185 | 185 | ||
186 | XFS_STATS_INC(xs_read_calls); | 186 | XFS_STATS_INC(xs_read_calls); |
187 | 187 | ||
188 | /* START copy & waste from filemap.c */ | 188 | /* START copy & waste from filemap.c */ |
189 | for (seg = 0; seg < segs; seg++) { | 189 | for (seg = 0; seg < segs; seg++) { |
190 | const struct iovec *iv = &iovp[seg]; | 190 | const struct iovec *iv = &iovp[seg]; |
191 | 191 | ||
192 | /* | 192 | /* |
193 | * If any segment has a negative length, or the cumulative | 193 | * If any segment has a negative length, or the cumulative |
194 | * length ever wraps negative then return -EINVAL. | 194 | * length ever wraps negative then return -EINVAL. |
195 | */ | 195 | */ |
196 | size += iv->iov_len; | 196 | size += iv->iov_len; |
197 | if (unlikely((ssize_t)(size|iv->iov_len) < 0)) | 197 | if (unlikely((ssize_t)(size|iv->iov_len) < 0)) |
198 | return XFS_ERROR(-EINVAL); | 198 | return XFS_ERROR(-EINVAL); |
199 | } | 199 | } |
200 | /* END copy & waste from filemap.c */ | 200 | /* END copy & waste from filemap.c */ |
201 | 201 | ||
202 | if (unlikely(ioflags & IO_ISDIRECT)) { | 202 | if (unlikely(ioflags & IO_ISDIRECT)) { |
203 | xfs_buftarg_t *target = | 203 | xfs_buftarg_t *target = |
204 | XFS_IS_REALTIME_INODE(ip) ? | 204 | XFS_IS_REALTIME_INODE(ip) ? |
205 | mp->m_rtdev_targp : mp->m_ddev_targp; | 205 | mp->m_rtdev_targp : mp->m_ddev_targp; |
206 | if ((*offset & target->bt_smask) || | 206 | if ((*offset & target->bt_smask) || |
207 | (size & target->bt_smask)) { | 207 | (size & target->bt_smask)) { |
208 | if (*offset == ip->i_size) { | 208 | if (*offset == ip->i_size) { |
209 | return (0); | 209 | return (0); |
210 | } | 210 | } |
211 | return -XFS_ERROR(EINVAL); | 211 | return -XFS_ERROR(EINVAL); |
212 | } | 212 | } |
213 | } | 213 | } |
214 | 214 | ||
215 | n = XFS_MAXIOFFSET(mp) - *offset; | 215 | n = XFS_MAXIOFFSET(mp) - *offset; |
216 | if ((n <= 0) || (size == 0)) | 216 | if ((n <= 0) || (size == 0)) |
217 | return 0; | 217 | return 0; |
218 | 218 | ||
219 | if (n < size) | 219 | if (n < size) |
220 | size = n; | 220 | size = n; |
221 | 221 | ||
222 | if (XFS_FORCED_SHUTDOWN(mp)) | 222 | if (XFS_FORCED_SHUTDOWN(mp)) |
223 | return -EIO; | 223 | return -EIO; |
224 | 224 | ||
225 | if (unlikely(ioflags & IO_ISDIRECT)) | 225 | if (unlikely(ioflags & IO_ISDIRECT)) |
226 | mutex_lock(&inode->i_mutex); | 226 | mutex_lock(&inode->i_mutex); |
227 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 227 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
228 | 228 | ||
229 | if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { | 229 | if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { |
230 | int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); | 230 | int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); |
231 | int iolock = XFS_IOLOCK_SHARED; | 231 | int iolock = XFS_IOLOCK_SHARED; |
232 | 232 | ||
233 | ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size, | 233 | ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size, |
234 | dmflags, &iolock); | 234 | dmflags, &iolock); |
235 | if (ret) { | 235 | if (ret) { |
236 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 236 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
237 | if (unlikely(ioflags & IO_ISDIRECT)) | 237 | if (unlikely(ioflags & IO_ISDIRECT)) |
238 | mutex_unlock(&inode->i_mutex); | 238 | mutex_unlock(&inode->i_mutex); |
239 | return ret; | 239 | return ret; |
240 | } | 240 | } |
241 | } | 241 | } |
242 | 242 | ||
243 | if (unlikely(ioflags & IO_ISDIRECT)) { | 243 | if (unlikely(ioflags & IO_ISDIRECT)) { |
244 | if (inode->i_mapping->nrpages) | 244 | if (inode->i_mapping->nrpages) |
245 | ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), | 245 | ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), |
246 | -1, FI_REMAPF_LOCKED); | 246 | -1, FI_REMAPF_LOCKED); |
247 | mutex_unlock(&inode->i_mutex); | 247 | mutex_unlock(&inode->i_mutex); |
248 | if (ret) { | 248 | if (ret) { |
249 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 249 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
250 | return ret; | 250 | return ret; |
251 | } | 251 | } |
252 | } | 252 | } |
253 | 253 | ||
254 | xfs_rw_enter_trace(XFS_READ_ENTER, ip, | 254 | xfs_rw_enter_trace(XFS_READ_ENTER, ip, |
255 | (void *)iovp, segs, *offset, ioflags); | 255 | (void *)iovp, segs, *offset, ioflags); |
256 | 256 | ||
257 | iocb->ki_pos = *offset; | 257 | iocb->ki_pos = *offset; |
258 | ret = generic_file_aio_read(iocb, iovp, segs, *offset); | 258 | ret = generic_file_aio_read(iocb, iovp, segs, *offset); |
259 | if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) | 259 | if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) |
260 | ret = wait_on_sync_kiocb(iocb); | 260 | ret = wait_on_sync_kiocb(iocb); |
261 | if (ret > 0) | 261 | if (ret > 0) |
262 | XFS_STATS_ADD(xs_read_bytes, ret); | 262 | XFS_STATS_ADD(xs_read_bytes, ret); |
263 | 263 | ||
264 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 264 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
265 | return ret; | 265 | return ret; |
266 | } | 266 | } |
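The segment loop at the top of xfs_read() uses a compact overflow test: OR-ing the running total with the current iov_len and casting to ssize_t goes negative if either value has its sign bit set, so one comparison rejects both a bogus single length and a cumulative wrap. A runnable sketch of the same check:

#include <stdio.h>
#include <sys/types.h>

int main(void)
{
	size_t lens[] = { 100, (size_t)-1 / 2 + 10 };	/* second wraps */
	size_t size = 0;
	unsigned i;

	for (i = 0; i < 2; i++) {
		size += lens[i];
		/* Negative means either lens[i] or the total has the
		 * sign bit set, i.e. an invalid length or an overflow. */
		if ((ssize_t)(size | lens[i]) < 0) {
			printf("segment %u rejected: overflow\n", i);
			return 1;
		}
	}
	printf("total %zu\n", size);
	return 0;
}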
267 | 267 | ||
268 | ssize_t | 268 | ssize_t |
269 | xfs_splice_read( | 269 | xfs_splice_read( |
270 | xfs_inode_t *ip, | 270 | xfs_inode_t *ip, |
271 | struct file *infilp, | 271 | struct file *infilp, |
272 | loff_t *ppos, | 272 | loff_t *ppos, |
273 | struct pipe_inode_info *pipe, | 273 | struct pipe_inode_info *pipe, |
274 | size_t count, | 274 | size_t count, |
275 | int flags, | 275 | int flags, |
276 | int ioflags) | 276 | int ioflags) |
277 | { | 277 | { |
278 | xfs_mount_t *mp = ip->i_mount; | 278 | xfs_mount_t *mp = ip->i_mount; |
279 | ssize_t ret; | 279 | ssize_t ret; |
280 | 280 | ||
281 | XFS_STATS_INC(xs_read_calls); | 281 | XFS_STATS_INC(xs_read_calls); |
282 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 282 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
283 | return -EIO; | 283 | return -EIO; |
284 | 284 | ||
285 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 285 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
286 | 286 | ||
287 | if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { | 287 | if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { |
288 | int iolock = XFS_IOLOCK_SHARED; | 288 | int iolock = XFS_IOLOCK_SHARED; |
289 | int error; | 289 | int error; |
290 | 290 | ||
291 | error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count, | 291 | error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count, |
292 | FILP_DELAY_FLAG(infilp), &iolock); | 292 | FILP_DELAY_FLAG(infilp), &iolock); |
293 | if (error) { | 293 | if (error) { |
294 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 294 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
295 | return -error; | 295 | return -error; |
296 | } | 296 | } |
297 | } | 297 | } |
298 | xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, ip, | 298 | xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, ip, |
299 | pipe, count, *ppos, ioflags); | 299 | pipe, count, *ppos, ioflags); |
300 | ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); | 300 | ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); |
301 | if (ret > 0) | 301 | if (ret > 0) |
302 | XFS_STATS_ADD(xs_read_bytes, ret); | 302 | XFS_STATS_ADD(xs_read_bytes, ret); |
303 | 303 | ||
304 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 304 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
305 | return ret; | 305 | return ret; |
306 | } | 306 | } |
307 | 307 | ||
308 | ssize_t | 308 | ssize_t |
309 | xfs_splice_write( | 309 | xfs_splice_write( |
310 | xfs_inode_t *ip, | 310 | xfs_inode_t *ip, |
311 | struct pipe_inode_info *pipe, | 311 | struct pipe_inode_info *pipe, |
312 | struct file *outfilp, | 312 | struct file *outfilp, |
313 | loff_t *ppos, | 313 | loff_t *ppos, |
314 | size_t count, | 314 | size_t count, |
315 | int flags, | 315 | int flags, |
316 | int ioflags) | 316 | int ioflags) |
317 | { | 317 | { |
318 | xfs_mount_t *mp = ip->i_mount; | 318 | xfs_mount_t *mp = ip->i_mount; |
319 | ssize_t ret; | 319 | ssize_t ret; |
320 | struct inode *inode = outfilp->f_mapping->host; | 320 | struct inode *inode = outfilp->f_mapping->host; |
321 | xfs_fsize_t isize, new_size; | 321 | xfs_fsize_t isize, new_size; |
322 | 322 | ||
323 | XFS_STATS_INC(xs_write_calls); | 323 | XFS_STATS_INC(xs_write_calls); |
324 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 324 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
325 | return -EIO; | 325 | return -EIO; |
326 | 326 | ||
327 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | 327 | xfs_ilock(ip, XFS_IOLOCK_EXCL); |
328 | 328 | ||
329 | if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) { | 329 | if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) { |
330 | int iolock = XFS_IOLOCK_EXCL; | 330 | int iolock = XFS_IOLOCK_EXCL; |
331 | int error; | 331 | int error; |
332 | 332 | ||
333 | error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count, | 333 | error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count, |
334 | FILP_DELAY_FLAG(outfilp), &iolock); | 334 | FILP_DELAY_FLAG(outfilp), &iolock); |
335 | if (error) { | 335 | if (error) { |
336 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | 336 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
337 | return -error; | 337 | return -error; |
338 | } | 338 | } |
339 | } | 339 | } |
340 | 340 | ||
341 | new_size = *ppos + count; | 341 | new_size = *ppos + count; |
342 | 342 | ||
343 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 343 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
344 | if (new_size > ip->i_size) | 344 | if (new_size > ip->i_size) |
345 | ip->i_new_size = new_size; | 345 | ip->i_new_size = new_size; |
346 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 346 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
347 | 347 | ||
348 | xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, ip, | 348 | xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, ip, |
349 | pipe, count, *ppos, ioflags); | 349 | pipe, count, *ppos, ioflags); |
350 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); | 350 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); |
351 | if (ret > 0) | 351 | if (ret > 0) |
352 | XFS_STATS_ADD(xs_write_bytes, ret); | 352 | XFS_STATS_ADD(xs_write_bytes, ret); |
353 | 353 | ||
354 | isize = i_size_read(inode); | 354 | isize = i_size_read(inode); |
355 | if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize)) | 355 | if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize)) |
356 | *ppos = isize; | 356 | *ppos = isize; |
357 | 357 | ||
358 | if (*ppos > ip->i_size) { | 358 | if (*ppos > ip->i_size) { |
359 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 359 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
360 | if (*ppos > ip->i_size) | 360 | if (*ppos > ip->i_size) |
361 | ip->i_size = *ppos; | 361 | ip->i_size = *ppos; |
362 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 362 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
363 | } | 363 | } |
364 | 364 | ||
365 | if (ip->i_new_size) { | 365 | if (ip->i_new_size) { |
366 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 366 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
367 | ip->i_new_size = 0; | 367 | ip->i_new_size = 0; |
368 | if (ip->i_d.di_size > ip->i_size) | 368 | if (ip->i_d.di_size > ip->i_size) |
369 | ip->i_d.di_size = ip->i_size; | 369 | ip->i_d.di_size = ip->i_size; |
370 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 370 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
371 | } | 371 | } |
372 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | 372 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
373 | return ret; | 373 | return ret; |
374 | } | 374 | } |
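The *ppos > ip->i_size update near the end of xfs_splice_write() is a double-checked pattern: test cheaply without the ilock, then retest under XFS_ILOCK_EXCL before writing, so the common no-growth case never takes the lock. A self-contained sketch of the idiom with a pthread mutex standing in for the ilock (illustrative only; build with -lpthread):

#include <stdio.h>
#include <pthread.h>

static pthread_mutex_t ilock = PTHREAD_MUTEX_INITIALIZER;
static long i_size = 100;

/* Grow i_size to pos if the write moved past it; recheck under the
 * lock because another writer may have grown it since the unlocked
 * test. */
static void update_isize(long pos)
{
	if (pos > i_size) {			/* cheap unlocked check */
		pthread_mutex_lock(&ilock);
		if (pos > i_size)		/* recheck while serialised */
			i_size = pos;
		pthread_mutex_unlock(&ilock);
	}
}

int main(void)
{
	update_isize(50);	/* no-op, lock never taken */
	update_isize(200);	/* grows the size */
	printf("i_size = %ld\n", i_size);
	return 0;
}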
375 | 375 | ||
376 | /* | 376 | /* |
377 | * This routine is called to handle zeroing any space in the last | 377 | * This routine is called to handle zeroing any space in the last |
378 | * block of the file that is beyond the EOF. We do this since the | 378 | * block of the file that is beyond the EOF. We do this since the |
379 | * size is being increased without writing anything to that block | 379 | * size is being increased without writing anything to that block |
380 | * and we don't want anyone to read the garbage on the disk. | 380 | * and we don't want anyone to read the garbage on the disk. |
381 | */ | 381 | */ |
382 | STATIC int /* error (positive) */ | 382 | STATIC int /* error (positive) */ |
383 | xfs_zero_last_block( | 383 | xfs_zero_last_block( |
384 | xfs_inode_t *ip, | 384 | xfs_inode_t *ip, |
385 | xfs_fsize_t offset, | 385 | xfs_fsize_t offset, |
386 | xfs_fsize_t isize) | 386 | xfs_fsize_t isize) |
387 | { | 387 | { |
388 | xfs_fileoff_t last_fsb; | 388 | xfs_fileoff_t last_fsb; |
389 | xfs_mount_t *mp = ip->i_mount; | 389 | xfs_mount_t *mp = ip->i_mount; |
390 | int nimaps; | 390 | int nimaps; |
391 | int zero_offset; | 391 | int zero_offset; |
392 | int zero_len; | 392 | int zero_len; |
393 | int error = 0; | 393 | int error = 0; |
394 | xfs_bmbt_irec_t imap; | 394 | xfs_bmbt_irec_t imap; |
395 | 395 | ||
396 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 396 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
397 | 397 | ||
398 | zero_offset = XFS_B_FSB_OFFSET(mp, isize); | 398 | zero_offset = XFS_B_FSB_OFFSET(mp, isize); |
399 | if (zero_offset == 0) { | 399 | if (zero_offset == 0) { |
400 | /* | 400 | /* |
401 | * There are no extra bytes in the last block on disk to | 401 | * There are no extra bytes in the last block on disk to |
402 | * zero, so return. | 402 | * zero, so return. |
403 | */ | 403 | */ |
404 | return 0; | 404 | return 0; |
405 | } | 405 | } |
406 | 406 | ||
407 | last_fsb = XFS_B_TO_FSBT(mp, isize); | 407 | last_fsb = XFS_B_TO_FSBT(mp, isize); |
408 | nimaps = 1; | 408 | nimaps = 1; |
409 | error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap, | 409 | error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap, |
410 | &nimaps, NULL, NULL); | 410 | &nimaps, NULL, NULL); |
411 | if (error) { | 411 | if (error) { |
412 | return error; | 412 | return error; |
413 | } | 413 | } |
414 | ASSERT(nimaps > 0); | 414 | ASSERT(nimaps > 0); |
415 | /* | 415 | /* |
416 | * If the block underlying isize is just a hole, then there | 416 | * If the block underlying isize is just a hole, then there |
417 | * is nothing to zero. | 417 | * is nothing to zero. |
418 | */ | 418 | */ |
419 | if (imap.br_startblock == HOLESTARTBLOCK) { | 419 | if (imap.br_startblock == HOLESTARTBLOCK) { |
420 | return 0; | 420 | return 0; |
421 | } | 421 | } |
422 | /* | 422 | /* |
423 | * Zero the part of the last block beyond the EOF, and write it | 423 | * Zero the part of the last block beyond the EOF, and write it |
424 | * out sync. We need to drop the ilock while we do this so we | 424 | * out sync. We need to drop the ilock while we do this so we |
425 | * don't deadlock when the buffer cache calls back to us. | 425 | * don't deadlock when the buffer cache calls back to us. |
426 | */ | 426 | */ |
427 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 427 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
428 | 428 | ||
429 | zero_len = mp->m_sb.sb_blocksize - zero_offset; | 429 | zero_len = mp->m_sb.sb_blocksize - zero_offset; |
430 | if (isize + zero_len > offset) | 430 | if (isize + zero_len > offset) |
431 | zero_len = offset - isize; | 431 | zero_len = offset - isize; |
432 | error = xfs_iozero(ip, isize, zero_len); | 432 | error = xfs_iozero(ip, isize, zero_len); |
433 | 433 | ||
434 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 434 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
435 | ASSERT(error >= 0); | 435 | ASSERT(error >= 0); |
436 | return error; | 436 | return error; |
437 | } | 437 | } |
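For illustration, the arithmetic above worked through with hypothetical numbers: a 4096-byte block size and a file growing from isize 10000 to a write at offset 20000. This is a self-contained userspace sketch, not kernel code; XFS_B_FSB_OFFSET() is modeled as a power-of-two mask.

        #include <stdio.h>

        int main(void)
        {
                unsigned long blocksize = 4096;
                unsigned long long isize = 10000;   /* current EOF        */
                unsigned long long offset = 20000;  /* start of the write */

                /* byte offset of EOF in its block: 10000 % 4096 == 1808 */
                unsigned long zero_offset = isize & (blocksize - 1);
                /* bytes from EOF to the end of that block: 2288 */
                unsigned long zero_len = blocksize - zero_offset;

                /* never zero past the start of the new write */
                if (isize + zero_len > offset)
                        zero_len = offset - isize;

                printf("zero %lu bytes at byte %llu\n", zero_len, isize);
                return 0;
        }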
438 | 438 | ||
439 | /* | 439 | /* |
440 | * Zero any on-disk space between the current EOF and the new, | 440 | * Zero any on-disk space between the current EOF and the new, |

441 | * larger EOF. This handles the normal case of zeroing the remainder | 441 | * larger EOF. This handles the normal case of zeroing the remainder |
442 | * of the last block in the file and the unusual case of zeroing blocks | 442 | * of the last block in the file and the unusual case of zeroing blocks |
443 | * out beyond the size of the file. This second case only happens | 443 | * out beyond the size of the file. This second case only happens |
444 | * with fixed size extents and when the system crashes before the inode | 444 | * with fixed size extents and when the system crashes before the inode |
445 | * size was updated but after blocks were allocated. If fill is set, | 445 | * size was updated but after blocks were allocated. If fill is set, |
446 | * then any holes in the range are filled and zeroed. If not, the holes | 446 | * then any holes in the range are filled and zeroed. If not, the holes |
447 | * are left alone as holes. | 447 | * are left alone as holes. |
448 | */ | 448 | */ |
449 | 449 | ||
450 | int /* error (positive) */ | 450 | int /* error (positive) */ |
451 | xfs_zero_eof( | 451 | xfs_zero_eof( |
452 | xfs_inode_t *ip, | 452 | xfs_inode_t *ip, |
453 | xfs_off_t offset, /* starting I/O offset */ | 453 | xfs_off_t offset, /* starting I/O offset */ |
454 | xfs_fsize_t isize) /* current inode size */ | 454 | xfs_fsize_t isize) /* current inode size */ |
455 | { | 455 | { |
456 | xfs_mount_t *mp = ip->i_mount; | 456 | xfs_mount_t *mp = ip->i_mount; |
457 | xfs_fileoff_t start_zero_fsb; | 457 | xfs_fileoff_t start_zero_fsb; |
458 | xfs_fileoff_t end_zero_fsb; | 458 | xfs_fileoff_t end_zero_fsb; |
459 | xfs_fileoff_t zero_count_fsb; | 459 | xfs_fileoff_t zero_count_fsb; |
460 | xfs_fileoff_t last_fsb; | 460 | xfs_fileoff_t last_fsb; |
461 | xfs_fileoff_t zero_off; | 461 | xfs_fileoff_t zero_off; |
462 | xfs_fsize_t zero_len; | 462 | xfs_fsize_t zero_len; |
463 | int nimaps; | 463 | int nimaps; |
464 | int error = 0; | 464 | int error = 0; |
465 | xfs_bmbt_irec_t imap; | 465 | xfs_bmbt_irec_t imap; |
466 | 466 | ||
467 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); | 467 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); |
468 | ASSERT(offset > isize); | 468 | ASSERT(offset > isize); |
469 | 469 | ||
470 | /* | 470 | /* |
471 | * First handle zeroing the block on which isize resides. | 471 | * First handle zeroing the block on which isize resides. |
472 | * We only zero a part of that block so it is handled specially. | 472 | * We only zero a part of that block so it is handled specially. |
473 | */ | 473 | */ |
474 | error = xfs_zero_last_block(ip, offset, isize); | 474 | error = xfs_zero_last_block(ip, offset, isize); |
475 | if (error) { | 475 | if (error) { |
476 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); | 476 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); |
477 | return error; | 477 | return error; |
478 | } | 478 | } |
479 | 479 | ||
480 | /* | 480 | /* |
481 | * Calculate the range between the new size and the old | 481 | * Calculate the range between the new size and the old |
482 | * where blocks needing to be zeroed may exist. To get the | 482 | * where blocks needing to be zeroed may exist. To get the |
483 | * block where the last byte in the file currently resides, | 483 | * block where the last byte in the file currently resides, |
484 | * we need to subtract one from the size and truncate back | 484 | * we need to subtract one from the size and truncate back |
485 | * to a block boundary. We subtract 1 in case the size is | 485 | * to a block boundary. We subtract 1 in case the size is |
486 | * exactly on a block boundary. | 486 | * exactly on a block boundary. |
487 | */ | 487 | */ |
488 | last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; | 488 | last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; |
489 | start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); | 489 | start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); |
490 | end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1); | 490 | end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1); |
491 | ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb); | 491 | ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb); |
492 | if (last_fsb == end_zero_fsb) { | 492 | if (last_fsb == end_zero_fsb) { |
493 | /* | 493 | /* |
494 | * The size was only incremented on its last block. | 494 | * The size was only incremented on its last block. |
495 | * We took care of that above, so just return. | 495 | * We took care of that above, so just return. |
496 | */ | 496 | */ |
497 | return 0; | 497 | return 0; |
498 | } | 498 | } |
499 | 499 | ||
500 | ASSERT(start_zero_fsb <= end_zero_fsb); | 500 | ASSERT(start_zero_fsb <= end_zero_fsb); |
501 | while (start_zero_fsb <= end_zero_fsb) { | 501 | while (start_zero_fsb <= end_zero_fsb) { |
502 | nimaps = 1; | 502 | nimaps = 1; |
503 | zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; | 503 | zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; |
504 | error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, | 504 | error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, |
505 | 0, NULL, 0, &imap, &nimaps, NULL, NULL); | 505 | 0, NULL, 0, &imap, &nimaps, NULL, NULL); |
506 | if (error) { | 506 | if (error) { |
507 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); | 507 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); |
508 | return error; | 508 | return error; |
509 | } | 509 | } |
510 | ASSERT(nimaps > 0); | 510 | ASSERT(nimaps > 0); |
511 | 511 | ||
512 | if (imap.br_state == XFS_EXT_UNWRITTEN || | 512 | if (imap.br_state == XFS_EXT_UNWRITTEN || |
513 | imap.br_startblock == HOLESTARTBLOCK) { | 513 | imap.br_startblock == HOLESTARTBLOCK) { |
514 | /* | 514 | /* |
515 | * This loop handles initializing pages that were | 515 | * This loop handles initializing pages that were |
516 | * partially initialized by the code below this | 516 | * partially initialized by the code below this |
517 | * loop. It basically zeroes the part of the page | 517 | * loop. It basically zeroes the part of the page |
518 | * that sits on a hole and sets the page as P_HOLE | 518 | * that sits on a hole and sets the page as P_HOLE |
519 | * and calls remapf if it is a mapped file. | 519 | * and calls remapf if it is a mapped file. |
520 | */ | 520 | */ |
521 | start_zero_fsb = imap.br_startoff + imap.br_blockcount; | 521 | start_zero_fsb = imap.br_startoff + imap.br_blockcount; |
522 | ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); | 522 | ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); |
523 | continue; | 523 | continue; |
524 | } | 524 | } |
525 | 525 | ||
526 | /* | 526 | /* |
527 | * There are blocks we need to zero. | 527 | * There are blocks we need to zero. |
528 | * Drop the inode lock while we're doing the I/O. | 528 | * Drop the inode lock while we're doing the I/O. |
529 | * We'll still have the iolock to protect us. | 529 | * We'll still have the iolock to protect us. |
530 | */ | 530 | */ |
531 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 531 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
532 | 532 | ||
533 | zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); | 533 | zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); |
534 | zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); | 534 | zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); |
535 | 535 | ||
536 | if ((zero_off + zero_len) > offset) | 536 | if ((zero_off + zero_len) > offset) |
537 | zero_len = offset - zero_off; | 537 | zero_len = offset - zero_off; |
538 | 538 | ||
539 | error = xfs_iozero(ip, zero_off, zero_len); | 539 | error = xfs_iozero(ip, zero_off, zero_len); |
540 | if (error) { | 540 | if (error) { |
541 | goto out_lock; | 541 | goto out_lock; |
542 | } | 542 | } |
543 | 543 | ||
544 | start_zero_fsb = imap.br_startoff + imap.br_blockcount; | 544 | start_zero_fsb = imap.br_startoff + imap.br_blockcount; |
545 | ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); | 545 | ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); |
546 | 546 | ||
547 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 547 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
548 | } | 548 | } |
549 | 549 | ||
550 | return 0; | 550 | return 0; |
551 | 551 | ||
552 | out_lock: | 552 | out_lock: |
553 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 553 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
554 | ASSERT(error >= 0); | 554 | ASSERT(error >= 0); |
555 | return error; | 555 | return error; |
556 | } | 556 | } |
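The block-range calculation above, traced with the same hypothetical numbers (4096-byte blocks, isize 10000, offset 20000). B_TO_FSB rounds up to the next block and B_TO_FSBT truncates, mirroring the behaviour of the real macros:

        #include <stdio.h>

        #define B_TO_FSBT(b)  ((b) >> 12)             /* truncate */
        #define B_TO_FSB(b)   (((b) + 4095) >> 12)    /* round up */

        int main(void)
        {
                unsigned long long isize = 10000, offset = 20000;

                long long last_fsb  = isize ? B_TO_FSBT(isize - 1) : -1; /* 2 */
                long long start_fsb = B_TO_FSB(isize);                   /* 3 */
                long long end_fsb   = B_TO_FSBT(offset - 1);             /* 4 */

                if (last_fsb == end_fsb)
                        printf("size grew only within its last block\n");
                else
                        printf("scan blocks %lld..%lld for zeroing\n",
                               start_fsb, end_fsb);
                return 0;
        }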
557 | 557 | ||
558 | ssize_t /* bytes written, or (-) error */ | 558 | ssize_t /* bytes written, or (-) error */ |
559 | xfs_write( | 559 | xfs_write( |
560 | struct xfs_inode *xip, | 560 | struct xfs_inode *xip, |
561 | struct kiocb *iocb, | 561 | struct kiocb *iocb, |
562 | const struct iovec *iovp, | 562 | const struct iovec *iovp, |
563 | unsigned int nsegs, | 563 | unsigned int nsegs, |
564 | loff_t *offset, | 564 | loff_t *offset, |
565 | int ioflags) | 565 | int ioflags) |
566 | { | 566 | { |
567 | struct file *file = iocb->ki_filp; | 567 | struct file *file = iocb->ki_filp; |
568 | struct address_space *mapping = file->f_mapping; | 568 | struct address_space *mapping = file->f_mapping; |
569 | struct inode *inode = mapping->host; | 569 | struct inode *inode = mapping->host; |
570 | unsigned long segs = nsegs; | 570 | unsigned long segs = nsegs; |
571 | xfs_mount_t *mp; | 571 | xfs_mount_t *mp; |
572 | ssize_t ret = 0, error = 0; | 572 | ssize_t ret = 0, error = 0; |
573 | xfs_fsize_t isize, new_size; | 573 | xfs_fsize_t isize, new_size; |
574 | int iolock; | 574 | int iolock; |
575 | int eventsent = 0; | 575 | int eventsent = 0; |
576 | size_t ocount = 0, count; | 576 | size_t ocount = 0, count; |
577 | loff_t pos; | 577 | loff_t pos; |
578 | int need_i_mutex; | 578 | int need_i_mutex; |
579 | 579 | ||
580 | XFS_STATS_INC(xs_write_calls); | 580 | XFS_STATS_INC(xs_write_calls); |
581 | 581 | ||
582 | error = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ); | 582 | error = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ); |
583 | if (error) | 583 | if (error) |
584 | return error; | 584 | return error; |
585 | 585 | ||
586 | count = ocount; | 586 | count = ocount; |
587 | pos = *offset; | 587 | pos = *offset; |
588 | 588 | ||
589 | if (count == 0) | 589 | if (count == 0) |
590 | return 0; | 590 | return 0; |
591 | 591 | ||
592 | mp = xip->i_mount; | 592 | mp = xip->i_mount; |
593 | 593 | ||
594 | xfs_wait_for_freeze(mp, SB_FREEZE_WRITE); | 594 | xfs_wait_for_freeze(mp, SB_FREEZE_WRITE); |
595 | 595 | ||
596 | if (XFS_FORCED_SHUTDOWN(mp)) | 596 | if (XFS_FORCED_SHUTDOWN(mp)) |
597 | return -EIO; | 597 | return -EIO; |
598 | 598 | ||
599 | relock: | 599 | relock: |
600 | if (ioflags & IO_ISDIRECT) { | 600 | if (ioflags & IO_ISDIRECT) { |
601 | iolock = XFS_IOLOCK_SHARED; | 601 | iolock = XFS_IOLOCK_SHARED; |
602 | need_i_mutex = 0; | 602 | need_i_mutex = 0; |
603 | } else { | 603 | } else { |
604 | iolock = XFS_IOLOCK_EXCL; | 604 | iolock = XFS_IOLOCK_EXCL; |
605 | need_i_mutex = 1; | 605 | need_i_mutex = 1; |
606 | mutex_lock(&inode->i_mutex); | 606 | mutex_lock(&inode->i_mutex); |
607 | } | 607 | } |
608 | 608 | ||
609 | xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); | 609 | xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); |
610 | 610 | ||
611 | start: | 611 | start: |
612 | error = -generic_write_checks(file, &pos, &count, | 612 | error = -generic_write_checks(file, &pos, &count, |
613 | S_ISBLK(inode->i_mode)); | 613 | S_ISBLK(inode->i_mode)); |
614 | if (error) { | 614 | if (error) { |
615 | xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); | 615 | xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); |
616 | goto out_unlock_mutex; | 616 | goto out_unlock_mutex; |
617 | } | 617 | } |
618 | 618 | ||
619 | if ((DM_EVENT_ENABLED(xip, DM_EVENT_WRITE) && | 619 | if ((DM_EVENT_ENABLED(xip, DM_EVENT_WRITE) && |
620 | !(ioflags & IO_INVIS) && !eventsent)) { | 620 | !(ioflags & IO_INVIS) && !eventsent)) { |
621 | int dmflags = FILP_DELAY_FLAG(file); | 621 | int dmflags = FILP_DELAY_FLAG(file); |
622 | 622 | ||
623 | if (need_i_mutex) | 623 | if (need_i_mutex) |
624 | dmflags |= DM_FLAGS_IMUX; | 624 | dmflags |= DM_FLAGS_IMUX; |
625 | 625 | ||
626 | xfs_iunlock(xip, XFS_ILOCK_EXCL); | 626 | xfs_iunlock(xip, XFS_ILOCK_EXCL); |
627 | error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip, | 627 | error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip, |
628 | pos, count, dmflags, &iolock); | 628 | pos, count, dmflags, &iolock); |
629 | if (error) { | 629 | if (error) { |
630 | goto out_unlock_internal; | 630 | goto out_unlock_internal; |
631 | } | 631 | } |
632 | xfs_ilock(xip, XFS_ILOCK_EXCL); | 632 | xfs_ilock(xip, XFS_ILOCK_EXCL); |
633 | eventsent = 1; | 633 | eventsent = 1; |
634 | 634 | ||
635 | /* | 635 | /* |
636 | * The iolock was dropped and reacquired in XFS_SEND_DATA | 636 | * The iolock was dropped and reacquired in XFS_SEND_DATA |
637 | * so we have to recheck the size when appending. | 637 | * so we have to recheck the size when appending. |
638 | * We will only "goto start;" once, since having sent the | 638 | * We will only "goto start;" once, since having sent the |
639 | * event prevents another call to XFS_SEND_DATA, which is | 639 | * event prevents another call to XFS_SEND_DATA, which is |
640 | * what allows the size to change in the first place. | 640 | * what allows the size to change in the first place. |
641 | */ | 641 | */ |
642 | if ((file->f_flags & O_APPEND) && pos != xip->i_size) | 642 | if ((file->f_flags & O_APPEND) && pos != xip->i_size) |
643 | goto start; | 643 | goto start; |
644 | } | 644 | } |
645 | 645 | ||
646 | if (ioflags & IO_ISDIRECT) { | 646 | if (ioflags & IO_ISDIRECT) { |
647 | xfs_buftarg_t *target = | 647 | xfs_buftarg_t *target = |
648 | XFS_IS_REALTIME_INODE(xip) ? | 648 | XFS_IS_REALTIME_INODE(xip) ? |
649 | mp->m_rtdev_targp : mp->m_ddev_targp; | 649 | mp->m_rtdev_targp : mp->m_ddev_targp; |
650 | 650 | ||
651 | if ((pos & target->bt_smask) || (count & target->bt_smask)) { | 651 | if ((pos & target->bt_smask) || (count & target->bt_smask)) { |
652 | xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); | 652 | xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); |
653 | return XFS_ERROR(-EINVAL); | 653 | return XFS_ERROR(-EINVAL); |
654 | } | 654 | } |
655 | 655 | ||
656 | if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) { | 656 | if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) { |
657 | xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); | 657 | xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); |
658 | iolock = XFS_IOLOCK_EXCL; | 658 | iolock = XFS_IOLOCK_EXCL; |
659 | need_i_mutex = 1; | 659 | need_i_mutex = 1; |
660 | mutex_lock(&inode->i_mutex); | 660 | mutex_lock(&inode->i_mutex); |
661 | xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); | 661 | xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); |
662 | goto start; | 662 | goto start; |
663 | } | 663 | } |
664 | } | 664 | } |
665 | 665 | ||
666 | new_size = pos + count; | 666 | new_size = pos + count; |
667 | if (new_size > xip->i_size) | 667 | if (new_size > xip->i_size) |
668 | xip->i_new_size = new_size; | 668 | xip->i_new_size = new_size; |
669 | 669 | ||
670 | if (likely(!(ioflags & IO_INVIS))) | 670 | if (likely(!(ioflags & IO_INVIS))) |
671 | xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 671 | xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
672 | 672 | ||
673 | /* | 673 | /* |
674 | * If the offset is beyond the size of the file, we have a couple | 674 | * If the offset is beyond the size of the file, we have a couple |
675 | * of things to do. First, if there is already space allocated | 675 | * of things to do. First, if there is already space allocated |
676 | * we need to either create holes or zero the disk or ... | 676 | * we need to either create holes or zero the disk or ... |
677 | * | 677 | * |
678 | * If there is a page where the previous size lands, we need | 678 | * If there is a page where the previous size lands, we need |
679 | * to zero it out up to the new size. | 679 | * to zero it out up to the new size. |
680 | */ | 680 | */ |
681 | 681 | ||
682 | if (pos > xip->i_size) { | 682 | if (pos > xip->i_size) { |
683 | error = xfs_zero_eof(xip, pos, xip->i_size); | 683 | error = xfs_zero_eof(xip, pos, xip->i_size); |
684 | if (error) { | 684 | if (error) { |
685 | xfs_iunlock(xip, XFS_ILOCK_EXCL); | 685 | xfs_iunlock(xip, XFS_ILOCK_EXCL); |
686 | goto out_unlock_internal; | 686 | goto out_unlock_internal; |
687 | } | 687 | } |
688 | } | 688 | } |
689 | xfs_iunlock(xip, XFS_ILOCK_EXCL); | 689 | xfs_iunlock(xip, XFS_ILOCK_EXCL); |
690 | 690 | ||
691 | /* | 691 | /* |
692 | * If we're writing the file then make sure to clear the | 692 | * If we're writing the file then make sure to clear the |
693 | * setuid and setgid bits if the process is not being run | 693 | * setuid and setgid bits if the process is not being run |
694 | * by root. This keeps people from modifying setuid and | 694 | * by root. This keeps people from modifying setuid and |
695 | * setgid binaries. | 695 | * setgid binaries. |
696 | */ | 696 | */ |
697 | 697 | ||
698 | if (((xip->i_d.di_mode & S_ISUID) || | 698 | if (((xip->i_d.di_mode & S_ISUID) || |
699 | ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == | 699 | ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == |
700 | (S_ISGID | S_IXGRP))) && | 700 | (S_ISGID | S_IXGRP))) && |
701 | !capable(CAP_FSETID)) { | 701 | !capable(CAP_FSETID)) { |
702 | error = xfs_write_clear_setuid(xip); | 702 | error = xfs_write_clear_setuid(xip); |
703 | if (likely(!error)) | 703 | if (likely(!error)) |
704 | error = -file_remove_suid(file); | 704 | error = -file_remove_suid(file); |
705 | if (unlikely(error)) { | 705 | if (unlikely(error)) { |
706 | goto out_unlock_internal; | 706 | goto out_unlock_internal; |
707 | } | 707 | } |
708 | } | 708 | } |
709 | 709 | ||
710 | retry: | 710 | retry: |
711 | /* We can write back this queue in page reclaim */ | 711 | /* We can write back this queue in page reclaim */ |
712 | current->backing_dev_info = mapping->backing_dev_info; | 712 | current->backing_dev_info = mapping->backing_dev_info; |
713 | 713 | ||
714 | if ((ioflags & IO_ISDIRECT)) { | 714 | if ((ioflags & IO_ISDIRECT)) { |
715 | if (mapping->nrpages) { | 715 | if (mapping->nrpages) { |
716 | WARN_ON(need_i_mutex == 0); | 716 | WARN_ON(need_i_mutex == 0); |
717 | xfs_inval_cached_trace(xip, pos, -1, | 717 | xfs_inval_cached_trace(xip, pos, -1, |
718 | (pos & PAGE_CACHE_MASK), -1); | 718 | (pos & PAGE_CACHE_MASK), -1); |
719 | error = xfs_flushinval_pages(xip, | 719 | error = xfs_flushinval_pages(xip, |
720 | (pos & PAGE_CACHE_MASK), | 720 | (pos & PAGE_CACHE_MASK), |
721 | -1, FI_REMAPF_LOCKED); | 721 | -1, FI_REMAPF_LOCKED); |
722 | if (error) | 722 | if (error) |
723 | goto out_unlock_internal; | 723 | goto out_unlock_internal; |
724 | } | 724 | } |
725 | 725 | ||
726 | if (need_i_mutex) { | 726 | if (need_i_mutex) { |
727 | /* demote the lock now the cached pages are gone */ | 727 | /* demote the lock now the cached pages are gone */ |
728 | xfs_ilock_demote(xip, XFS_IOLOCK_EXCL); | 728 | xfs_ilock_demote(xip, XFS_IOLOCK_EXCL); |
729 | mutex_unlock(&inode->i_mutex); | 729 | mutex_unlock(&inode->i_mutex); |
730 | 730 | ||
731 | iolock = XFS_IOLOCK_SHARED; | 731 | iolock = XFS_IOLOCK_SHARED; |
732 | need_i_mutex = 0; | 732 | need_i_mutex = 0; |
733 | } | 733 | } |
734 | 734 | ||
735 | xfs_rw_enter_trace(XFS_DIOWR_ENTER, xip, (void *)iovp, segs, | 735 | xfs_rw_enter_trace(XFS_DIOWR_ENTER, xip, (void *)iovp, segs, |
736 | *offset, ioflags); | 736 | *offset, ioflags); |
737 | ret = generic_file_direct_write(iocb, iovp, | 737 | ret = generic_file_direct_write(iocb, iovp, |
738 | &segs, pos, offset, count, ocount); | 738 | &segs, pos, offset, count, ocount); |
739 | 739 | ||
740 | /* | 740 | /* |
741 | * direct-io write to a hole: fall through to buffered I/O | 741 | * direct-io write to a hole: fall through to buffered I/O |
742 | * for completing the rest of the request. | 742 | * for completing the rest of the request. |
743 | */ | 743 | */ |
744 | if (ret >= 0 && ret != count) { | 744 | if (ret >= 0 && ret != count) { |
745 | XFS_STATS_ADD(xs_write_bytes, ret); | 745 | XFS_STATS_ADD(xs_write_bytes, ret); |
746 | 746 | ||
747 | pos += ret; | 747 | pos += ret; |
748 | count -= ret; | 748 | count -= ret; |
749 | 749 | ||
750 | ioflags &= ~IO_ISDIRECT; | 750 | ioflags &= ~IO_ISDIRECT; |
751 | xfs_iunlock(xip, iolock); | 751 | xfs_iunlock(xip, iolock); |
752 | goto relock; | 752 | goto relock; |
753 | } | 753 | } |
754 | } else { | 754 | } else { |
755 | xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs, | 755 | xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs, |
756 | *offset, ioflags); | 756 | *offset, ioflags); |
757 | ret = generic_file_buffered_write(iocb, iovp, segs, | 757 | ret = generic_file_buffered_write(iocb, iovp, segs, |
758 | pos, offset, count, ret); | 758 | pos, offset, count, ret); |
759 | } | 759 | } |
760 | 760 | ||
761 | current->backing_dev_info = NULL; | 761 | current->backing_dev_info = NULL; |
762 | 762 | ||
763 | if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) | 763 | if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) |
764 | ret = wait_on_sync_kiocb(iocb); | 764 | ret = wait_on_sync_kiocb(iocb); |
765 | 765 | ||
766 | if (ret == -ENOSPC && | 766 | if (ret == -ENOSPC && |
767 | DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { | 767 | DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { |
768 | xfs_iunlock(xip, iolock); | 768 | xfs_iunlock(xip, iolock); |
769 | if (need_i_mutex) | 769 | if (need_i_mutex) |
770 | mutex_unlock(&inode->i_mutex); | 770 | mutex_unlock(&inode->i_mutex); |
771 | error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip, | 771 | error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip, |
772 | DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL, | 772 | DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL, |
773 | 0, 0, 0); /* Delay flag intentionally unused */ | 773 | 0, 0, 0); /* Delay flag intentionally unused */ |
774 | if (need_i_mutex) | 774 | if (need_i_mutex) |
775 | mutex_lock(&inode->i_mutex); | 775 | mutex_lock(&inode->i_mutex); |
776 | xfs_ilock(xip, iolock); | 776 | xfs_ilock(xip, iolock); |
777 | if (error) | 777 | if (error) |
778 | goto out_unlock_internal; | 778 | goto out_unlock_internal; |
779 | pos = xip->i_size; | 779 | pos = xip->i_size; |
780 | ret = 0; | 780 | ret = 0; |
781 | goto retry; | 781 | goto retry; |
782 | } | 782 | } |
783 | 783 | ||
784 | isize = i_size_read(inode); | 784 | isize = i_size_read(inode); |
785 | if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize)) | 785 | if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize)) |
786 | *offset = isize; | 786 | *offset = isize; |
787 | 787 | ||
788 | if (*offset > xip->i_size) { | 788 | if (*offset > xip->i_size) { |
789 | xfs_ilock(xip, XFS_ILOCK_EXCL); | 789 | xfs_ilock(xip, XFS_ILOCK_EXCL); |
790 | if (*offset > xip->i_size) | 790 | if (*offset > xip->i_size) |
791 | xip->i_size = *offset; | 791 | xip->i_size = *offset; |
792 | xfs_iunlock(xip, XFS_ILOCK_EXCL); | 792 | xfs_iunlock(xip, XFS_ILOCK_EXCL); |
793 | } | 793 | } |
794 | 794 | ||
795 | error = -ret; | 795 | error = -ret; |
796 | if (ret <= 0) | 796 | if (ret <= 0) |
797 | goto out_unlock_internal; | 797 | goto out_unlock_internal; |
798 | 798 | ||
799 | XFS_STATS_ADD(xs_write_bytes, ret); | 799 | XFS_STATS_ADD(xs_write_bytes, ret); |
800 | 800 | ||
801 | /* Handle various SYNC-type writes */ | 801 | /* Handle various SYNC-type writes */ |
802 | if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { | 802 | if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { |
803 | int error2; | 803 | int error2; |
804 | 804 | ||
805 | xfs_iunlock(xip, iolock); | 805 | xfs_iunlock(xip, iolock); |
806 | if (need_i_mutex) | 806 | if (need_i_mutex) |
807 | mutex_unlock(&inode->i_mutex); | 807 | mutex_unlock(&inode->i_mutex); |
808 | error2 = sync_page_range(inode, mapping, pos, ret); | 808 | error2 = sync_page_range(inode, mapping, pos, ret); |
809 | if (!error) | 809 | if (!error) |
810 | error = error2; | 810 | error = error2; |
811 | if (need_i_mutex) | 811 | if (need_i_mutex) |
812 | mutex_lock(&inode->i_mutex); | 812 | mutex_lock(&inode->i_mutex); |
813 | xfs_ilock(xip, iolock); | 813 | xfs_ilock(xip, iolock); |
814 | error2 = xfs_write_sync_logforce(mp, xip); | 814 | error2 = xfs_write_sync_logforce(mp, xip); |
815 | if (!error) | 815 | if (!error) |
816 | error = error2; | 816 | error = error2; |
817 | } | 817 | } |
818 | 818 | ||
819 | out_unlock_internal: | 819 | out_unlock_internal: |
820 | if (xip->i_new_size) { | 820 | if (xip->i_new_size) { |
821 | xfs_ilock(xip, XFS_ILOCK_EXCL); | 821 | xfs_ilock(xip, XFS_ILOCK_EXCL); |
822 | xip->i_new_size = 0; | 822 | xip->i_new_size = 0; |
823 | /* | 823 | /* |
824 | * If this was a direct or synchronous I/O that failed (such | 824 | * If this was a direct or synchronous I/O that failed (such |
825 | * as ENOSPC) then part of the I/O may have been written to | 825 | * as ENOSPC) then part of the I/O may have been written to |
826 | * disk before the error occurred. In this case the on-disk | 826 | * disk before the error occurred. In this case the on-disk |
827 | * file size may have been adjusted beyond the in-memory file | 827 | * file size may have been adjusted beyond the in-memory file |
828 | * size and now needs to be truncated back. | 828 | * size and now needs to be truncated back. |
829 | */ | 829 | */ |
830 | if (xip->i_d.di_size > xip->i_size) | 830 | if (xip->i_d.di_size > xip->i_size) |
831 | xip->i_d.di_size = xip->i_size; | 831 | xip->i_d.di_size = xip->i_size; |
832 | xfs_iunlock(xip, XFS_ILOCK_EXCL); | 832 | xfs_iunlock(xip, XFS_ILOCK_EXCL); |
833 | } | 833 | } |
834 | xfs_iunlock(xip, iolock); | 834 | xfs_iunlock(xip, iolock); |
835 | out_unlock_mutex: | 835 | out_unlock_mutex: |
836 | if (need_i_mutex) | 836 | if (need_i_mutex) |
837 | mutex_unlock(&inode->i_mutex); | 837 | mutex_unlock(&inode->i_mutex); |
838 | return -error; | 838 | return -error; |
839 | } | 839 | } |
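One detail from the direct-I/O branch of xfs_write() above: the write is rejected unless both the file position and the byte count are sector aligned. A small sketch of that test with an assumed 512-byte sector size (bt_smask is sector_size - 1 in the real code):

        #include <stdio.h>

        int main(void)
        {
                unsigned long smask = 512 - 1;   /* assumed sector mask */
                long long pos = 4096, count = 513;

                if ((pos & smask) || (count & smask))
                        printf("direct write rejected: EINVAL\n");
                else
                        printf("direct write allowed\n");
                return 0;
        }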
840 | 840 | ||
841 | /* | 841 | /* |
842 | * All xfs metadata buffers except log state machine buffers | 842 | * All xfs metadata buffers except log state machine buffers |
843 | * get this attached as their b_bdstrat callback function. | 843 | * get this attached as their b_bdstrat callback function. |
844 | * This is so that we can catch a buffer | 844 | * This is so that we can catch a buffer |
845 | * after prematurely unpinning it to forcibly shut down the filesystem. | 845 | * after prematurely unpinning it to forcibly shut down the filesystem. |
846 | */ | 846 | */ |
847 | int | 847 | int |
848 | xfs_bdstrat_cb(struct xfs_buf *bp) | 848 | xfs_bdstrat_cb(struct xfs_buf *bp) |
849 | { | 849 | { |
850 | xfs_mount_t *mp; | 850 | if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { |
851 | |||
852 | mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *); | ||
853 | if (!XFS_FORCED_SHUTDOWN(mp)) { | ||
854 | xfs_buf_iorequest(bp); | ||
855 | return 0; | ||
856 | } else { | ||
857 | xfs_buftrace("XFS__BDSTRAT IOERROR", bp); | 851 | xfs_buftrace("XFS__BDSTRAT IOERROR", bp); |
858 | /* | 852 | /* |
859 | * Metadata write that didn't get logged but | 853 | * Metadata write that didn't get logged but |
860 | * written delayed anyway. These aren't associated | 854 | * written delayed anyway. These aren't associated |
861 | * with a transaction, and can be ignored. | 855 | * with a transaction, and can be ignored. |
862 | */ | 856 | */ |
863 | if (XFS_BUF_IODONE_FUNC(bp) == NULL && | 857 | if (XFS_BUF_IODONE_FUNC(bp) == NULL && |
864 | (XFS_BUF_ISREAD(bp)) == 0) | 858 | (XFS_BUF_ISREAD(bp)) == 0) |
865 | return (xfs_bioerror_relse(bp)); | 859 | return (xfs_bioerror_relse(bp)); |
866 | else | 860 | else |
867 | return (xfs_bioerror(bp)); | 861 | return (xfs_bioerror(bp)); |
868 | } | 862 | } |
863 | |||
864 | xfs_buf_iorequest(bp); | ||
865 | return 0; | ||
869 | } | 866 | } |
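This hunk is the heart of the commit: the mount pointer moves from an untyped private slot, read through the casting accessor XFS_BUF_FSPRIVATE3(), to a properly typed b_mount field, which also lets the shutdown test become an early return. A simplified contrast, with the buffer structure reduced to the two relevant fields (names simplified):

        struct xfs_mount;               /* opaque here */

        struct buf_sketch {
                void *b_fspriv3;                /* old: untyped, cast on use */
                struct xfs_mount *b_mount;      /* new: typed, direct access */
        };

        struct xfs_mount *get_mount(struct buf_sketch *bp)
        {
                /* old: mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *); */
                return bp->b_mount;             /* new */
        }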
870 | 867 | ||
871 | /* | 868 | /* |
872 | * Wrapper around bdstrat so that we can stop data from going to disk in case | 869 | * Wrapper around bdstrat so that we can stop data from going to disk in case |
873 | * we are shutting down the filesystem. Typically user data goes through this | 870 | * we are shutting down the filesystem. Typically user data goes through this |
874 | * path; one of the exceptions is the superblock. | 871 | * path; one of the exceptions is the superblock. |
875 | */ | 872 | */ |
876 | void | 873 | void |
877 | xfsbdstrat( | 874 | xfsbdstrat( |
878 | struct xfs_mount *mp, | 875 | struct xfs_mount *mp, |
879 | struct xfs_buf *bp) | 876 | struct xfs_buf *bp) |
880 | { | 877 | { |
881 | ASSERT(mp); | 878 | ASSERT(mp); |
882 | if (!XFS_FORCED_SHUTDOWN(mp)) { | 879 | if (!XFS_FORCED_SHUTDOWN(mp)) { |
883 | xfs_buf_iorequest(bp); | 880 | xfs_buf_iorequest(bp); |
884 | return; | 881 | return; |
885 | } | 882 | } |
886 | 883 | ||
887 | xfs_buftrace("XFSBDSTRAT IOERROR", bp); | 884 | xfs_buftrace("XFSBDSTRAT IOERROR", bp); |
888 | xfs_bioerror_relse(bp); | 885 | xfs_bioerror_relse(bp); |
889 | } | 886 | } |
890 | 887 | ||
891 | /* | 888 | /* |
892 | * If the underlying (data/log/rt) device is readonly, there are some | 889 | * If the underlying (data/log/rt) device is readonly, there are some |
893 | * operations that cannot proceed. | 890 | * operations that cannot proceed. |
894 | */ | 891 | */ |
895 | int | 892 | int |
896 | xfs_dev_is_read_only( | 893 | xfs_dev_is_read_only( |
897 | xfs_mount_t *mp, | 894 | xfs_mount_t *mp, |
898 | char *message) | 895 | char *message) |
899 | { | 896 | { |
900 | if (xfs_readonly_buftarg(mp->m_ddev_targp) || | 897 | if (xfs_readonly_buftarg(mp->m_ddev_targp) || |
901 | xfs_readonly_buftarg(mp->m_logdev_targp) || | 898 | xfs_readonly_buftarg(mp->m_logdev_targp) || |
902 | (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { | 899 | (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { |
903 | cmn_err(CE_NOTE, | 900 | cmn_err(CE_NOTE, |
904 | "XFS: %s required on read-only device.", message); | 901 | "XFS: %s required on read-only device.", message); |
905 | cmn_err(CE_NOTE, | 902 | cmn_err(CE_NOTE, |
906 | "XFS: write access unavailable, cannot proceed."); | 903 | "XFS: write access unavailable, cannot proceed."); |
907 | return EROFS; | 904 | return EROFS; |
908 | } | 905 | } |
fs/xfs/xfs_buf_item.c
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | 2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
24 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 25 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 26 | #include "xfs_ag.h" |
27 | #include "xfs_dmapi.h" | 27 | #include "xfs_dmapi.h" |
28 | #include "xfs_mount.h" | 28 | #include "xfs_mount.h" |
29 | #include "xfs_buf_item.h" | 29 | #include "xfs_buf_item.h" |
30 | #include "xfs_trans_priv.h" | 30 | #include "xfs_trans_priv.h" |
31 | #include "xfs_error.h" | 31 | #include "xfs_error.h" |
32 | 32 | ||
33 | 33 | ||
34 | kmem_zone_t *xfs_buf_item_zone; | 34 | kmem_zone_t *xfs_buf_item_zone; |
35 | 35 | ||
36 | #ifdef XFS_TRANS_DEBUG | 36 | #ifdef XFS_TRANS_DEBUG |
37 | /* | 37 | /* |
38 | * This function uses an alternate strategy for tracking the bytes | 38 | * This function uses an alternate strategy for tracking the bytes |
39 | * that the user requests to be logged. This can then be used | 39 | * that the user requests to be logged. This can then be used |
40 | * in conjunction with the bli_orig array in the buf log item to | 40 | * in conjunction with the bli_orig array in the buf log item to |
41 | * catch bugs in our callers' code. | 41 | * catch bugs in our callers' code. |
42 | * | 42 | * |
43 | * We also double check the bits set in xfs_buf_item_log using a | 43 | * We also double check the bits set in xfs_buf_item_log using a |
44 | * simple algorithm to check that every byte is accounted for. | 44 | * simple algorithm to check that every byte is accounted for. |
45 | */ | 45 | */ |
46 | STATIC void | 46 | STATIC void |
47 | xfs_buf_item_log_debug( | 47 | xfs_buf_item_log_debug( |
48 | xfs_buf_log_item_t *bip, | 48 | xfs_buf_log_item_t *bip, |
49 | uint first, | 49 | uint first, |
50 | uint last) | 50 | uint last) |
51 | { | 51 | { |
52 | uint x; | 52 | uint x; |
53 | uint byte; | 53 | uint byte; |
54 | uint nbytes; | 54 | uint nbytes; |
55 | uint chunk_num; | 55 | uint chunk_num; |
56 | uint word_num; | 56 | uint word_num; |
57 | uint bit_num; | 57 | uint bit_num; |
58 | uint bit_set; | 58 | uint bit_set; |
59 | uint *wordp; | 59 | uint *wordp; |
60 | 60 | ||
61 | ASSERT(bip->bli_logged != NULL); | 61 | ASSERT(bip->bli_logged != NULL); |
62 | byte = first; | 62 | byte = first; |
63 | nbytes = last - first + 1; | 63 | nbytes = last - first + 1; |
64 | bfset(bip->bli_logged, first, nbytes); | 64 | bfset(bip->bli_logged, first, nbytes); |
65 | for (x = 0; x < nbytes; x++) { | 65 | for (x = 0; x < nbytes; x++) { |
66 | chunk_num = byte >> XFS_BLI_SHIFT; | 66 | chunk_num = byte >> XFS_BLI_SHIFT; |
67 | word_num = chunk_num >> BIT_TO_WORD_SHIFT; | 67 | word_num = chunk_num >> BIT_TO_WORD_SHIFT; |
68 | bit_num = chunk_num & (NBWORD - 1); | 68 | bit_num = chunk_num & (NBWORD - 1); |
69 | wordp = &(bip->bli_format.blf_data_map[word_num]); | 69 | wordp = &(bip->bli_format.blf_data_map[word_num]); |
70 | bit_set = *wordp & (1 << bit_num); | 70 | bit_set = *wordp & (1 << bit_num); |
71 | ASSERT(bit_set); | 71 | ASSERT(bit_set); |
72 | byte++; | 72 | byte++; |
73 | } | 73 | } |
74 | } | 74 | } |
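The chunk/word/bit arithmetic above, worked through for one byte (assuming the usual 128-byte logging chunks, i.e. XFS_BLI_SHIFT == 7, and 32-bit bitmap words):

        #include <stdio.h>

        int main(void)
        {
                unsigned int byte = 300;
                unsigned int chunk_num = byte >> 7;       /* 128-byte chunk: 2 */
                unsigned int word_num  = chunk_num >> 5;  /* 32 bits/word:   0 */
                unsigned int bit_num   = chunk_num & 31;  /* bit in word:    2 */

                printf("byte %u -> map word %u, bit %u\n",
                       byte, word_num, bit_num);
                return 0;
        }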
75 | 75 | ||
76 | /* | 76 | /* |
77 | * This function is called when we flush something into a buffer without | 77 | * This function is called when we flush something into a buffer without |
78 | * logging it. This happens for things like inodes which are logged | 78 | * logging it. This happens for things like inodes which are logged |
79 | * separately from the buffer. | 79 | * separately from the buffer. |
80 | */ | 80 | */ |
81 | void | 81 | void |
82 | xfs_buf_item_flush_log_debug( | 82 | xfs_buf_item_flush_log_debug( |
83 | xfs_buf_t *bp, | 83 | xfs_buf_t *bp, |
84 | uint first, | 84 | uint first, |
85 | uint last) | 85 | uint last) |
86 | { | 86 | { |
87 | xfs_buf_log_item_t *bip; | 87 | xfs_buf_log_item_t *bip; |
88 | uint nbytes; | 88 | uint nbytes; |
89 | 89 | ||
90 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); | 90 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); |
91 | if ((bip == NULL) || (bip->bli_item.li_type != XFS_LI_BUF)) { | 91 | if ((bip == NULL) || (bip->bli_item.li_type != XFS_LI_BUF)) { |
92 | return; | 92 | return; |
93 | } | 93 | } |
94 | 94 | ||
95 | ASSERT(bip->bli_logged != NULL); | 95 | ASSERT(bip->bli_logged != NULL); |
96 | nbytes = last - first + 1; | 96 | nbytes = last - first + 1; |
97 | bfset(bip->bli_logged, first, nbytes); | 97 | bfset(bip->bli_logged, first, nbytes); |
98 | } | 98 | } |
99 | 99 | ||
100 | /* | 100 | /* |
101 | * This function is called to verify that our callers have logged | 101 | * This function is called to verify that our callers have logged |
102 | * all the bytes that they changed. | 102 | * all the bytes that they changed. |
103 | * | 103 | * |
104 | * It does this by comparing the original copy of the buffer stored in | 104 | * It does this by comparing the original copy of the buffer stored in |
105 | * the buf log item's bli_orig array to the current copy of the buffer | 105 | * the buf log item's bli_orig array to the current copy of the buffer |
106 | * and ensuring that all bytes which mismatch are set in the bli_logged | 106 | * and ensuring that all bytes which mismatch are set in the bli_logged |
107 | * array of the buf log item. | 107 | * array of the buf log item. |
108 | */ | 108 | */ |
109 | STATIC void | 109 | STATIC void |
110 | xfs_buf_item_log_check( | 110 | xfs_buf_item_log_check( |
111 | xfs_buf_log_item_t *bip) | 111 | xfs_buf_log_item_t *bip) |
112 | { | 112 | { |
113 | char *orig; | 113 | char *orig; |
114 | char *buffer; | 114 | char *buffer; |
115 | int x; | 115 | int x; |
116 | xfs_buf_t *bp; | 116 | xfs_buf_t *bp; |
117 | 117 | ||
118 | ASSERT(bip->bli_orig != NULL); | 118 | ASSERT(bip->bli_orig != NULL); |
119 | ASSERT(bip->bli_logged != NULL); | 119 | ASSERT(bip->bli_logged != NULL); |
120 | 120 | ||
121 | bp = bip->bli_buf; | 121 | bp = bip->bli_buf; |
122 | ASSERT(XFS_BUF_COUNT(bp) > 0); | 122 | ASSERT(XFS_BUF_COUNT(bp) > 0); |
123 | ASSERT(XFS_BUF_PTR(bp) != NULL); | 123 | ASSERT(XFS_BUF_PTR(bp) != NULL); |
124 | orig = bip->bli_orig; | 124 | orig = bip->bli_orig; |
125 | buffer = XFS_BUF_PTR(bp); | 125 | buffer = XFS_BUF_PTR(bp); |
126 | for (x = 0; x < XFS_BUF_COUNT(bp); x++) { | 126 | for (x = 0; x < XFS_BUF_COUNT(bp); x++) { |
127 | if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) | 127 | if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) |
128 | cmn_err(CE_PANIC, | 128 | cmn_err(CE_PANIC, |
129 | "xfs_buf_item_log_check bip %x buffer %x orig %x index %d", | 129 | "xfs_buf_item_log_check bip %x buffer %x orig %x index %d", |
130 | bip, bp, orig, x); | 130 | bip, bp, orig, x); |
131 | } | 131 | } |
132 | } | 132 | } |
133 | #else | 133 | #else |
134 | #define xfs_buf_item_log_debug(x,y,z) | 134 | #define xfs_buf_item_log_debug(x,y,z) |
135 | #define xfs_buf_item_log_check(x) | 135 | #define xfs_buf_item_log_check(x) |
136 | #endif | 136 | #endif |
137 | 137 | ||
138 | STATIC void xfs_buf_error_relse(xfs_buf_t *bp); | 138 | STATIC void xfs_buf_error_relse(xfs_buf_t *bp); |
139 | STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip); | 139 | STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip); |
140 | 140 | ||
141 | /* | 141 | /* |
142 | * This returns the number of log iovecs needed to log the | 142 | * This returns the number of log iovecs needed to log the |
143 | * given buf log item. | 143 | * given buf log item. |
144 | * | 144 | * |
145 | * It calculates this as 1 iovec for the buf log format structure | 145 | * It calculates this as 1 iovec for the buf log format structure |
146 | * and 1 for each stretch of non-contiguous chunks to be logged. | 146 | * and 1 for each stretch of non-contiguous chunks to be logged. |
147 | * Contiguous chunks are logged in a single iovec. | 147 | * Contiguous chunks are logged in a single iovec. |
148 | * | 148 | * |
149 | * If the XFS_BLI_STALE flag has been set, then log nothing. | 149 | * If the XFS_BLI_STALE flag has been set, then log nothing. |
150 | */ | 150 | */ |
151 | STATIC uint | 151 | STATIC uint |
152 | xfs_buf_item_size( | 152 | xfs_buf_item_size( |
153 | xfs_buf_log_item_t *bip) | 153 | xfs_buf_log_item_t *bip) |
154 | { | 154 | { |
155 | uint nvecs; | 155 | uint nvecs; |
156 | int next_bit; | 156 | int next_bit; |
157 | int last_bit; | 157 | int last_bit; |
158 | xfs_buf_t *bp; | 158 | xfs_buf_t *bp; |
159 | 159 | ||
160 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 160 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
161 | if (bip->bli_flags & XFS_BLI_STALE) { | 161 | if (bip->bli_flags & XFS_BLI_STALE) { |
162 | /* | 162 | /* |
163 | * The buffer is stale, so all we need to log | 163 | * The buffer is stale, so all we need to log |
164 | * is the buf log format structure with the | 164 | * is the buf log format structure with the |
165 | * cancel flag in it. | 165 | * cancel flag in it. |
166 | */ | 166 | */ |
167 | xfs_buf_item_trace("SIZE STALE", bip); | 167 | xfs_buf_item_trace("SIZE STALE", bip); |
168 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 168 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); |
169 | return 1; | 169 | return 1; |
170 | } | 170 | } |
171 | 171 | ||
172 | bp = bip->bli_buf; | 172 | bp = bip->bli_buf; |
173 | ASSERT(bip->bli_flags & XFS_BLI_LOGGED); | 173 | ASSERT(bip->bli_flags & XFS_BLI_LOGGED); |
174 | nvecs = 1; | 174 | nvecs = 1; |
175 | last_bit = xfs_next_bit(bip->bli_format.blf_data_map, | 175 | last_bit = xfs_next_bit(bip->bli_format.blf_data_map, |
176 | bip->bli_format.blf_map_size, 0); | 176 | bip->bli_format.blf_map_size, 0); |
177 | ASSERT(last_bit != -1); | 177 | ASSERT(last_bit != -1); |
178 | nvecs++; | 178 | nvecs++; |
179 | while (last_bit != -1) { | 179 | while (last_bit != -1) { |
180 | /* | 180 | /* |
181 | * This takes the bit number to start looking from and | 181 | * This takes the bit number to start looking from and |
182 | * returns the next set bit from there. It returns -1 | 182 | * returns the next set bit from there. It returns -1 |
183 | * if there are no more bits set or the start bit is | 183 | * if there are no more bits set or the start bit is |
184 | * beyond the end of the bitmap. | 184 | * beyond the end of the bitmap. |
185 | */ | 185 | */ |
186 | next_bit = xfs_next_bit(bip->bli_format.blf_data_map, | 186 | next_bit = xfs_next_bit(bip->bli_format.blf_data_map, |
187 | bip->bli_format.blf_map_size, | 187 | bip->bli_format.blf_map_size, |
188 | last_bit + 1); | 188 | last_bit + 1); |
189 | /* | 189 | /* |
190 | * If we run out of bits, leave the loop, | 190 | * If we run out of bits, leave the loop, |
191 | * else if we find a new set of bits bump the number of vecs, | 191 | * else if we find a new set of bits bump the number of vecs, |
192 | * else keep scanning the current set of bits. | 192 | * else keep scanning the current set of bits. |
193 | */ | 193 | */ |
194 | if (next_bit == -1) { | 194 | if (next_bit == -1) { |
195 | last_bit = -1; | 195 | last_bit = -1; |
196 | } else if (next_bit != last_bit + 1) { | 196 | } else if (next_bit != last_bit + 1) { |
197 | last_bit = next_bit; | 197 | last_bit = next_bit; |
198 | nvecs++; | 198 | nvecs++; |
199 | } else if (xfs_buf_offset(bp, next_bit * XFS_BLI_CHUNK) != | 199 | } else if (xfs_buf_offset(bp, next_bit * XFS_BLI_CHUNK) != |
200 | (xfs_buf_offset(bp, last_bit * XFS_BLI_CHUNK) + | 200 | (xfs_buf_offset(bp, last_bit * XFS_BLI_CHUNK) + |
201 | XFS_BLI_CHUNK)) { | 201 | XFS_BLI_CHUNK)) { |
202 | last_bit = next_bit; | 202 | last_bit = next_bit; |
203 | nvecs++; | 203 | nvecs++; |
204 | } else { | 204 | } else { |
205 | last_bit++; | 205 | last_bit++; |
206 | } | 206 | } |
207 | } | 207 | } |
208 | 208 | ||
209 | xfs_buf_item_trace("SIZE NORM", bip); | 209 | xfs_buf_item_trace("SIZE NORM", bip); |
210 | return nvecs; | 210 | return nvecs; |
211 | } | 211 | } |
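The counting loop above amounts to: one iovec for the format header plus one per contiguous run of set bits in the dirty-chunk bitmap (ignoring the extra split it also makes at non-contiguous buffer pages). A self-contained analogue over a single 32-bit word:

        #include <stdio.h>

        /* index of the next set bit at or after start, or -1 */
        static int next_bit(unsigned int map, int start)
        {
                int i;
                for (i = start; i < 32; i++)
                        if (map & (1u << i))
                                return i;
                return -1;
        }

        int main(void)
        {
                unsigned int map = 0x0000f0f3;  /* runs: 0-1, 4-7, 12-15 */
                int nvecs = 1;                  /* format header vector  */
                int last = next_bit(map, 0);

                if (last != -1)
                        nvecs++;                /* first run of chunks   */
                while (last != -1) {
                        int next = next_bit(map, last + 1);
                        if (next != -1 && next != last + 1)
                                nvecs++;        /* gap starts a new run  */
                        last = next;
                }
                printf("nvecs = %d\n", nvecs);  /* prints 4 */
                return 0;
        }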
212 | 212 | ||
213 | /* | 213 | /* |
214 | * This is called to fill in the vector of log iovecs for the | 214 | * This is called to fill in the vector of log iovecs for the |
215 | * given log buf item. It fills the first entry with a buf log | 215 | * given log buf item. It fills the first entry with a buf log |
216 | * format structure, and the rest point to contiguous chunks | 216 | * format structure, and the rest point to contiguous chunks |
217 | * within the buffer. | 217 | * within the buffer. |
218 | */ | 218 | */ |
219 | STATIC void | 219 | STATIC void |
220 | xfs_buf_item_format( | 220 | xfs_buf_item_format( |
221 | xfs_buf_log_item_t *bip, | 221 | xfs_buf_log_item_t *bip, |
222 | xfs_log_iovec_t *log_vector) | 222 | xfs_log_iovec_t *log_vector) |
223 | { | 223 | { |
224 | uint base_size; | 224 | uint base_size; |
225 | uint nvecs; | 225 | uint nvecs; |
226 | xfs_log_iovec_t *vecp; | 226 | xfs_log_iovec_t *vecp; |
227 | xfs_buf_t *bp; | 227 | xfs_buf_t *bp; |
228 | int first_bit; | 228 | int first_bit; |
229 | int last_bit; | 229 | int last_bit; |
230 | int next_bit; | 230 | int next_bit; |
231 | uint nbits; | 231 | uint nbits; |
232 | uint buffer_offset; | 232 | uint buffer_offset; |
233 | 233 | ||
234 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 234 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
235 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || | 235 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || |
236 | (bip->bli_flags & XFS_BLI_STALE)); | 236 | (bip->bli_flags & XFS_BLI_STALE)); |
237 | bp = bip->bli_buf; | 237 | bp = bip->bli_buf; |
238 | vecp = log_vector; | 238 | vecp = log_vector; |
239 | 239 | ||
240 | /* | 240 | /* |
241 | * The size of the base structure is the size of the | 241 | * The size of the base structure is the size of the |
242 | * declared structure plus the space for the extra words | 242 | * declared structure plus the space for the extra words |
243 | * of the bitmap. We subtract one from the map size, because | 243 | * of the bitmap. We subtract one from the map size, because |
244 | * the first element of the bitmap is accounted for in the | 244 | * the first element of the bitmap is accounted for in the |
245 | * size of the base structure. | 245 | * size of the base structure. |
246 | */ | 246 | */ |
247 | base_size = | 247 | base_size = |
248 | (uint)(sizeof(xfs_buf_log_format_t) + | 248 | (uint)(sizeof(xfs_buf_log_format_t) + |
249 | ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); | 249 | ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); |
250 | vecp->i_addr = (xfs_caddr_t)&bip->bli_format; | 250 | vecp->i_addr = (xfs_caddr_t)&bip->bli_format; |
251 | vecp->i_len = base_size; | 251 | vecp->i_len = base_size; |
252 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BFORMAT); | 252 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BFORMAT); |
253 | vecp++; | 253 | vecp++; |
254 | nvecs = 1; | 254 | nvecs = 1; |
255 | 255 | ||
256 | if (bip->bli_flags & XFS_BLI_STALE) { | 256 | if (bip->bli_flags & XFS_BLI_STALE) { |
257 | /* | 257 | /* |
258 | * The buffer is stale, so all we need to log | 258 | * The buffer is stale, so all we need to log |
259 | * is the buf log format structure with the | 259 | * is the buf log format structure with the |
260 | * cancel flag in it. | 260 | * cancel flag in it. |
261 | */ | 261 | */ |
262 | xfs_buf_item_trace("FORMAT STALE", bip); | 262 | xfs_buf_item_trace("FORMAT STALE", bip); |
263 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 263 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); |
264 | bip->bli_format.blf_size = nvecs; | 264 | bip->bli_format.blf_size = nvecs; |
265 | return; | 265 | return; |
266 | } | 266 | } |
267 | 267 | ||
268 | /* | 268 | /* |
269 | * Fill in an iovec for each set of contiguous chunks. | 269 | * Fill in an iovec for each set of contiguous chunks. |
270 | */ | 270 | */ |
271 | first_bit = xfs_next_bit(bip->bli_format.blf_data_map, | 271 | first_bit = xfs_next_bit(bip->bli_format.blf_data_map, |
272 | bip->bli_format.blf_map_size, 0); | 272 | bip->bli_format.blf_map_size, 0); |
273 | ASSERT(first_bit != -1); | 273 | ASSERT(first_bit != -1); |
274 | last_bit = first_bit; | 274 | last_bit = first_bit; |
275 | nbits = 1; | 275 | nbits = 1; |
276 | for (;;) { | 276 | for (;;) { |
277 | /* | 277 | /* |
278 | * This takes the bit number to start looking from and | 278 | * This takes the bit number to start looking from and |
279 | * returns the next set bit from there. It returns -1 | 279 | * returns the next set bit from there. It returns -1 |
280 | * if there are no more bits set or the start bit is | 280 | * if there are no more bits set or the start bit is |
281 | * beyond the end of the bitmap. | 281 | * beyond the end of the bitmap. |
282 | */ | 282 | */ |
283 | next_bit = xfs_next_bit(bip->bli_format.blf_data_map, | 283 | next_bit = xfs_next_bit(bip->bli_format.blf_data_map, |
284 | bip->bli_format.blf_map_size, | 284 | bip->bli_format.blf_map_size, |
285 | (uint)last_bit + 1); | 285 | (uint)last_bit + 1); |
286 | /* | 286 | /* |
287 | * If we run out of bits fill in the last iovec and get | 287 | * If we run out of bits fill in the last iovec and get |
288 | * out of the loop. | 288 | * out of the loop. |
289 | * Else if we start a new set of bits then fill in the | 289 | * Else if we start a new set of bits then fill in the |
290 | * iovec for the series we were looking at and start | 290 | * iovec for the series we were looking at and start |
291 | * counting the bits in the new one. | 291 | * counting the bits in the new one. |
292 | * Else we're still in the same set of bits so just | 292 | * Else we're still in the same set of bits so just |
293 | * keep counting and scanning. | 293 | * keep counting and scanning. |
294 | */ | 294 | */ |
295 | if (next_bit == -1) { | 295 | if (next_bit == -1) { |
296 | buffer_offset = first_bit * XFS_BLI_CHUNK; | 296 | buffer_offset = first_bit * XFS_BLI_CHUNK; |
297 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); | 297 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); |
298 | vecp->i_len = nbits * XFS_BLI_CHUNK; | 298 | vecp->i_len = nbits * XFS_BLI_CHUNK; |
299 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); | 299 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); |
300 | nvecs++; | 300 | nvecs++; |
301 | break; | 301 | break; |
302 | } else if (next_bit != last_bit + 1) { | 302 | } else if (next_bit != last_bit + 1) { |
303 | buffer_offset = first_bit * XFS_BLI_CHUNK; | 303 | buffer_offset = first_bit * XFS_BLI_CHUNK; |
304 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); | 304 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); |
305 | vecp->i_len = nbits * XFS_BLI_CHUNK; | 305 | vecp->i_len = nbits * XFS_BLI_CHUNK; |
306 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); | 306 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); |
307 | nvecs++; | 307 | nvecs++; |
308 | vecp++; | 308 | vecp++; |
309 | first_bit = next_bit; | 309 | first_bit = next_bit; |
310 | last_bit = next_bit; | 310 | last_bit = next_bit; |
311 | nbits = 1; | 311 | nbits = 1; |
312 | } else if (xfs_buf_offset(bp, next_bit << XFS_BLI_SHIFT) != | 312 | } else if (xfs_buf_offset(bp, next_bit << XFS_BLI_SHIFT) != |
313 | (xfs_buf_offset(bp, last_bit << XFS_BLI_SHIFT) + | 313 | (xfs_buf_offset(bp, last_bit << XFS_BLI_SHIFT) + |
314 | XFS_BLI_CHUNK)) { | 314 | XFS_BLI_CHUNK)) { |
315 | buffer_offset = first_bit * XFS_BLI_CHUNK; | 315 | buffer_offset = first_bit * XFS_BLI_CHUNK; |
316 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); | 316 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); |
317 | vecp->i_len = nbits * XFS_BLI_CHUNK; | 317 | vecp->i_len = nbits * XFS_BLI_CHUNK; |
318 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); | 318 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); |
319 | /* You would think we need to bump nvecs here too, but we do not: | 319 | /* You would think we need to bump nvecs here too, but we do not: |
320 | * this number is used by recovery, and it gets confused by the boundary | 320 | * this number is used by recovery, and it gets confused by the boundary |
321 | * split here. | 321 | * split here. |
322 | * nvecs++; | 322 | * nvecs++; |
323 | */ | 323 | */ |
324 | vecp++; | 324 | vecp++; |
325 | first_bit = next_bit; | 325 | first_bit = next_bit; |
326 | last_bit = next_bit; | 326 | last_bit = next_bit; |
327 | nbits = 1; | 327 | nbits = 1; |
328 | } else { | 328 | } else { |
329 | last_bit++; | 329 | last_bit++; |
330 | nbits++; | 330 | nbits++; |
331 | } | 331 | } |
332 | } | 332 | } |
333 | bip->bli_format.blf_size = nvecs; | 333 | bip->bli_format.blf_size = nvecs; |
334 | 334 | ||
335 | /* | 335 | /* |
336 | * Check to make sure everything is consistent. | 336 | * Check to make sure everything is consistent. |
337 | */ | 337 | */ |
338 | xfs_buf_item_trace("FORMAT NORM", bip); | 338 | xfs_buf_item_trace("FORMAT NORM", bip); |
339 | xfs_buf_item_log_check(bip); | 339 | xfs_buf_item_log_check(bip); |
340 | } | 340 | } |
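The base_size computation above, illustrated with a hypothetical, heavily simplified format struct (the real xfs_buf_log_format_t carries more fields; the point is only that the declared struct already holds the first bitmap word, so map_size - 1 extra words are added):

        #include <stdio.h>

        struct blf_sketch {
                unsigned short type;
                unsigned short size;
                unsigned int   map_size;
                unsigned int   data_map[1];     /* first word is in-struct */
        };

        int main(void)
        {
                unsigned int map_size = 17;     /* hypothetical */
                unsigned int base = sizeof(struct blf_sketch) +
                                    (map_size - 1) * sizeof(unsigned int);
                printf("base_size = %u bytes\n", base);
                return 0;
        }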
341 | 341 | ||
342 | /* | 342 | /* |
343 | * This is called to pin the buffer associated with the buf log | 343 | * This is called to pin the buffer associated with the buf log |
344 | * item in memory so it cannot be written out. Simply call bpin() | 344 | * item in memory so it cannot be written out. Simply call bpin() |
345 | * on the buffer to do this. | 345 | * on the buffer to do this. |
346 | */ | 346 | */ |
347 | STATIC void | 347 | STATIC void |
348 | xfs_buf_item_pin( | 348 | xfs_buf_item_pin( |
349 | xfs_buf_log_item_t *bip) | 349 | xfs_buf_log_item_t *bip) |
350 | { | 350 | { |
351 | xfs_buf_t *bp; | 351 | xfs_buf_t *bp; |
352 | 352 | ||
353 | bp = bip->bli_buf; | 353 | bp = bip->bli_buf; |
354 | ASSERT(XFS_BUF_ISBUSY(bp)); | 354 | ASSERT(XFS_BUF_ISBUSY(bp)); |
355 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 355 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
356 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || | 356 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || |
357 | (bip->bli_flags & XFS_BLI_STALE)); | 357 | (bip->bli_flags & XFS_BLI_STALE)); |
358 | xfs_buf_item_trace("PIN", bip); | 358 | xfs_buf_item_trace("PIN", bip); |
359 | xfs_buftrace("XFS_PIN", bp); | 359 | xfs_buftrace("XFS_PIN", bp); |
360 | xfs_bpin(bp); | 360 | xfs_bpin(bp); |
361 | } | 361 | } |
362 | 362 | ||
363 | 363 | ||
364 | /* | 364 | /* |
365 | * This is called to unpin the buffer associated with the buf log | 365 | * This is called to unpin the buffer associated with the buf log |
366 | * item which was previously pinned with a call to xfs_buf_item_pin(). | 366 | * item which was previously pinned with a call to xfs_buf_item_pin(). |
367 | * Just call bunpin() on the buffer to do this. | 367 | * Just call bunpin() on the buffer to do this. |
368 | * | 368 | * |
369 | * Also drop the reference to the buf item for the current transaction. | 369 | * Also drop the reference to the buf item for the current transaction. |
370 | * If the XFS_BLI_STALE flag is set and we are the last reference, | 370 | * If the XFS_BLI_STALE flag is set and we are the last reference, |
371 | * then free up the buf log item and unlock the buffer. | 371 | * then free up the buf log item and unlock the buffer. |
372 | */ | 372 | */ |
373 | STATIC void | 373 | STATIC void |
374 | xfs_buf_item_unpin( | 374 | xfs_buf_item_unpin( |
375 | xfs_buf_log_item_t *bip, | 375 | xfs_buf_log_item_t *bip, |
376 | int stale) | 376 | int stale) |
377 | { | 377 | { |
378 | struct xfs_ail *ailp; | 378 | struct xfs_ail *ailp; |
379 | xfs_buf_t *bp; | 379 | xfs_buf_t *bp; |
380 | int freed; | 380 | int freed; |
381 | 381 | ||
382 | bp = bip->bli_buf; | 382 | bp = bip->bli_buf; |
383 | ASSERT(bp != NULL); | 383 | ASSERT(bp != NULL); |
384 | ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip); | 384 | ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip); |
385 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 385 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
386 | xfs_buf_item_trace("UNPIN", bip); | 386 | xfs_buf_item_trace("UNPIN", bip); |
387 | xfs_buftrace("XFS_UNPIN", bp); | 387 | xfs_buftrace("XFS_UNPIN", bp); |
388 | 388 | ||
389 | freed = atomic_dec_and_test(&bip->bli_refcount); | 389 | freed = atomic_dec_and_test(&bip->bli_refcount); |
390 | ailp = bip->bli_item.li_ailp; | 390 | ailp = bip->bli_item.li_ailp; |
391 | xfs_bunpin(bp); | 391 | xfs_bunpin(bp); |
392 | if (freed && stale) { | 392 | if (freed && stale) { |
393 | ASSERT(bip->bli_flags & XFS_BLI_STALE); | 393 | ASSERT(bip->bli_flags & XFS_BLI_STALE); |
394 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); | 394 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); |
395 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); | 395 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); |
396 | ASSERT(XFS_BUF_ISSTALE(bp)); | 396 | ASSERT(XFS_BUF_ISSTALE(bp)); |
397 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 397 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); |
398 | xfs_buf_item_trace("UNPIN STALE", bip); | 398 | xfs_buf_item_trace("UNPIN STALE", bip); |
399 | xfs_buftrace("XFS_UNPIN STALE", bp); | 399 | xfs_buftrace("XFS_UNPIN STALE", bp); |
400 | /* | 400 | /* |
401 | * If we get called here because of an IO error, we may | 401 | * If we get called here because of an IO error, we may |
402 | * or may not have the item on the AIL. xfs_trans_ail_delete() | 402 | * or may not have the item on the AIL. xfs_trans_ail_delete() |
403 | * will take care of that situation. | 403 | * will take care of that situation. |
404 | * xfs_trans_ail_delete() drops the AIL lock. | 404 | * xfs_trans_ail_delete() drops the AIL lock. |
405 | */ | 405 | */ |
406 | if (bip->bli_flags & XFS_BLI_STALE_INODE) { | 406 | if (bip->bli_flags & XFS_BLI_STALE_INODE) { |
407 | xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip); | 407 | xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip); |
408 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 408 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
409 | XFS_BUF_CLR_IODONE_FUNC(bp); | 409 | XFS_BUF_CLR_IODONE_FUNC(bp); |
410 | } else { | 410 | } else { |
411 | spin_lock(&ailp->xa_lock); | 411 | spin_lock(&ailp->xa_lock); |
412 | xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); | 412 | xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); |
413 | xfs_buf_item_relse(bp); | 413 | xfs_buf_item_relse(bp); |
414 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL); | 414 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL); |
415 | } | 415 | } |
416 | xfs_buf_relse(bp); | 416 | xfs_buf_relse(bp); |
417 | } | 417 | } |
418 | } | 418 | } |
419 | 419 | ||
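The unpin path above hinges on atomic_dec_and_test(): whichever caller drops the reference count to zero is the one responsible for tearing a stale item down. A minimal userspace sketch of that pattern, using C11 atomics in place of the kernel's atomic_t (the struct and function names here are illustrative, not the XFS types):

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct item {
        atomic_int refcount;
        int stale;
    };

    static void item_unpin(struct item *ip)
    {
        /* atomic_fetch_sub() returns the old value, so old == 1 means
         * this caller dropped the final reference -- equivalent to the
         * kernel's atomic_dec_and_test() returning true. */
        int freed = (atomic_fetch_sub(&ip->refcount, 1) == 1);

        if (freed && ip->stale) {
            printf("last reference: freeing stale item\n");
            free(ip);
        }
    }

    int main(void)
    {
        struct item *ip = malloc(sizeof(*ip));

        atomic_init(&ip->refcount, 2);
        ip->stale = 1;
        item_unpin(ip);   /* 2 -> 1: item survives */
        item_unpin(ip);   /* 1 -> 0: last reference, freed */
        return 0;
    }

The property this models, mirrored from xfs_buf_item_unpin(), is that exactly one caller observes the zero crossing, so the teardown cannot run twice.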
420 | /* | 420 | /* |
421 | * This is called from uncommit in the forced-shutdown path. | 421 | * This is called from uncommit in the forced-shutdown path. |
422 | * We need to check whether the reference count on the log item | 422 | * We need to check whether the reference count on the log item |
423 | * is going to drop to zero. If so, unpin will free the log item, | 423 | * is going to drop to zero. If so, unpin will free the log item, |
424 | * so we also need to free the item's descriptor (which points to | 424 | * so we also need to free the item's descriptor (which points to |
425 | * the item) in the transaction. | 425 | * the item) in the transaction. |
426 | */ | 426 | */ |
427 | STATIC void | 427 | STATIC void |
428 | xfs_buf_item_unpin_remove( | 428 | xfs_buf_item_unpin_remove( |
429 | xfs_buf_log_item_t *bip, | 429 | xfs_buf_log_item_t *bip, |
430 | xfs_trans_t *tp) | 430 | xfs_trans_t *tp) |
431 | { | 431 | { |
432 | xfs_buf_t *bp; | 432 | xfs_buf_t *bp; |
433 | xfs_log_item_desc_t *lidp; | 433 | xfs_log_item_desc_t *lidp; |
434 | int stale = 0; | 434 | int stale = 0; |
435 | 435 | ||
436 | bp = bip->bli_buf; | 436 | bp = bip->bli_buf; |
437 | /* | 437 | /* |
438 | * will xfs_buf_item_unpin() call xfs_buf_item_relse()? | 438 | * will xfs_buf_item_unpin() call xfs_buf_item_relse()? |
439 | */ | 439 | */ |
440 | if ((atomic_read(&bip->bli_refcount) == 1) && | 440 | if ((atomic_read(&bip->bli_refcount) == 1) && |
441 | (bip->bli_flags & XFS_BLI_STALE)) { | 441 | (bip->bli_flags & XFS_BLI_STALE)) { |
442 | ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0); | 442 | ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0); |
443 | xfs_buf_item_trace("UNPIN REMOVE", bip); | 443 | xfs_buf_item_trace("UNPIN REMOVE", bip); |
444 | xfs_buftrace("XFS_UNPIN_REMOVE", bp); | 444 | xfs_buftrace("XFS_UNPIN_REMOVE", bp); |
445 | /* | 445 | /* |
446 | * yes -- clear the transaction descriptor in-use flag | 446 | * yes -- clear the transaction descriptor in-use flag |
447 | * and free the chunk if required. We can safely | 447 | * and free the chunk if required. We can safely |
448 | * do some work here and then call buf_item_unpin | 448 | * do some work here and then call buf_item_unpin |
449 | * to do the rest because, if the test above is true, | 449 | * to do the rest because, if the test above is true, |
450 | * we are holding the buffer locked so no one else | 450 | * we are holding the buffer locked so no one else |
451 | * will be able to bump up the refcount. | 451 | * will be able to bump up the refcount. |
452 | */ | 452 | */ |
453 | lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) bip); | 453 | lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) bip); |
454 | stale = lidp->lid_flags & XFS_LID_BUF_STALE; | 454 | stale = lidp->lid_flags & XFS_LID_BUF_STALE; |
455 | xfs_trans_free_item(tp, lidp); | 455 | xfs_trans_free_item(tp, lidp); |
456 | /* | 456 | /* |
457 | * Since the transaction no longer refers to the buffer, | 457 | * Since the transaction no longer refers to the buffer, |
458 | * the buffer should no longer refer to the transaction. | 458 | * the buffer should no longer refer to the transaction. |
459 | */ | 459 | */ |
460 | XFS_BUF_SET_FSPRIVATE2(bp, NULL); | 460 | XFS_BUF_SET_FSPRIVATE2(bp, NULL); |
461 | } | 461 | } |
462 | 462 | ||
463 | xfs_buf_item_unpin(bip, stale); | 463 | xfs_buf_item_unpin(bip, stale); |
464 | 464 | ||
465 | return; | 465 | return; |
466 | } | 466 | } |
467 | 467 | ||
468 | /* | 468 | /* |
469 | * This is called to attempt to lock the buffer associated with this | 469 | * This is called to attempt to lock the buffer associated with this |
470 | * buf log item. Don't sleep on the buffer lock. If the buffer | 470 | * buf log item. Don't sleep on the buffer lock. If the buffer |
471 | * is pinned, return XFS_ITEM_PINNED; if it is locked, return | 471 | * is pinned, return XFS_ITEM_PINNED; if it is locked, return |
472 | * XFS_ITEM_LOCKED. Otherwise take a hold and return XFS_ITEM_SUCCESS. | 472 | * XFS_ITEM_LOCKED. Otherwise take a hold and return XFS_ITEM_SUCCESS. |
473 | */ | 473 | */ |
474 | STATIC uint | 474 | STATIC uint |
475 | xfs_buf_item_trylock( | 475 | xfs_buf_item_trylock( |
476 | xfs_buf_log_item_t *bip) | 476 | xfs_buf_log_item_t *bip) |
477 | { | 477 | { |
478 | xfs_buf_t *bp; | 478 | xfs_buf_t *bp; |
479 | 479 | ||
480 | bp = bip->bli_buf; | 480 | bp = bip->bli_buf; |
481 | 481 | ||
482 | if (XFS_BUF_ISPINNED(bp)) { | 482 | if (XFS_BUF_ISPINNED(bp)) { |
483 | return XFS_ITEM_PINNED; | 483 | return XFS_ITEM_PINNED; |
484 | } | 484 | } |
485 | 485 | ||
486 | if (!XFS_BUF_CPSEMA(bp)) { | 486 | if (!XFS_BUF_CPSEMA(bp)) { |
487 | return XFS_ITEM_LOCKED; | 487 | return XFS_ITEM_LOCKED; |
488 | } | 488 | } |
489 | 489 | ||
490 | /* | 490 | /* |
491 | * Remove the buffer from the free list. Only do this | 491 | * Remove the buffer from the free list. Only do this |
492 | * if it's on the free list. Private buffers like the | 492 | * if it's on the free list. Private buffers like the |
493 | * superblock buffer are not. | 493 | * superblock buffer are not. |
494 | */ | 494 | */ |
495 | XFS_BUF_HOLD(bp); | 495 | XFS_BUF_HOLD(bp); |
496 | 496 | ||
497 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 497 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
498 | xfs_buf_item_trace("TRYLOCK SUCCESS", bip); | 498 | xfs_buf_item_trace("TRYLOCK SUCCESS", bip); |
499 | return XFS_ITEM_SUCCESS; | 499 | return XFS_ITEM_SUCCESS; |
500 | } | 500 | } |
501 | 501 | ||
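The tri-state return is what lets the AIL pusher make progress without ever sleeping: a pinned buffer cannot be written yet, a locked one is busy elsewhere, and only a successful trylock transfers ownership to the caller. A rough userspace analogue using pthread_mutex_trylock() (the struct, field, and constant names below are stand-ins, not the kernel definitions; build with -pthread):

    #include <pthread.h>
    #include <stdio.h>

    enum { ITEM_SUCCESS, ITEM_PINNED, ITEM_LOCKED };

    struct buf {
        pthread_mutex_t lock;
        int pin_count;              /* stand-in for XFS_BUF_ISPINNED() */
    };

    static int buf_item_trylock(struct buf *bp)
    {
        if (bp->pin_count > 0)
            return ITEM_PINNED;     /* still pinned by the log: skip */
        if (pthread_mutex_trylock(&bp->lock) != 0)
            return ITEM_LOCKED;     /* held elsewhere: don't sleep */
        return ITEM_SUCCESS;        /* locked: caller now owns bp */
    }

    int main(void)
    {
        struct buf b = { PTHREAD_MUTEX_INITIALIZER, 0 };

        if (buf_item_trylock(&b) == ITEM_SUCCESS)
            printf("got the buffer\n");
        if (buf_item_trylock(&b) == ITEM_LOCKED)
            printf("second attempt correctly refused\n");
        pthread_mutex_unlock(&b.lock);
        return 0;
    }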
502 | /* | 502 | /* |
503 | * Release the buffer associated with the buf log item. | 503 | * Release the buffer associated with the buf log item. |
504 | * If there is no dirty logged data associated with the | 504 | * If there is no dirty logged data associated with the |
505 | * buffer recorded in the buf log item, then free the | 505 | * buffer recorded in the buf log item, then free the |
506 | * buf log item and remove the reference to it in the | 506 | * buf log item and remove the reference to it in the |
507 | * buffer. | 507 | * buffer. |
508 | * | 508 | * |
509 | * This call ignores the recursion count. It is only called | 509 | * This call ignores the recursion count. It is only called |
510 | * when the buffer should REALLY be unlocked, regardless | 510 | * when the buffer should REALLY be unlocked, regardless |
511 | * of the recursion count. | 511 | * of the recursion count. |
512 | * | 512 | * |
513 | * If the XFS_BLI_HOLD flag is set in the buf log item, then | 513 | * If the XFS_BLI_HOLD flag is set in the buf log item, then |
514 | * free the log item if necessary but do not unlock the buffer. | 514 | * free the log item if necessary but do not unlock the buffer. |
515 | * This is for support of xfs_trans_bhold(). Make sure the | 515 | * This is for support of xfs_trans_bhold(). Make sure the |
516 | * XFS_BLI_HOLD field is cleared if we don't free the item. | 516 | * XFS_BLI_HOLD field is cleared if we don't free the item. |
517 | */ | 517 | */ |
518 | STATIC void | 518 | STATIC void |
519 | xfs_buf_item_unlock( | 519 | xfs_buf_item_unlock( |
520 | xfs_buf_log_item_t *bip) | 520 | xfs_buf_log_item_t *bip) |
521 | { | 521 | { |
522 | int aborted; | 522 | int aborted; |
523 | xfs_buf_t *bp; | 523 | xfs_buf_t *bp; |
524 | uint hold; | 524 | uint hold; |
525 | 525 | ||
526 | bp = bip->bli_buf; | 526 | bp = bip->bli_buf; |
527 | xfs_buftrace("XFS_UNLOCK", bp); | 527 | xfs_buftrace("XFS_UNLOCK", bp); |
528 | 528 | ||
529 | /* | 529 | /* |
530 | * Clear the buffer's association with this transaction. | 530 | * Clear the buffer's association with this transaction. |
531 | */ | 531 | */ |
532 | XFS_BUF_SET_FSPRIVATE2(bp, NULL); | 532 | XFS_BUF_SET_FSPRIVATE2(bp, NULL); |
533 | 533 | ||
534 | /* | 534 | /* |
535 | * If this is a transaction abort, don't return early. | 535 | * If this is a transaction abort, don't return early. |
536 | * Instead, allow the brelse to happen. | 536 | * Instead, allow the brelse to happen. |
537 | * Normally it would be done for stale (cancelled) buffers | 537 | * Normally it would be done for stale (cancelled) buffers |
538 | * at unpin time, but we'll never go through the pin/unpin | 538 | * at unpin time, but we'll never go through the pin/unpin |
539 | * cycle if we abort inside commit. | 539 | * cycle if we abort inside commit. |
540 | */ | 540 | */ |
541 | aborted = (bip->bli_item.li_flags & XFS_LI_ABORTED) != 0; | 541 | aborted = (bip->bli_item.li_flags & XFS_LI_ABORTED) != 0; |
542 | 542 | ||
543 | /* | 543 | /* |
544 | * If the buf item is marked stale, then don't do anything. | 544 | * If the buf item is marked stale, then don't do anything. |
545 | * We'll unlock the buffer and free the buf item when the | 545 | * We'll unlock the buffer and free the buf item when the |
546 | * buffer is unpinned for the last time. | 546 | * buffer is unpinned for the last time. |
547 | */ | 547 | */ |
548 | if (bip->bli_flags & XFS_BLI_STALE) { | 548 | if (bip->bli_flags & XFS_BLI_STALE) { |
549 | bip->bli_flags &= ~XFS_BLI_LOGGED; | 549 | bip->bli_flags &= ~XFS_BLI_LOGGED; |
550 | xfs_buf_item_trace("UNLOCK STALE", bip); | 550 | xfs_buf_item_trace("UNLOCK STALE", bip); |
551 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 551 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); |
552 | if (!aborted) | 552 | if (!aborted) |
553 | return; | 553 | return; |
554 | } | 554 | } |
555 | 555 | ||
556 | /* | 556 | /* |
557 | * Drop the transaction's reference to the log item if | 557 | * Drop the transaction's reference to the log item if |
558 | * it was not logged as part of the transaction. Otherwise | 558 | * it was not logged as part of the transaction. Otherwise |
559 | * we'll drop the reference in xfs_buf_item_unpin() when | 559 | * we'll drop the reference in xfs_buf_item_unpin() when |
560 | * the transaction is really through with the buffer. | 560 | * the transaction is really through with the buffer. |
561 | */ | 561 | */ |
562 | if (!(bip->bli_flags & XFS_BLI_LOGGED)) { | 562 | if (!(bip->bli_flags & XFS_BLI_LOGGED)) { |
563 | atomic_dec(&bip->bli_refcount); | 563 | atomic_dec(&bip->bli_refcount); |
564 | } else { | 564 | } else { |
565 | /* | 565 | /* |
566 | * Clear the logged flag since this is per | 566 | * Clear the logged flag since this is per |
567 | * transaction state. | 567 | * transaction state. |
568 | */ | 568 | */ |
569 | bip->bli_flags &= ~XFS_BLI_LOGGED; | 569 | bip->bli_flags &= ~XFS_BLI_LOGGED; |
570 | } | 570 | } |
571 | 571 | ||
572 | /* | 572 | /* |
573 | * Before possibly freeing the buf item, determine if we should | 573 | * Before possibly freeing the buf item, determine if we should |
574 | * release the buffer at the end of this routine. | 574 | * release the buffer at the end of this routine. |
575 | */ | 575 | */ |
576 | hold = bip->bli_flags & XFS_BLI_HOLD; | 576 | hold = bip->bli_flags & XFS_BLI_HOLD; |
577 | xfs_buf_item_trace("UNLOCK", bip); | 577 | xfs_buf_item_trace("UNLOCK", bip); |
578 | 578 | ||
579 | /* | 579 | /* |
580 | * If the buf item isn't tracking any data, free it. | 580 | * If the buf item isn't tracking any data, free it. |
581 | * Otherwise, if XFS_BLI_HOLD is set clear it. | 581 | * Otherwise, if XFS_BLI_HOLD is set clear it. |
582 | */ | 582 | */ |
583 | if (xfs_bitmap_empty(bip->bli_format.blf_data_map, | 583 | if (xfs_bitmap_empty(bip->bli_format.blf_data_map, |
584 | bip->bli_format.blf_map_size)) { | 584 | bip->bli_format.blf_map_size)) { |
585 | xfs_buf_item_relse(bp); | 585 | xfs_buf_item_relse(bp); |
586 | } else if (hold) { | 586 | } else if (hold) { |
587 | bip->bli_flags &= ~XFS_BLI_HOLD; | 587 | bip->bli_flags &= ~XFS_BLI_HOLD; |
588 | } | 588 | } |
589 | 589 | ||
590 | /* | 590 | /* |
591 | * Release the buffer if XFS_BLI_HOLD was not set. | 591 | * Release the buffer if XFS_BLI_HOLD was not set. |
592 | */ | 592 | */ |
593 | if (!hold) { | 593 | if (!hold) { |
594 | xfs_buf_relse(bp); | 594 | xfs_buf_relse(bp); |
595 | } | 595 | } |
596 | } | 596 | } |
597 | 597 | ||
598 | /* | 598 | /* |
599 | * This is called to find out where the oldest active copy of the | 599 | * This is called to find out where the oldest active copy of the |
600 | * buf log item in the on-disk log resides now that the last log | 600 | * buf log item in the on-disk log resides now that the last log |
601 | * write of it completed at the given lsn. | 601 | * write of it completed at the given lsn. |
602 | * We always re-log all the dirty data in a buffer, so usually the | 602 | * We always re-log all the dirty data in a buffer, so usually the |
603 | * latest copy in the on disk log is the only one that matters. For | 603 | * latest copy in the on disk log is the only one that matters. For |
604 | * those cases we simply return the given lsn. | 604 | * those cases we simply return the given lsn. |
605 | * | 605 | * |
606 | * The one exception to this is for buffers full of newly allocated | 606 | * The one exception to this is for buffers full of newly allocated |
607 | * inodes. These buffers are only relogged with the XFS_BLI_INODE_BUF | 607 | * inodes. These buffers are only relogged with the XFS_BLI_INODE_BUF |
608 | * flag set, indicating that only the di_next_unlinked fields from the | 608 | * flag set, indicating that only the di_next_unlinked fields from the |
609 | * inodes in the buffers will be replayed during recovery. If the | 609 | * inodes in the buffers will be replayed during recovery. If the |
610 | * original newly allocated inode images have not yet been flushed | 610 | * original newly allocated inode images have not yet been flushed |
611 | * when the buffer is so relogged, then we need to make sure that we | 611 | * when the buffer is so relogged, then we need to make sure that we |
612 | * keep the old images in the 'active' portion of the log. We do this | 612 | * keep the old images in the 'active' portion of the log. We do this |
613 | * by returning the original lsn of that transaction here rather than | 613 | * by returning the original lsn of that transaction here rather than |
614 | * the current one. | 614 | * the current one. |
615 | */ | 615 | */ |
616 | STATIC xfs_lsn_t | 616 | STATIC xfs_lsn_t |
617 | xfs_buf_item_committed( | 617 | xfs_buf_item_committed( |
618 | xfs_buf_log_item_t *bip, | 618 | xfs_buf_log_item_t *bip, |
619 | xfs_lsn_t lsn) | 619 | xfs_lsn_t lsn) |
620 | { | 620 | { |
621 | xfs_buf_item_trace("COMMITTED", bip); | 621 | xfs_buf_item_trace("COMMITTED", bip); |
622 | if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && | 622 | if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && |
623 | (bip->bli_item.li_lsn != 0)) { | 623 | (bip->bli_item.li_lsn != 0)) { |
624 | return bip->bli_item.li_lsn; | 624 | return bip->bli_item.li_lsn; |
625 | } | 625 | } |
626 | return (lsn); | 626 | return (lsn); |
627 | } | 627 | } |
628 | 628 | ||
629 | /* | 629 | /* |
630 | * This is called to asynchronously write the buffer associated with this | 630 | * This is called to asynchronously write the buffer associated with this |
631 | * buf log item out to disk. The buffer will already have been locked by | 631 | * buf log item out to disk. The buffer will already have been locked by |
632 | * a successful call to xfs_buf_item_trylock(). If the buffer still has | 632 | * a successful call to xfs_buf_item_trylock(). If the buffer still has |
633 | * B_DELWRI set, then get it going out to disk with a call to bawrite(). | 633 | * B_DELWRI set, then get it going out to disk with a call to bawrite(). |
634 | * If not, then just release the buffer. | 634 | * If not, then just release the buffer. |
635 | */ | 635 | */ |
636 | STATIC void | 636 | STATIC void |
637 | xfs_buf_item_push( | 637 | xfs_buf_item_push( |
638 | xfs_buf_log_item_t *bip) | 638 | xfs_buf_log_item_t *bip) |
639 | { | 639 | { |
640 | xfs_buf_t *bp; | 640 | xfs_buf_t *bp; |
641 | 641 | ||
642 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 642 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
643 | xfs_buf_item_trace("PUSH", bip); | 643 | xfs_buf_item_trace("PUSH", bip); |
644 | 644 | ||
645 | bp = bip->bli_buf; | 645 | bp = bip->bli_buf; |
646 | 646 | ||
647 | if (XFS_BUF_ISDELAYWRITE(bp)) { | 647 | if (XFS_BUF_ISDELAYWRITE(bp)) { |
648 | int error; | 648 | int error; |
649 | error = xfs_bawrite(bip->bli_item.li_mountp, bp); | 649 | error = xfs_bawrite(bip->bli_item.li_mountp, bp); |
650 | if (error) | 650 | if (error) |
651 | xfs_fs_cmn_err(CE_WARN, bip->bli_item.li_mountp, | 651 | xfs_fs_cmn_err(CE_WARN, bip->bli_item.li_mountp, |
652 | "xfs_buf_item_push: pushbuf error %d on bip %p, bp %p", | 652 | "xfs_buf_item_push: pushbuf error %d on bip %p, bp %p", |
653 | error, bip, bp); | 653 | error, bip, bp); |
654 | } else { | 654 | } else { |
655 | xfs_buf_relse(bp); | 655 | xfs_buf_relse(bp); |
656 | } | 656 | } |
657 | } | 657 | } |
658 | 658 | ||
659 | /* ARGSUSED */ | 659 | /* ARGSUSED */ |
660 | STATIC void | 660 | STATIC void |
661 | xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn) | 661 | xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn) |
662 | { | 662 | { |
663 | } | 663 | } |
664 | 664 | ||
665 | /* | 665 | /* |
666 | * This is the ops vector shared by all buf log items. | 666 | * This is the ops vector shared by all buf log items. |
667 | */ | 667 | */ |
668 | static struct xfs_item_ops xfs_buf_item_ops = { | 668 | static struct xfs_item_ops xfs_buf_item_ops = { |
669 | .iop_size = (uint(*)(xfs_log_item_t*))xfs_buf_item_size, | 669 | .iop_size = (uint(*)(xfs_log_item_t*))xfs_buf_item_size, |
670 | .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) | 670 | .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) |
671 | xfs_buf_item_format, | 671 | xfs_buf_item_format, |
672 | .iop_pin = (void(*)(xfs_log_item_t*))xfs_buf_item_pin, | 672 | .iop_pin = (void(*)(xfs_log_item_t*))xfs_buf_item_pin, |
673 | .iop_unpin = (void(*)(xfs_log_item_t*, int))xfs_buf_item_unpin, | 673 | .iop_unpin = (void(*)(xfs_log_item_t*, int))xfs_buf_item_unpin, |
674 | .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *)) | 674 | .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *)) |
675 | xfs_buf_item_unpin_remove, | 675 | xfs_buf_item_unpin_remove, |
676 | .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_buf_item_trylock, | 676 | .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_buf_item_trylock, |
677 | .iop_unlock = (void(*)(xfs_log_item_t*))xfs_buf_item_unlock, | 677 | .iop_unlock = (void(*)(xfs_log_item_t*))xfs_buf_item_unlock, |
678 | .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) | 678 | .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) |
679 | xfs_buf_item_committed, | 679 | xfs_buf_item_committed, |
680 | .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push, | 680 | .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push, |
681 | .iop_pushbuf = NULL, | 681 | .iop_pushbuf = NULL, |
682 | .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) | 682 | .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) |
683 | xfs_buf_item_committing | 683 | xfs_buf_item_committing |
684 | }; | 684 | }; |
685 | 685 | ||
686 | 686 | ||
687 | /* | 687 | /* |
688 | * Allocate a new buf log item to go with the given buffer. | 688 | * Allocate a new buf log item to go with the given buffer. |
689 | * Set the buffer's b_fsprivate field to point to the new | 689 | * Set the buffer's b_fsprivate field to point to the new |
690 | * buf log item. If there are other items attached to the | 690 | * buf log item. If there are other items attached to the |
691 | * buffer (see xfs_buf_attach_iodone() below), then put the | 691 | * buffer (see xfs_buf_attach_iodone() below), then put the |
692 | * buf log item at the front. | 692 | * buf log item at the front. |
693 | */ | 693 | */ |
694 | void | 694 | void |
695 | xfs_buf_item_init( | 695 | xfs_buf_item_init( |
696 | xfs_buf_t *bp, | 696 | xfs_buf_t *bp, |
697 | xfs_mount_t *mp) | 697 | xfs_mount_t *mp) |
698 | { | 698 | { |
699 | xfs_log_item_t *lip; | 699 | xfs_log_item_t *lip; |
700 | xfs_buf_log_item_t *bip; | 700 | xfs_buf_log_item_t *bip; |
701 | int chunks; | 701 | int chunks; |
702 | int map_size; | 702 | int map_size; |
703 | 703 | ||
704 | /* | 704 | /* |
705 | * Check to see if there is already a buf log item for | 705 | * Check to see if there is already a buf log item for |
706 | * this buffer. If there is, it is guaranteed to be | 706 | * this buffer. If there is, it is guaranteed to be |
707 | * the first. If we do already have one, there is | 707 | * the first. If we do already have one, there is |
708 | * nothing to do here so return. | 708 | * nothing to do here so return. |
709 | */ | 709 | */ |
710 | if (XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *) != mp) | 710 | if (bp->b_mount != mp) |
711 | XFS_BUF_SET_FSPRIVATE3(bp, mp); | 711 | bp->b_mount = mp; |
712 | XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb); | 712 | XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb); |
713 | if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { | 713 | if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { |
714 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | 714 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); |
715 | if (lip->li_type == XFS_LI_BUF) { | 715 | if (lip->li_type == XFS_LI_BUF) { |
716 | return; | 716 | return; |
717 | } | 717 | } |
718 | } | 718 | } |
719 | 719 | ||
720 | /* | 720 | /* |
721 | * chunks is the number of XFS_BLI_CHUNK size pieces | 721 | * chunks is the number of XFS_BLI_CHUNK size pieces |
722 | * the buffer can be divided into. Make sure not to | 722 | * the buffer can be divided into. Make sure not to |
723 | * truncate any pieces. map_size is the size of the | 723 | * truncate any pieces. map_size is the size of the |
724 | * bitmap needed to describe the chunks of the buffer. | 724 | * bitmap needed to describe the chunks of the buffer. |
725 | */ | 725 | */ |
726 | chunks = (int)((XFS_BUF_COUNT(bp) + (XFS_BLI_CHUNK - 1)) >> XFS_BLI_SHIFT); | 726 | chunks = (int)((XFS_BUF_COUNT(bp) + (XFS_BLI_CHUNK - 1)) >> XFS_BLI_SHIFT); |
727 | map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT); | 727 | map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT); |
728 | 728 | ||
729 | bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone, | 729 | bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone, |
730 | KM_SLEEP); | 730 | KM_SLEEP); |
731 | bip->bli_item.li_type = XFS_LI_BUF; | 731 | bip->bli_item.li_type = XFS_LI_BUF; |
732 | bip->bli_item.li_ops = &xfs_buf_item_ops; | 732 | bip->bli_item.li_ops = &xfs_buf_item_ops; |
733 | bip->bli_item.li_mountp = mp; | 733 | bip->bli_item.li_mountp = mp; |
734 | bip->bli_item.li_ailp = mp->m_ail; | 734 | bip->bli_item.li_ailp = mp->m_ail; |
735 | bip->bli_buf = bp; | 735 | bip->bli_buf = bp; |
736 | xfs_buf_hold(bp); | 736 | xfs_buf_hold(bp); |
737 | bip->bli_format.blf_type = XFS_LI_BUF; | 737 | bip->bli_format.blf_type = XFS_LI_BUF; |
738 | bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); | 738 | bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); |
739 | bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); | 739 | bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); |
740 | bip->bli_format.blf_map_size = map_size; | 740 | bip->bli_format.blf_map_size = map_size; |
741 | #ifdef XFS_BLI_TRACE | 741 | #ifdef XFS_BLI_TRACE |
742 | bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_NOFS); | 742 | bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_NOFS); |
743 | #endif | 743 | #endif |
744 | 744 | ||
745 | #ifdef XFS_TRANS_DEBUG | 745 | #ifdef XFS_TRANS_DEBUG |
746 | /* | 746 | /* |
747 | * Allocate the arrays for tracking what needs to be logged | 747 | * Allocate the arrays for tracking what needs to be logged |
748 | * and what our callers request to be logged. bli_orig | 748 | * and what our callers request to be logged. bli_orig |
749 | * holds a copy of the original, clean buffer for comparison | 749 | * holds a copy of the original, clean buffer for comparison |
750 | * against, and bli_logged keeps a 1 bit flag per byte in | 750 | * against, and bli_logged keeps a 1 bit flag per byte in |
751 | * the buffer to indicate which bytes the callers have asked | 751 | * the buffer to indicate which bytes the callers have asked |
752 | * to have logged. | 752 | * to have logged. |
753 | */ | 753 | */ |
754 | bip->bli_orig = (char *)kmem_alloc(XFS_BUF_COUNT(bp), KM_SLEEP); | 754 | bip->bli_orig = (char *)kmem_alloc(XFS_BUF_COUNT(bp), KM_SLEEP); |
755 | memcpy(bip->bli_orig, XFS_BUF_PTR(bp), XFS_BUF_COUNT(bp)); | 755 | memcpy(bip->bli_orig, XFS_BUF_PTR(bp), XFS_BUF_COUNT(bp)); |
756 | bip->bli_logged = (char *)kmem_zalloc(XFS_BUF_COUNT(bp) / NBBY, KM_SLEEP); | 756 | bip->bli_logged = (char *)kmem_zalloc(XFS_BUF_COUNT(bp) / NBBY, KM_SLEEP); |
757 | #endif | 757 | #endif |
758 | 758 | ||
759 | /* | 759 | /* |
760 | * Put the buf item into the list of items attached to the | 760 | * Put the buf item into the list of items attached to the |
761 | * buffer at the front. | 761 | * buffer at the front. |
762 | */ | 762 | */ |
763 | if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { | 763 | if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { |
764 | bip->bli_item.li_bio_list = | 764 | bip->bli_item.li_bio_list = |
765 | XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | 765 | XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); |
766 | } | 766 | } |
767 | XFS_BUF_SET_FSPRIVATE(bp, bip); | 767 | XFS_BUF_SET_FSPRIVATE(bp, bip); |
768 | } | 768 | } |
769 | 769 | ||
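The chunk and bitmap sizing in xfs_buf_item_init() is easy to sanity-check with concrete numbers. A standalone sketch, assuming the usual constants of this era (XFS_BLI_CHUNK = 128, XFS_BLI_SHIFT = 7, NBWORD = 32, BIT_TO_WORD_SHIFT = 5; those values are assumptions here, not quoted from this diff):

    #include <stdio.h>

    #define BLI_CHUNK          128  /* assumed XFS_BLI_CHUNK */
    #define BLI_SHIFT          7    /* assumed XFS_BLI_SHIFT */
    #define NBWORD             32
    #define BIT_TO_WORD_SHIFT  5

    int main(void)
    {
        int count = 4096;           /* e.g. a 4k metadata buffer */

        /* Round up so a trailing partial chunk still gets a bit. */
        int chunks = (count + (BLI_CHUNK - 1)) >> BLI_SHIFT;

        /* The word count is deliberately generous: adding a whole
         * NBWORD before shifting yields at least one word more than
         * a plain ceil(chunks / NBWORD) would. */
        int map_size = (chunks + NBWORD) >> BIT_TO_WORD_SHIFT;

        printf("chunks = %d, map_size = %d words\n", chunks, map_size);
        /* prints: chunks = 32, map_size = 2 words */
        return 0;
    }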
770 | 770 | ||
771 | /* | 771 | /* |
772 | * Mark bytes first through last inclusive as dirty in the buf | 772 | * Mark bytes first through last inclusive as dirty in the buf |
773 | * item's bitmap. | 773 | * item's bitmap. |
774 | */ | 774 | */ |
775 | void | 775 | void |
776 | xfs_buf_item_log( | 776 | xfs_buf_item_log( |
777 | xfs_buf_log_item_t *bip, | 777 | xfs_buf_log_item_t *bip, |
778 | uint first, | 778 | uint first, |
779 | uint last) | 779 | uint last) |
780 | { | 780 | { |
781 | uint first_bit; | 781 | uint first_bit; |
782 | uint last_bit; | 782 | uint last_bit; |
783 | uint bits_to_set; | 783 | uint bits_to_set; |
784 | uint bits_set; | 784 | uint bits_set; |
785 | uint word_num; | 785 | uint word_num; |
786 | uint *wordp; | 786 | uint *wordp; |
787 | uint bit; | 787 | uint bit; |
788 | uint end_bit; | 788 | uint end_bit; |
789 | uint mask; | 789 | uint mask; |
790 | 790 | ||
791 | /* | 791 | /* |
792 | * Mark the item as having some dirty data for | 792 | * Mark the item as having some dirty data for |
793 | * quick reference in xfs_buf_item_dirty. | 793 | * quick reference in xfs_buf_item_dirty. |
794 | */ | 794 | */ |
795 | bip->bli_flags |= XFS_BLI_DIRTY; | 795 | bip->bli_flags |= XFS_BLI_DIRTY; |
796 | 796 | ||
797 | /* | 797 | /* |
798 | * Convert byte offsets to bit numbers. | 798 | * Convert byte offsets to bit numbers. |
799 | */ | 799 | */ |
800 | first_bit = first >> XFS_BLI_SHIFT; | 800 | first_bit = first >> XFS_BLI_SHIFT; |
801 | last_bit = last >> XFS_BLI_SHIFT; | 801 | last_bit = last >> XFS_BLI_SHIFT; |
802 | 802 | ||
803 | /* | 803 | /* |
804 | * Calculate the total number of bits to be set. | 804 | * Calculate the total number of bits to be set. |
805 | */ | 805 | */ |
806 | bits_to_set = last_bit - first_bit + 1; | 806 | bits_to_set = last_bit - first_bit + 1; |
807 | 807 | ||
808 | /* | 808 | /* |
809 | * Get a pointer to the first word in the bitmap | 809 | * Get a pointer to the first word in the bitmap |
810 | * to set a bit in. | 810 | * to set a bit in. |
811 | */ | 811 | */ |
812 | word_num = first_bit >> BIT_TO_WORD_SHIFT; | 812 | word_num = first_bit >> BIT_TO_WORD_SHIFT; |
813 | wordp = &(bip->bli_format.blf_data_map[word_num]); | 813 | wordp = &(bip->bli_format.blf_data_map[word_num]); |
814 | 814 | ||
815 | /* | 815 | /* |
816 | * Calculate the starting bit in the first word. | 816 | * Calculate the starting bit in the first word. |
817 | */ | 817 | */ |
818 | bit = first_bit & (uint)(NBWORD - 1); | 818 | bit = first_bit & (uint)(NBWORD - 1); |
819 | 819 | ||
820 | /* | 820 | /* |
821 | * First set any bits in the first word of our range. | 821 | * First set any bits in the first word of our range. |
822 | * If it starts at bit 0 of the word, it will be | 822 | * If it starts at bit 0 of the word, it will be |
823 | * set below rather than here. That is what the variable | 823 | * set below rather than here. That is what the variable |
824 | * bit tells us. The variable bits_set tracks the number | 824 | * bit tells us. The variable bits_set tracks the number |
825 | * of bits that have been set so far. End_bit is the number | 825 | * of bits that have been set so far. End_bit is the number |
826 | * of the last bit to be set in this word plus one. | 826 | * of the last bit to be set in this word plus one. |
827 | */ | 827 | */ |
828 | if (bit) { | 828 | if (bit) { |
829 | end_bit = MIN(bit + bits_to_set, (uint)NBWORD); | 829 | end_bit = MIN(bit + bits_to_set, (uint)NBWORD); |
830 | mask = ((1 << (end_bit - bit)) - 1) << bit; | 830 | mask = ((1 << (end_bit - bit)) - 1) << bit; |
831 | *wordp |= mask; | 831 | *wordp |= mask; |
832 | wordp++; | 832 | wordp++; |
833 | bits_set = end_bit - bit; | 833 | bits_set = end_bit - bit; |
834 | } else { | 834 | } else { |
835 | bits_set = 0; | 835 | bits_set = 0; |
836 | } | 836 | } |
837 | 837 | ||
838 | /* | 838 | /* |
839 | * Now set bits a whole word at a time that are between | 839 | * Now set bits a whole word at a time that are between |
840 | * first_bit and last_bit. | 840 | * first_bit and last_bit. |
841 | */ | 841 | */ |
842 | while ((bits_to_set - bits_set) >= NBWORD) { | 842 | while ((bits_to_set - bits_set) >= NBWORD) { |
843 | *wordp |= 0xffffffff; | 843 | *wordp |= 0xffffffff; |
844 | bits_set += NBWORD; | 844 | bits_set += NBWORD; |
845 | wordp++; | 845 | wordp++; |
846 | } | 846 | } |
847 | 847 | ||
848 | /* | 848 | /* |
849 | * Finally, set any bits left to be set in one last partial word. | 849 | * Finally, set any bits left to be set in one last partial word. |
850 | */ | 850 | */ |
851 | end_bit = bits_to_set - bits_set; | 851 | end_bit = bits_to_set - bits_set; |
852 | if (end_bit) { | 852 | if (end_bit) { |
853 | mask = (1 << end_bit) - 1; | 853 | mask = (1 << end_bit) - 1; |
854 | *wordp |= mask; | 854 | *wordp |= mask; |
855 | } | 855 | } |
856 | 856 | ||
857 | xfs_buf_item_log_debug(bip, first, last); | 857 | xfs_buf_item_log_debug(bip, first, last); |
858 | } | 858 | } |
859 | 859 | ||
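The word-at-a-time bit setting in xfs_buf_item_log() (partial first word, full middle words, partial last word) produces exactly the same bitmap as a naive per-bit loop; only the constant factor differs. A minimal userspace model for comparison, with the chunk size assumed to be 128 bytes as above and illustrative names throughout:

    #include <stdint.h>
    #include <stdio.h>

    #define CHUNK_SHIFT 7           /* 128-byte chunks, as XFS_BLI_SHIFT */
    #define NBWORD      32

    /* Mark every chunk touched by bytes [first, last] dirty, one bit
     * at a time.  The kernel routine computes the same set of bits
     * but fills whole 32-bit words across the middle of the range. */
    static void mark_dirty(uint32_t *map, unsigned first, unsigned last)
    {
        unsigned first_bit = first >> CHUNK_SHIFT;
        unsigned last_bit = last >> CHUNK_SHIFT;
        unsigned bit;

        for (bit = first_bit; bit <= last_bit; bit++)
            map[bit / NBWORD] |= 1u << (bit % NBWORD);
    }

    int main(void)
    {
        uint32_t map[4] = { 0 };

        mark_dirty(map, 100, 700);  /* bytes 100..700 -> chunks 0..5 */
        printf("map[0] = 0x%08x\n", map[0]);    /* 0x0000003f */
        return 0;
    }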
860 | 860 | ||
861 | /* | 861 | /* |
862 | * Return 1 if the buffer has some data that has been logged (at any | 862 | * Return 1 if the buffer has some data that has been logged (at any |
863 | * point, not just the current transaction) and 0 if not. | 863 | * point, not just the current transaction) and 0 if not. |
864 | */ | 864 | */ |
865 | uint | 865 | uint |
866 | xfs_buf_item_dirty( | 866 | xfs_buf_item_dirty( |
867 | xfs_buf_log_item_t *bip) | 867 | xfs_buf_log_item_t *bip) |
868 | { | 868 | { |
869 | return (bip->bli_flags & XFS_BLI_DIRTY); | 869 | return (bip->bli_flags & XFS_BLI_DIRTY); |
870 | } | 870 | } |
871 | 871 | ||
872 | STATIC void | 872 | STATIC void |
873 | xfs_buf_item_free( | 873 | xfs_buf_item_free( |
874 | xfs_buf_log_item_t *bip) | 874 | xfs_buf_log_item_t *bip) |
875 | { | 875 | { |
876 | #ifdef XFS_TRANS_DEBUG | 876 | #ifdef XFS_TRANS_DEBUG |
877 | kmem_free(bip->bli_orig); | 877 | kmem_free(bip->bli_orig); |
878 | kmem_free(bip->bli_logged); | 878 | kmem_free(bip->bli_logged); |
879 | #endif /* XFS_TRANS_DEBUG */ | 879 | #endif /* XFS_TRANS_DEBUG */ |
880 | 880 | ||
881 | #ifdef XFS_BLI_TRACE | 881 | #ifdef XFS_BLI_TRACE |
882 | ktrace_free(bip->bli_trace); | 882 | ktrace_free(bip->bli_trace); |
883 | #endif | 883 | #endif |
884 | kmem_zone_free(xfs_buf_item_zone, bip); | 884 | kmem_zone_free(xfs_buf_item_zone, bip); |
885 | } | 885 | } |
886 | 886 | ||
887 | /* | 887 | /* |
888 | * This is called when the buf log item is no longer needed. It should | 888 | * This is called when the buf log item is no longer needed. It should |
889 | * free the buf log item associated with the given buffer and clear | 889 | * free the buf log item associated with the given buffer and clear |
890 | * the buffer's pointer to the buf log item. If there are no more | 890 | * the buffer's pointer to the buf log item. If there are no more |
891 | * items in the list, clear the b_iodone field of the buffer (see | 891 | * items in the list, clear the b_iodone field of the buffer (see |
892 | * xfs_buf_attach_iodone() below). | 892 | * xfs_buf_attach_iodone() below). |
893 | */ | 893 | */ |
894 | void | 894 | void |
895 | xfs_buf_item_relse( | 895 | xfs_buf_item_relse( |
896 | xfs_buf_t *bp) | 896 | xfs_buf_t *bp) |
897 | { | 897 | { |
898 | xfs_buf_log_item_t *bip; | 898 | xfs_buf_log_item_t *bip; |
899 | 899 | ||
900 | xfs_buftrace("XFS_RELSE", bp); | 900 | xfs_buftrace("XFS_RELSE", bp); |
901 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); | 901 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); |
902 | XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list); | 902 | XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list); |
903 | if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) && | 903 | if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) && |
904 | (XFS_BUF_IODONE_FUNC(bp) != NULL)) { | 904 | (XFS_BUF_IODONE_FUNC(bp) != NULL)) { |
905 | XFS_BUF_CLR_IODONE_FUNC(bp); | 905 | XFS_BUF_CLR_IODONE_FUNC(bp); |
906 | } | 906 | } |
907 | xfs_buf_rele(bp); | 907 | xfs_buf_rele(bp); |
908 | xfs_buf_item_free(bip); | 908 | xfs_buf_item_free(bip); |
909 | } | 909 | } |
910 | 910 | ||
911 | 911 | ||
912 | /* | 912 | /* |
913 | * Add the given log item with its callback to the list of callbacks | 913 | * Add the given log item with its callback to the list of callbacks |
914 | * to be called when the buffer's I/O completes. If it is not set | 914 | * to be called when the buffer's I/O completes. If it is not set |
915 | * already, set the buffer's b_iodone() routine to be | 915 | * already, set the buffer's b_iodone() routine to be |
916 | * xfs_buf_iodone_callbacks() and link the log item into the list of | 916 | * xfs_buf_iodone_callbacks() and link the log item into the list of |
917 | * items rooted at b_fsprivate. Items are always added as the second | 917 | * items rooted at b_fsprivate. Items are always added as the second |
918 | * entry in the list if there is a first, because the buf item code | 918 | * entry in the list if there is a first, because the buf item code |
919 | * assumes that the buf log item is first. | 919 | * assumes that the buf log item is first. |
920 | */ | 920 | */ |
921 | void | 921 | void |
922 | xfs_buf_attach_iodone( | 922 | xfs_buf_attach_iodone( |
923 | xfs_buf_t *bp, | 923 | xfs_buf_t *bp, |
924 | void (*cb)(xfs_buf_t *, xfs_log_item_t *), | 924 | void (*cb)(xfs_buf_t *, xfs_log_item_t *), |
925 | xfs_log_item_t *lip) | 925 | xfs_log_item_t *lip) |
926 | { | 926 | { |
927 | xfs_log_item_t *head_lip; | 927 | xfs_log_item_t *head_lip; |
928 | 928 | ||
929 | ASSERT(XFS_BUF_ISBUSY(bp)); | 929 | ASSERT(XFS_BUF_ISBUSY(bp)); |
930 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); | 930 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); |
931 | 931 | ||
932 | lip->li_cb = cb; | 932 | lip->li_cb = cb; |
933 | if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { | 933 | if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { |
934 | head_lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | 934 | head_lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); |
935 | lip->li_bio_list = head_lip->li_bio_list; | 935 | lip->li_bio_list = head_lip->li_bio_list; |
936 | head_lip->li_bio_list = lip; | 936 | head_lip->li_bio_list = lip; |
937 | } else { | 937 | } else { |
938 | XFS_BUF_SET_FSPRIVATE(bp, lip); | 938 | XFS_BUF_SET_FSPRIVATE(bp, lip); |
939 | } | 939 | } |
940 | 940 | ||
941 | ASSERT((XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks) || | 941 | ASSERT((XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks) || |
942 | (XFS_BUF_IODONE_FUNC(bp) == NULL)); | 942 | (XFS_BUF_IODONE_FUNC(bp) == NULL)); |
943 | XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); | 943 | XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); |
944 | } | 944 | } |
945 | 945 | ||
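The "always second" insertion above keeps the buf log item at the head of the b_fsprivate list while any number of callback items queue up behind it. A small model of that splice, with a simplified node type standing in for xfs_log_item_t:

    #include <stdio.h>

    struct log_item {
        struct log_item *next;      /* stands in for li_bio_list */
        const char *name;
    };

    /* Attach lip behind the head when one exists (so the buf log
     * item stays first), else make lip the new head. */
    static void attach_item(struct log_item **head, struct log_item *lip)
    {
        if (*head) {
            lip->next = (*head)->next;
            (*head)->next = lip;
        } else {
            *head = lip;
        }
    }

    int main(void)
    {
        struct log_item buf_item = { NULL, "buf" };
        struct log_item inode_item = { NULL, "inode" };
        struct log_item dquot_item = { NULL, "dquot" };
        struct log_item *head = NULL, *lp;

        attach_item(&head, &buf_item);
        attach_item(&head, &inode_item);
        attach_item(&head, &dquot_item);
        for (lp = head; lp; lp = lp->next)
            printf("%s\n", lp->name);   /* buf, dquot, inode */
        return 0;
    }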
946 | STATIC void | 946 | STATIC void |
947 | xfs_buf_do_callbacks( | 947 | xfs_buf_do_callbacks( |
948 | xfs_buf_t *bp, | 948 | xfs_buf_t *bp, |
949 | xfs_log_item_t *lip) | 949 | xfs_log_item_t *lip) |
950 | { | 950 | { |
951 | xfs_log_item_t *nlip; | 951 | xfs_log_item_t *nlip; |
952 | 952 | ||
953 | while (lip != NULL) { | 953 | while (lip != NULL) { |
954 | nlip = lip->li_bio_list; | 954 | nlip = lip->li_bio_list; |
955 | ASSERT(lip->li_cb != NULL); | 955 | ASSERT(lip->li_cb != NULL); |
956 | /* | 956 | /* |
957 | * Clear the next pointer so we don't have any | 957 | * Clear the next pointer so we don't have any |
958 | * confusion if the item is added to another buf. | 958 | * confusion if the item is added to another buf. |
959 | * Don't touch the log item after calling its | 959 | * Don't touch the log item after calling its |
960 | * callback, because it could have freed itself. | 960 | * callback, because it could have freed itself. |
961 | */ | 961 | */ |
962 | lip->li_bio_list = NULL; | 962 | lip->li_bio_list = NULL; |
963 | lip->li_cb(bp, lip); | 963 | lip->li_cb(bp, lip); |
964 | lip = nlip; | 964 | lip = nlip; |
965 | } | 965 | } |
966 | } | 966 | } |
967 | 967 | ||
968 | /* | 968 | /* |
969 | * This is the iodone() function for buffers which have had callbacks | 969 | * This is the iodone() function for buffers which have had callbacks |
970 | * attached to them by xfs_buf_attach_iodone(). It should remove each | 970 | * attached to them by xfs_buf_attach_iodone(). It should remove each |
971 | * log item from the buffer's list and call the callback of each in turn. | 971 | * log item from the buffer's list and call the callback of each in turn. |
972 | * When done, the buffer's fsprivate field is set to NULL and the buffer | 972 | * When done, the buffer's fsprivate field is set to NULL and the buffer |
973 | * is unlocked with a call to iodone(). | 973 | * is unlocked with a call to iodone(). |
974 | */ | 974 | */ |
975 | void | 975 | void |
976 | xfs_buf_iodone_callbacks( | 976 | xfs_buf_iodone_callbacks( |
977 | xfs_buf_t *bp) | 977 | xfs_buf_t *bp) |
978 | { | 978 | { |
979 | xfs_log_item_t *lip; | 979 | xfs_log_item_t *lip; |
980 | static ulong lasttime; | 980 | static ulong lasttime; |
981 | static xfs_buftarg_t *lasttarg; | 981 | static xfs_buftarg_t *lasttarg; |
982 | xfs_mount_t *mp; | 982 | xfs_mount_t *mp; |
983 | 983 | ||
984 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); | 984 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); |
985 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | 985 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); |
986 | 986 | ||
987 | if (XFS_BUF_GETERROR(bp) != 0) { | 987 | if (XFS_BUF_GETERROR(bp) != 0) { |
988 | /* | 988 | /* |
989 | * If we've already decided to shut down the filesystem | 989 | * If we've already decided to shut down the filesystem |
990 | * because of IO errors, there's no point in giving this | 990 | * because of IO errors, there's no point in giving this |
991 | * a retry. | 991 | * a retry. |
992 | */ | 992 | */ |
993 | mp = lip->li_mountp; | 993 | mp = lip->li_mountp; |
994 | if (XFS_FORCED_SHUTDOWN(mp)) { | 994 | if (XFS_FORCED_SHUTDOWN(mp)) { |
995 | ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); | 995 | ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); |
996 | XFS_BUF_SUPER_STALE(bp); | 996 | XFS_BUF_SUPER_STALE(bp); |
997 | xfs_buftrace("BUF_IODONE_CB", bp); | 997 | xfs_buftrace("BUF_IODONE_CB", bp); |
998 | xfs_buf_do_callbacks(bp, lip); | 998 | xfs_buf_do_callbacks(bp, lip); |
999 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 999 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
1000 | XFS_BUF_CLR_IODONE_FUNC(bp); | 1000 | XFS_BUF_CLR_IODONE_FUNC(bp); |
1001 | 1001 | ||
1002 | /* | 1002 | /* |
1003 | * XFS_SHUT flag gets set when we go through the | 1003 | * XFS_SHUT flag gets set when we go through the |
1004 | * entire buffer cache and deliberately start | 1004 | * entire buffer cache and deliberately start |
1005 | * throwing away delayed write buffers. | 1005 | * throwing away delayed write buffers. |
1006 | * Since there's no biowait done on those, | 1006 | * Since there's no biowait done on those, |
1007 | * we should just brelse them. | 1007 | * we should just brelse them. |
1008 | */ | 1008 | */ |
1009 | if (XFS_BUF_ISSHUT(bp)) { | 1009 | if (XFS_BUF_ISSHUT(bp)) { |
1010 | XFS_BUF_UNSHUT(bp); | 1010 | XFS_BUF_UNSHUT(bp); |
1011 | xfs_buf_relse(bp); | 1011 | xfs_buf_relse(bp); |
1012 | } else { | 1012 | } else { |
1013 | xfs_biodone(bp); | 1013 | xfs_biodone(bp); |
1014 | } | 1014 | } |
1015 | 1015 | ||
1016 | return; | 1016 | return; |
1017 | } | 1017 | } |
1018 | 1018 | ||
1019 | if ((XFS_BUF_TARGET(bp) != lasttarg) || | 1019 | if ((XFS_BUF_TARGET(bp) != lasttarg) || |
1020 | (time_after(jiffies, (lasttime + 5*HZ)))) { | 1020 | (time_after(jiffies, (lasttime + 5*HZ)))) { |
1021 | lasttime = jiffies; | 1021 | lasttime = jiffies; |
1022 | cmn_err(CE_ALERT, "Device %s, XFS metadata write error" | 1022 | cmn_err(CE_ALERT, "Device %s, XFS metadata write error" |
1023 | " block 0x%llx in %s", | 1023 | " block 0x%llx in %s", |
1024 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), | 1024 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), |
1025 | (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname); | 1025 | (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname); |
1026 | } | 1026 | } |
1027 | lasttarg = XFS_BUF_TARGET(bp); | 1027 | lasttarg = XFS_BUF_TARGET(bp); |
1028 | 1028 | ||
1029 | if (XFS_BUF_ISASYNC(bp)) { | 1029 | if (XFS_BUF_ISASYNC(bp)) { |
1030 | /* | 1030 | /* |
1031 | * If the write was asynchronous then no one will be | 1031 | * If the write was asynchronous then no one will be |
1032 | * looking for the error. Clear the error state | 1032 | * looking for the error. Clear the error state |
1033 | * and write the buffer out again as a delayed write. | 1033 | * and write the buffer out again as a delayed write. |
1034 | * | 1034 | * |
1035 | * XXXsup This is OK, so long as we catch these | 1035 | * XXXsup This is OK, so long as we catch these |
1036 | * before we start the umount; we don't want these | 1036 | * before we start the umount; we don't want these |
1037 | * DELWRI metadata bufs to be hanging around. | 1037 | * DELWRI metadata bufs to be hanging around. |
1038 | */ | 1038 | */ |
1039 | XFS_BUF_ERROR(bp,0); /* errno of 0 unsets the flag */ | 1039 | XFS_BUF_ERROR(bp,0); /* errno of 0 unsets the flag */ |
1040 | 1040 | ||
1041 | if (!(XFS_BUF_ISSTALE(bp))) { | 1041 | if (!(XFS_BUF_ISSTALE(bp))) { |
1042 | XFS_BUF_DELAYWRITE(bp); | 1042 | XFS_BUF_DELAYWRITE(bp); |
1043 | XFS_BUF_DONE(bp); | 1043 | XFS_BUF_DONE(bp); |
1044 | XFS_BUF_SET_START(bp); | 1044 | XFS_BUF_SET_START(bp); |
1045 | } | 1045 | } |
1046 | ASSERT(XFS_BUF_IODONE_FUNC(bp)); | 1046 | ASSERT(XFS_BUF_IODONE_FUNC(bp)); |
1047 | xfs_buftrace("BUF_IODONE ASYNC", bp); | 1047 | xfs_buftrace("BUF_IODONE ASYNC", bp); |
1048 | xfs_buf_relse(bp); | 1048 | xfs_buf_relse(bp); |
1049 | } else { | 1049 | } else { |
1050 | /* | 1050 | /* |
1051 | * If the write of the buffer was not asynchronous, | 1051 | * If the write of the buffer was not asynchronous, |
1052 | * then we want to make sure to return the error | 1052 | * then we want to make sure to return the error |
1053 | * to the caller of bwrite(). Because of this we | 1053 | * to the caller of bwrite(). Because of this we |
1054 | * cannot clear the B_ERROR state at this point. | 1054 | * cannot clear the B_ERROR state at this point. |
1055 | * Instead we install a callback function that | 1055 | * Instead we install a callback function that |
1056 | * will be called when the buffer is released, and | 1056 | * will be called when the buffer is released, and |
1057 | * that routine will clear the error state and | 1057 | * that routine will clear the error state and |
1058 | * set the buffer to be written out again after | 1058 | * set the buffer to be written out again after |
1059 | * some delay. | 1059 | * some delay. |
1060 | */ | 1060 | */ |
1061 | /* We actually overwrite the existing b_relse | 1061 | /* We actually overwrite the existing b_relse |
1062 | function at times, but we're going to be shutting down | 1062 | function at times, but we're going to be shutting down |
1063 | anyway. */ | 1063 | anyway. */ |
1064 | XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse); | 1064 | XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse); |
1065 | XFS_BUF_DONE(bp); | 1065 | XFS_BUF_DONE(bp); |
1066 | XFS_BUF_FINISH_IOWAIT(bp); | 1066 | XFS_BUF_FINISH_IOWAIT(bp); |
1067 | } | 1067 | } |
1068 | return; | 1068 | return; |
1069 | } | 1069 | } |
1070 | #ifdef XFSERRORDEBUG | 1070 | #ifdef XFSERRORDEBUG |
1071 | xfs_buftrace("XFS BUFCB NOERR", bp); | 1071 | xfs_buftrace("XFS BUFCB NOERR", bp); |
1072 | #endif | 1072 | #endif |
1073 | xfs_buf_do_callbacks(bp, lip); | 1073 | xfs_buf_do_callbacks(bp, lip); |
1074 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 1074 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
1075 | XFS_BUF_CLR_IODONE_FUNC(bp); | 1075 | XFS_BUF_CLR_IODONE_FUNC(bp); |
1076 | xfs_biodone(bp); | 1076 | xfs_biodone(bp); |
1077 | } | 1077 | } |
1078 | 1078 | ||
1079 | /* | 1079 | /* |
1080 | * This is a callback routine attached to a buffer which gets an error | 1080 | * This is a callback routine attached to a buffer which gets an error |
1081 | * when being written out synchronously. | 1081 | * when being written out synchronously. |
1082 | */ | 1082 | */ |
1083 | STATIC void | 1083 | STATIC void |
1084 | xfs_buf_error_relse( | 1084 | xfs_buf_error_relse( |
1085 | xfs_buf_t *bp) | 1085 | xfs_buf_t *bp) |
1086 | { | 1086 | { |
1087 | xfs_log_item_t *lip; | 1087 | xfs_log_item_t *lip; |
1088 | xfs_mount_t *mp; | 1088 | xfs_mount_t *mp; |
1089 | 1089 | ||
1090 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | 1090 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); |
1091 | mp = (xfs_mount_t *)lip->li_mountp; | 1091 | mp = (xfs_mount_t *)lip->li_mountp; |
1092 | ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); | 1092 | ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); |
1093 | 1093 | ||
1094 | XFS_BUF_STALE(bp); | 1094 | XFS_BUF_STALE(bp); |
1095 | XFS_BUF_DONE(bp); | 1095 | XFS_BUF_DONE(bp); |
1096 | XFS_BUF_UNDELAYWRITE(bp); | 1096 | XFS_BUF_UNDELAYWRITE(bp); |
1097 | XFS_BUF_ERROR(bp,0); | 1097 | XFS_BUF_ERROR(bp,0); |
1098 | xfs_buftrace("BUF_ERROR_RELSE", bp); | 1098 | xfs_buftrace("BUF_ERROR_RELSE", bp); |
1099 | if (! XFS_FORCED_SHUTDOWN(mp)) | 1099 | if (! XFS_FORCED_SHUTDOWN(mp)) |
1100 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); | 1100 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); |
1101 | /* | 1101 | /* |
1102 | * We have to unpin the pinned buffers so do the | 1102 | * We have to unpin the pinned buffers so do the |
1103 | * callbacks. | 1103 | * callbacks. |
1104 | */ | 1104 | */ |
1105 | xfs_buf_do_callbacks(bp, lip); | 1105 | xfs_buf_do_callbacks(bp, lip); |
1106 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 1106 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
1107 | XFS_BUF_CLR_IODONE_FUNC(bp); | 1107 | XFS_BUF_CLR_IODONE_FUNC(bp); |
1108 | XFS_BUF_SET_BRELSE_FUNC(bp,NULL); | 1108 | XFS_BUF_SET_BRELSE_FUNC(bp,NULL); |
1109 | xfs_buf_relse(bp); | 1109 | xfs_buf_relse(bp); |
1110 | } | 1110 | } |
1111 | 1111 | ||
1112 | 1112 | ||
1113 | /* | 1113 | /* |
1114 | * This is the iodone() function for buffers which have been | 1114 | * This is the iodone() function for buffers which have been |
1115 | * logged. It is called when they are eventually flushed out. | 1115 | * logged. It is called when they are eventually flushed out. |
1116 | * It should remove the buf item from the AIL, and free the buf item. | 1116 | * It should remove the buf item from the AIL, and free the buf item. |
1117 | * It is called by xfs_buf_iodone_callbacks() above which will take | 1117 | * It is called by xfs_buf_iodone_callbacks() above which will take |
1118 | * care of cleaning up the buffer itself. | 1118 | * care of cleaning up the buffer itself. |
1119 | */ | 1119 | */ |
1120 | /* ARGSUSED */ | 1120 | /* ARGSUSED */ |
1121 | void | 1121 | void |
1122 | xfs_buf_iodone( | 1122 | xfs_buf_iodone( |
1123 | xfs_buf_t *bp, | 1123 | xfs_buf_t *bp, |
1124 | xfs_buf_log_item_t *bip) | 1124 | xfs_buf_log_item_t *bip) |
1125 | { | 1125 | { |
1126 | struct xfs_ail *ailp = bip->bli_item.li_ailp; | 1126 | struct xfs_ail *ailp = bip->bli_item.li_ailp; |
1127 | 1127 | ||
1128 | ASSERT(bip->bli_buf == bp); | 1128 | ASSERT(bip->bli_buf == bp); |
1129 | 1129 | ||
1130 | xfs_buf_rele(bp); | 1130 | xfs_buf_rele(bp); |
1131 | 1131 | ||
1132 | /* | 1132 | /* |
1133 | * If we are forcibly shutting down, this may well be | 1133 | * If we are forcibly shutting down, this may well be |
1134 | * off the AIL already. That's because we simulate the | 1134 | * off the AIL already. That's because we simulate the |
1135 | * log-committed callbacks to unpin these buffers. Or we may never | 1135 | * log-committed callbacks to unpin these buffers. Or we may never |
1136 | * have put this item on the AIL because the transaction was | 1136 | * have put this item on the AIL because the transaction was |
1137 | * aborted forcibly. xfs_trans_ail_delete() takes care of these. | 1137 | * aborted forcibly. xfs_trans_ail_delete() takes care of these. |
1138 | * | 1138 | * |
1139 | * Either way, AIL is useless if we're forcing a shutdown. | 1139 | * Either way, AIL is useless if we're forcing a shutdown. |
1140 | */ | 1140 | */ |
1141 | spin_lock(&ailp->xa_lock); | 1141 | spin_lock(&ailp->xa_lock); |
1142 | xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); | 1142 | xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); |
1143 | xfs_buf_item_free(bip); | 1143 | xfs_buf_item_free(bip); |
1144 | } | 1144 | } |
1145 | 1145 | ||
1146 | #if defined(XFS_BLI_TRACE) | 1146 | #if defined(XFS_BLI_TRACE) |
1147 | void | 1147 | void |
1148 | xfs_buf_item_trace( | 1148 | xfs_buf_item_trace( |
1149 | char *id, | 1149 | char *id, |
1150 | xfs_buf_log_item_t *bip) | 1150 | xfs_buf_log_item_t *bip) |
1151 | { | 1151 | { |
1152 | xfs_buf_t *bp; | 1152 | xfs_buf_t *bp; |
1153 | ASSERT(bip->bli_trace != NULL); | 1153 | ASSERT(bip->bli_trace != NULL); |
1154 | 1154 | ||
1155 | bp = bip->bli_buf; | 1155 | bp = bip->bli_buf; |
1156 | ktrace_enter(bip->bli_trace, | 1156 | ktrace_enter(bip->bli_trace, |
1157 | (void *)id, | 1157 | (void *)id, |
1158 | (void *)bip->bli_buf, | 1158 | (void *)bip->bli_buf, |
1159 | (void *)((unsigned long)bip->bli_flags), | 1159 | (void *)((unsigned long)bip->bli_flags), |
1160 | (void *)((unsigned long)bip->bli_recur), | 1160 | (void *)((unsigned long)bip->bli_recur), |
1161 | (void *)((unsigned long)atomic_read(&bip->bli_refcount)), | 1161 | (void *)((unsigned long)atomic_read(&bip->bli_refcount)), |
1162 | (void *)((unsigned long) | 1162 | (void *)((unsigned long) |
1163 | (0xFFFFFFFF & XFS_BUF_ADDR(bp) >> 32)), | 1163 | (0xFFFFFFFF & XFS_BUF_ADDR(bp) >> 32)), |
1164 | (void *)((unsigned long)(0xFFFFFFFF & XFS_BUF_ADDR(bp))), | 1164 | (void *)((unsigned long)(0xFFFFFFFF & XFS_BUF_ADDR(bp))), |
1165 | (void *)((unsigned long)XFS_BUF_COUNT(bp)), | 1165 | (void *)((unsigned long)XFS_BUF_COUNT(bp)), |
1166 | (void *)((unsigned long)XFS_BUF_BFLAGS(bp)), | 1166 | (void *)((unsigned long)XFS_BUF_BFLAGS(bp)), |
1167 | XFS_BUF_FSPRIVATE(bp, void *), | 1167 | XFS_BUF_FSPRIVATE(bp, void *), |
1168 | XFS_BUF_FSPRIVATE2(bp, void *), | 1168 | XFS_BUF_FSPRIVATE2(bp, void *), |
1169 | (void *)(unsigned long)XFS_BUF_ISPINNED(bp), | 1169 | (void *)(unsigned long)XFS_BUF_ISPINNED(bp), |
1170 | (void *)XFS_BUF_IODONE_FUNC(bp), | 1170 | (void *)XFS_BUF_IODONE_FUNC(bp), |
1171 | (void *)((unsigned long)(XFS_BUF_VALUSEMA(bp))), | 1171 | (void *)((unsigned long)(XFS_BUF_VALUSEMA(bp))), |
1172 | (void *)bip->bli_item.li_desc, | 1172 | (void *)bip->bli_item.li_desc, |
1173 | (void *)((unsigned long)bip->bli_item.li_flags)); | 1173 | (void *)((unsigned long)bip->bli_item.li_flags)); |
1174 | } | 1174 | } |
1175 | #endif /* XFS_BLI_TRACE */ | 1175 | #endif /* XFS_BLI_TRACE */ |
1176 | 1176 |
fs/xfs/xfs_log_recover.c
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. | 2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
24 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 25 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 26 | #include "xfs_ag.h" |
27 | #include "xfs_dir2.h" | 27 | #include "xfs_dir2.h" |
28 | #include "xfs_dmapi.h" | 28 | #include "xfs_dmapi.h" |
29 | #include "xfs_mount.h" | 29 | #include "xfs_mount.h" |
30 | #include "xfs_error.h" | 30 | #include "xfs_error.h" |
31 | #include "xfs_bmap_btree.h" | 31 | #include "xfs_bmap_btree.h" |
32 | #include "xfs_alloc_btree.h" | 32 | #include "xfs_alloc_btree.h" |
33 | #include "xfs_ialloc_btree.h" | 33 | #include "xfs_ialloc_btree.h" |
34 | #include "xfs_dir2_sf.h" | 34 | #include "xfs_dir2_sf.h" |
35 | #include "xfs_attr_sf.h" | 35 | #include "xfs_attr_sf.h" |
36 | #include "xfs_dinode.h" | 36 | #include "xfs_dinode.h" |
37 | #include "xfs_inode.h" | 37 | #include "xfs_inode.h" |
38 | #include "xfs_inode_item.h" | 38 | #include "xfs_inode_item.h" |
39 | #include "xfs_alloc.h" | 39 | #include "xfs_alloc.h" |
40 | #include "xfs_ialloc.h" | 40 | #include "xfs_ialloc.h" |
41 | #include "xfs_log_priv.h" | 41 | #include "xfs_log_priv.h" |
42 | #include "xfs_buf_item.h" | 42 | #include "xfs_buf_item.h" |
43 | #include "xfs_log_recover.h" | 43 | #include "xfs_log_recover.h" |
44 | #include "xfs_extfree_item.h" | 44 | #include "xfs_extfree_item.h" |
45 | #include "xfs_trans_priv.h" | 45 | #include "xfs_trans_priv.h" |
46 | #include "xfs_quota.h" | 46 | #include "xfs_quota.h" |
47 | #include "xfs_rw.h" | 47 | #include "xfs_rw.h" |
48 | #include "xfs_utils.h" | 48 | #include "xfs_utils.h" |
49 | 49 | ||
50 | STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); | 50 | STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); |
51 | STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); | 51 | STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); |
52 | STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q, | 52 | STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q, |
53 | xlog_recover_item_t *item); | 53 | xlog_recover_item_t *item); |
54 | #if defined(DEBUG) | 54 | #if defined(DEBUG) |
55 | STATIC void xlog_recover_check_summary(xlog_t *); | 55 | STATIC void xlog_recover_check_summary(xlog_t *); |
56 | #else | 56 | #else |
57 | #define xlog_recover_check_summary(log) | 57 | #define xlog_recover_check_summary(log) |
58 | #endif | 58 | #endif |
59 | 59 | ||
60 | 60 | ||
61 | /* | 61 | /* |
62 | * Sector aligned buffer routines for buffer create/read/write/access | 62 | * Sector aligned buffer routines for buffer create/read/write/access |
63 | */ | 63 | */ |
64 | 64 | ||
65 | #define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs) \ | 65 | #define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs) \ |
66 | ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? \ | 66 | ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? \ |
67 | ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) ) | 67 | ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) ) |
68 | #define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask) | 68 | #define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask) |
69 | 69 | ||
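The two macros above implement the usual power-of-two rounding trick: l_sectbb_mask is the sector size in basic blocks minus one, so adding the mask plus one and masking snaps a count up to a sector boundary, while a plain mask snaps a block number down. A minimal standalone sanity check of that arithmetic, assuming 4k sectors over 512-byte basic blocks (mask of 7); the struct and macro copies below are reduced for the demo and are not the kernel definitions:

    #include <assert.h>

    /* Reduced copies of the two macros; the log is shrunk to the one
     * field they touch.  Demo arithmetic only, not kernel code. */
    struct fake_log { int l_sectbb_mask; };

    #define ROUNDUP_BBCOUNT(log, bbs) \
            (((log)->l_sectbb_mask && ((bbs) & (log)->l_sectbb_mask)) ? \
             (((bbs) + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs))
    #define ROUNDDOWN_BLKNO(log, bno)  ((bno) & ~(log)->l_sectbb_mask)

    int main(void)
    {
            struct fake_log log = { .l_sectbb_mask = 7 };  /* 4k sectors */

            assert(ROUNDUP_BBCOUNT(&log, 5) == 8);   /* partial sector rounds up */
            assert(ROUNDUP_BBCOUNT(&log, 8) == 8);   /* aligned count unchanged */
            assert(ROUNDDOWN_BLKNO(&log, 13) == 8);  /* block 13 snaps back to 8 */

            log.l_sectbb_mask = 0;                   /* 512b sectors: no-ops */
            assert(ROUNDUP_BBCOUNT(&log, 5) == 5);
            return 0;
    }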
70 | xfs_buf_t * | 70 | xfs_buf_t * |
71 | xlog_get_bp( | 71 | xlog_get_bp( |
72 | xlog_t *log, | 72 | xlog_t *log, |
73 | int num_bblks) | 73 | int num_bblks) |
74 | { | 74 | { |
75 | ASSERT(num_bblks > 0); | 75 | ASSERT(num_bblks > 0); |
76 | 76 | ||
77 | if (log->l_sectbb_log) { | 77 | if (log->l_sectbb_log) { |
78 | if (num_bblks > 1) | 78 | if (num_bblks > 1) |
79 | num_bblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); | 79 | num_bblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); |
80 | num_bblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, num_bblks); | 80 | num_bblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, num_bblks); |
81 | } | 81 | } |
82 | return xfs_buf_get_noaddr(BBTOB(num_bblks), log->l_mp->m_logdev_targp); | 82 | return xfs_buf_get_noaddr(BBTOB(num_bblks), log->l_mp->m_logdev_targp); |
83 | } | 83 | } |
84 | 84 | ||
85 | void | 85 | void |
86 | xlog_put_bp( | 86 | xlog_put_bp( |
87 | xfs_buf_t *bp) | 87 | xfs_buf_t *bp) |
88 | { | 88 | { |
89 | xfs_buf_free(bp); | 89 | xfs_buf_free(bp); |
90 | } | 90 | } |
91 | 91 | ||
92 | 92 | ||
93 | /* | 93 | /* |
94 | * nbblks should be uint, but oh well. Just want to catch that 32-bit length. | 94 | * nbblks should be uint, but oh well. Just want to catch that 32-bit length. |
95 | */ | 95 | */ |
96 | int | 96 | int |
97 | xlog_bread( | 97 | xlog_bread( |
98 | xlog_t *log, | 98 | xlog_t *log, |
99 | xfs_daddr_t blk_no, | 99 | xfs_daddr_t blk_no, |
100 | int nbblks, | 100 | int nbblks, |
101 | xfs_buf_t *bp) | 101 | xfs_buf_t *bp) |
102 | { | 102 | { |
103 | int error; | 103 | int error; |
104 | 104 | ||
105 | if (log->l_sectbb_log) { | 105 | if (log->l_sectbb_log) { |
106 | blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); | 106 | blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); |
107 | nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); | 107 | nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); |
108 | } | 108 | } |
109 | 109 | ||
110 | ASSERT(nbblks > 0); | 110 | ASSERT(nbblks > 0); |
111 | ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); | 111 | ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); |
112 | ASSERT(bp); | 112 | ASSERT(bp); |
113 | 113 | ||
114 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); | 114 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); |
115 | XFS_BUF_READ(bp); | 115 | XFS_BUF_READ(bp); |
116 | XFS_BUF_BUSY(bp); | 116 | XFS_BUF_BUSY(bp); |
117 | XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); | 117 | XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); |
118 | XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); | 118 | XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); |
119 | 119 | ||
120 | xfsbdstrat(log->l_mp, bp); | 120 | xfsbdstrat(log->l_mp, bp); |
121 | error = xfs_iowait(bp); | 121 | error = xfs_iowait(bp); |
122 | if (error) | 122 | if (error) |
123 | xfs_ioerror_alert("xlog_bread", log->l_mp, | 123 | xfs_ioerror_alert("xlog_bread", log->l_mp, |
124 | bp, XFS_BUF_ADDR(bp)); | 124 | bp, XFS_BUF_ADDR(bp)); |
125 | return error; | 125 | return error; |
126 | } | 126 | } |
127 | 127 | ||
128 | /* | 128 | /* |
129 | * Write out the buffer at the given block for the given number of blocks. | 129 | * Write out the buffer at the given block for the given number of blocks. |
130 | * The buffer is kept locked across the write and is returned locked. | 130 | * The buffer is kept locked across the write and is returned locked. |
131 | * This can only be used for synchronous log writes. | 131 | * This can only be used for synchronous log writes. |
132 | */ | 132 | */ |
133 | STATIC int | 133 | STATIC int |
134 | xlog_bwrite( | 134 | xlog_bwrite( |
135 | xlog_t *log, | 135 | xlog_t *log, |
136 | xfs_daddr_t blk_no, | 136 | xfs_daddr_t blk_no, |
137 | int nbblks, | 137 | int nbblks, |
138 | xfs_buf_t *bp) | 138 | xfs_buf_t *bp) |
139 | { | 139 | { |
140 | int error; | 140 | int error; |
141 | 141 | ||
142 | if (log->l_sectbb_log) { | 142 | if (log->l_sectbb_log) { |
143 | blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); | 143 | blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); |
144 | nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); | 144 | nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); |
145 | } | 145 | } |
146 | 146 | ||
147 | ASSERT(nbblks > 0); | 147 | ASSERT(nbblks > 0); |
148 | ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); | 148 | ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); |
149 | 149 | ||
150 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); | 150 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); |
151 | XFS_BUF_ZEROFLAGS(bp); | 151 | XFS_BUF_ZEROFLAGS(bp); |
152 | XFS_BUF_BUSY(bp); | 152 | XFS_BUF_BUSY(bp); |
153 | XFS_BUF_HOLD(bp); | 153 | XFS_BUF_HOLD(bp); |
154 | XFS_BUF_PSEMA(bp, PRIBIO); | 154 | XFS_BUF_PSEMA(bp, PRIBIO); |
155 | XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); | 155 | XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); |
156 | XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); | 156 | XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); |
157 | 157 | ||
158 | if ((error = xfs_bwrite(log->l_mp, bp))) | 158 | if ((error = xfs_bwrite(log->l_mp, bp))) |
159 | xfs_ioerror_alert("xlog_bwrite", log->l_mp, | 159 | xfs_ioerror_alert("xlog_bwrite", log->l_mp, |
160 | bp, XFS_BUF_ADDR(bp)); | 160 | bp, XFS_BUF_ADDR(bp)); |
161 | return error; | 161 | return error; |
162 | } | 162 | } |
163 | 163 | ||
164 | STATIC xfs_caddr_t | 164 | STATIC xfs_caddr_t |
165 | xlog_align( | 165 | xlog_align( |
166 | xlog_t *log, | 166 | xlog_t *log, |
167 | xfs_daddr_t blk_no, | 167 | xfs_daddr_t blk_no, |
168 | int nbblks, | 168 | int nbblks, |
169 | xfs_buf_t *bp) | 169 | xfs_buf_t *bp) |
170 | { | 170 | { |
171 | xfs_caddr_t ptr; | 171 | xfs_caddr_t ptr; |
172 | 172 | ||
173 | if (!log->l_sectbb_log) | 173 | if (!log->l_sectbb_log) |
174 | return XFS_BUF_PTR(bp); | 174 | return XFS_BUF_PTR(bp); |
175 | 175 | ||
176 | ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask); | 176 | ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask); |
177 | ASSERT(XFS_BUF_SIZE(bp) >= | 177 | ASSERT(XFS_BUF_SIZE(bp) >= |
178 | BBTOB(nbblks + (blk_no & log->l_sectbb_mask))); | 178 | BBTOB(nbblks + (blk_no & log->l_sectbb_mask))); |
179 | return ptr; | 179 | return ptr; |
180 | } | 180 | } |
181 | 181 | ||
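xlog_align() compensates for the rounding that xlog_bread() applied: since the read was snapped down to the sector boundary below blk_no, the caller's data begins BBTOB(blk_no & l_sectbb_mask) bytes into the buffer. A tiny worked example of that offset, using hypothetical numbers (4k sectors, a request for block 13):

    #include <assert.h>

    #define BBSHIFT     9                   /* 512-byte basic blocks */
    #define BBTOB(bbs)  ((bbs) << BBSHIFT)

    int main(void)
    {
            int sectbb_mask = 7;    /* 4k sectors: 8 BBs per sector */
            int blk_no = 13;        /* the read was rounded down to block 8 */

            /* block 13 sits 5 basic blocks (2560 bytes) into the buffer */
            assert(BBTOB(blk_no & sectbb_mask) == 5 * 512);
            return 0;
    }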
182 | #ifdef DEBUG | 182 | #ifdef DEBUG |
183 | /* | 183 | /* |
184 | * dump debug superblock and log record information | 184 | * dump debug superblock and log record information |
185 | */ | 185 | */ |
186 | STATIC void | 186 | STATIC void |
187 | xlog_header_check_dump( | 187 | xlog_header_check_dump( |
188 | xfs_mount_t *mp, | 188 | xfs_mount_t *mp, |
189 | xlog_rec_header_t *head) | 189 | xlog_rec_header_t *head) |
190 | { | 190 | { |
191 | int b; | 191 | int b; |
192 | 192 | ||
193 | cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__); | 193 | cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__); |
194 | for (b = 0; b < 16; b++) | 194 | for (b = 0; b < 16; b++) |
195 | cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]); | 195 | cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]); |
196 | cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT); | 196 | cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT); |
197 | cmn_err(CE_DEBUG, " log : uuid = "); | 197 | cmn_err(CE_DEBUG, " log : uuid = "); |
198 | for (b = 0; b < 16; b++) | 198 | for (b = 0; b < 16; b++) |
199 | cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]); | 199 | cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]); |
200 | cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt)); | 200 | cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt)); |
201 | } | 201 | } |
202 | #else | 202 | #else |
203 | #define xlog_header_check_dump(mp, head) | 203 | #define xlog_header_check_dump(mp, head) |
204 | #endif | 204 | #endif |
205 | 205 | ||
206 | /* | 206 | /* |
207 | * check log record header for recovery | 207 | * check log record header for recovery |
208 | */ | 208 | */ |
209 | STATIC int | 209 | STATIC int |
210 | xlog_header_check_recover( | 210 | xlog_header_check_recover( |
211 | xfs_mount_t *mp, | 211 | xfs_mount_t *mp, |
212 | xlog_rec_header_t *head) | 212 | xlog_rec_header_t *head) |
213 | { | 213 | { |
214 | ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); | 214 | ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); |
215 | 215 | ||
216 | /* | 216 | /* |
217 | * IRIX doesn't write the h_fmt field and leaves it zeroed | 217 | * IRIX doesn't write the h_fmt field and leaves it zeroed |
218 | * (XLOG_FMT_UNKNOWN). This stops us from trying to recover | 218 | * (XLOG_FMT_UNKNOWN). This stops us from trying to recover |
219 | * a dirty log created in IRIX. | 219 | * a dirty log created in IRIX. |
220 | */ | 220 | */ |
221 | if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { | 221 | if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { |
222 | xlog_warn( | 222 | xlog_warn( |
223 | "XFS: dirty log written in incompatible format - can't recover"); | 223 | "XFS: dirty log written in incompatible format - can't recover"); |
224 | xlog_header_check_dump(mp, head); | 224 | xlog_header_check_dump(mp, head); |
225 | XFS_ERROR_REPORT("xlog_header_check_recover(1)", | 225 | XFS_ERROR_REPORT("xlog_header_check_recover(1)", |
226 | XFS_ERRLEVEL_HIGH, mp); | 226 | XFS_ERRLEVEL_HIGH, mp); |
227 | return XFS_ERROR(EFSCORRUPTED); | 227 | return XFS_ERROR(EFSCORRUPTED); |
228 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { | 228 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { |
229 | xlog_warn( | 229 | xlog_warn( |
230 | "XFS: dirty log entry has mismatched uuid - can't recover"); | 230 | "XFS: dirty log entry has mismatched uuid - can't recover"); |
231 | xlog_header_check_dump(mp, head); | 231 | xlog_header_check_dump(mp, head); |
232 | XFS_ERROR_REPORT("xlog_header_check_recover(2)", | 232 | XFS_ERROR_REPORT("xlog_header_check_recover(2)", |
233 | XFS_ERRLEVEL_HIGH, mp); | 233 | XFS_ERRLEVEL_HIGH, mp); |
234 | return XFS_ERROR(EFSCORRUPTED); | 234 | return XFS_ERROR(EFSCORRUPTED); |
235 | } | 235 | } |
236 | return 0; | 236 | return 0; |
237 | } | 237 | } |
238 | 238 | ||
239 | /* | 239 | /* |
240 | * read the head block of the log and check the header | 240 | * read the head block of the log and check the header |
241 | */ | 241 | */ |
242 | STATIC int | 242 | STATIC int |
243 | xlog_header_check_mount( | 243 | xlog_header_check_mount( |
244 | xfs_mount_t *mp, | 244 | xfs_mount_t *mp, |
245 | xlog_rec_header_t *head) | 245 | xlog_rec_header_t *head) |
246 | { | 246 | { |
247 | ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); | 247 | ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); |
248 | 248 | ||
249 | if (uuid_is_nil(&head->h_fs_uuid)) { | 249 | if (uuid_is_nil(&head->h_fs_uuid)) { |
250 | /* | 250 | /* |
251 | * IRIX doesn't write the h_fs_uuid or h_fmt fields. If | 251 | * IRIX doesn't write the h_fs_uuid or h_fmt fields. If |
252 | * h_fs_uuid is nil, we assume this log was last mounted | 252 | * h_fs_uuid is nil, we assume this log was last mounted |
253 | * by IRIX and continue. | 253 | * by IRIX and continue. |
254 | */ | 254 | */ |
255 | xlog_warn("XFS: nil uuid in log - IRIX style log"); | 255 | xlog_warn("XFS: nil uuid in log - IRIX style log"); |
256 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { | 256 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { |
257 | xlog_warn("XFS: log has mismatched uuid - can't recover"); | 257 | xlog_warn("XFS: log has mismatched uuid - can't recover"); |
258 | xlog_header_check_dump(mp, head); | 258 | xlog_header_check_dump(mp, head); |
259 | XFS_ERROR_REPORT("xlog_header_check_mount", | 259 | XFS_ERROR_REPORT("xlog_header_check_mount", |
260 | XFS_ERRLEVEL_HIGH, mp); | 260 | XFS_ERRLEVEL_HIGH, mp); |
261 | return XFS_ERROR(EFSCORRUPTED); | 261 | return XFS_ERROR(EFSCORRUPTED); |
262 | } | 262 | } |
263 | return 0; | 263 | return 0; |
264 | } | 264 | } |
265 | 265 | ||
266 | STATIC void | 266 | STATIC void |
267 | xlog_recover_iodone( | 267 | xlog_recover_iodone( |
268 | struct xfs_buf *bp) | 268 | struct xfs_buf *bp) |
269 | { | 269 | { |
270 | xfs_mount_t *mp; | ||
271 | |||
272 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *)); | ||
273 | |||
274 | if (XFS_BUF_GETERROR(bp)) { | 270 | if (XFS_BUF_GETERROR(bp)) { |
275 | /* | 271 | /* |
276 | * We're not going to bother about retrying | 272 | * We're not going to bother about retrying |
277 | * this during recovery. One strike! | 273 | * this during recovery. One strike! |
278 | */ | 274 | */ |
279 | mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *); | ||
280 | xfs_ioerror_alert("xlog_recover_iodone", | 275 | xfs_ioerror_alert("xlog_recover_iodone", |
281 | mp, bp, XFS_BUF_ADDR(bp)); | 276 | bp->b_mount, bp, XFS_BUF_ADDR(bp)); |
282 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); | 277 | xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); |
283 | } | 278 | } |
284 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 279 | bp->b_mount = NULL; |
285 | XFS_BUF_CLR_IODONE_FUNC(bp); | 280 | XFS_BUF_CLR_IODONE_FUNC(bp); |
286 | xfs_biodone(bp); | 281 | xfs_biodone(bp); |
287 | } | 282 | } |
288 | 283 | ||
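This hunk is where the commit's subject line lands in recovery: the mount pointer moves out of the untyped b_fspriv slot (reached through the XFS_BUF_FSPRIVATE accessors) and into the typed b_mount field, so the local variable and its cast disappear. Schematically, and purely as an illustration of the shape of the change:

    /* before: untyped private pointer, cast at every use */
    XFS_BUF_SET_FSPRIVATE(bp, mp);
    mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *);

    /* after: a properly typed xfs_mount pointer, no cast, no macros */
    bp->b_mount = mp;
    mp = bp->b_mount;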
289 | /* | 284 | /* |
290 | * This routine finds (to an approximation) the first block in the physical | 285 | * This routine finds (to an approximation) the first block in the physical |
291 | * log which contains the given cycle. It uses a binary search algorithm. | 286 | * log which contains the given cycle. It uses a binary search algorithm. |
292 | * Note that the algorithm cannot be perfect because the disk will not | 287 | * Note that the algorithm cannot be perfect because the disk will not |
293 | * necessarily be perfect. | 288 | * necessarily be perfect. |
294 | */ | 289 | */ |
295 | STATIC int | 290 | STATIC int |
296 | xlog_find_cycle_start( | 291 | xlog_find_cycle_start( |
297 | xlog_t *log, | 292 | xlog_t *log, |
298 | xfs_buf_t *bp, | 293 | xfs_buf_t *bp, |
299 | xfs_daddr_t first_blk, | 294 | xfs_daddr_t first_blk, |
300 | xfs_daddr_t *last_blk, | 295 | xfs_daddr_t *last_blk, |
301 | uint cycle) | 296 | uint cycle) |
302 | { | 297 | { |
303 | xfs_caddr_t offset; | 298 | xfs_caddr_t offset; |
304 | xfs_daddr_t mid_blk; | 299 | xfs_daddr_t mid_blk; |
305 | uint mid_cycle; | 300 | uint mid_cycle; |
306 | int error; | 301 | int error; |
307 | 302 | ||
308 | mid_blk = BLK_AVG(first_blk, *last_blk); | 303 | mid_blk = BLK_AVG(first_blk, *last_blk); |
309 | while (mid_blk != first_blk && mid_blk != *last_blk) { | 304 | while (mid_blk != first_blk && mid_blk != *last_blk) { |
310 | if ((error = xlog_bread(log, mid_blk, 1, bp))) | 305 | if ((error = xlog_bread(log, mid_blk, 1, bp))) |
311 | return error; | 306 | return error; |
312 | offset = xlog_align(log, mid_blk, 1, bp); | 307 | offset = xlog_align(log, mid_blk, 1, bp); |
313 | mid_cycle = xlog_get_cycle(offset); | 308 | mid_cycle = xlog_get_cycle(offset); |
314 | if (mid_cycle == cycle) { | 309 | if (mid_cycle == cycle) { |
315 | *last_blk = mid_blk; | 310 | *last_blk = mid_blk; |
316 | /* last_half_cycle == mid_cycle */ | 311 | /* last_half_cycle == mid_cycle */ |
317 | } else { | 312 | } else { |
318 | first_blk = mid_blk; | 313 | first_blk = mid_blk; |
319 | /* first_half_cycle == mid_cycle */ | 314 | /* first_half_cycle == mid_cycle */ |
320 | } | 315 | } |
321 | mid_blk = BLK_AVG(first_blk, *last_blk); | 316 | mid_blk = BLK_AVG(first_blk, *last_blk); |
322 | } | 317 | } |
323 | ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) || | 318 | ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) || |
324 | (mid_blk == *last_blk && mid_blk-1 == first_blk)); | 319 | (mid_blk == *last_blk && mid_blk-1 == first_blk)); |
325 | 320 | ||
326 | return 0; | 321 | return 0; |
327 | } | 322 | } |
328 | 323 | ||
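The loop above is a plain binary search with the invariant that first_blk always holds a block from the old cycle and *last_blk one from the cycle being searched for, so the two converge on the boundary. A self-contained model of the same loop over an in-memory array (the array and numbers are made up for the demo; the kernel reads each probe from disk via xlog_bread()):

    #include <assert.h>

    /* Midpoint, mirroring XFS's BLK_AVG. */
    #define BLK_AVG(a, b)   (((a) + (b)) >> 1)

    /* Find the first index whose cycle number equals 'cycle', given
     * that cycles[first] != cycle and cycles[last] == cycle. */
    static int find_cycle_start(const unsigned *cycles, int first, int last,
                                unsigned cycle)
    {
            int mid = BLK_AVG(first, last);

            while (mid != first && mid != last) {
                    if (cycles[mid] == cycle)
                            last = mid;   /* invariant: cycles[last] == cycle */
                    else
                            first = mid;  /* invariant: cycles[first] != cycle */
                    mid = BLK_AVG(first, last);
            }
            return last;
    }

    int main(void)
    {
            /* cycle numbers 2 2 2 2 1 1 1 1: the first block stamped
             * with cycle 1 is index 4 */
            unsigned cycles[] = { 2, 2, 2, 2, 1, 1, 1, 1 };

            assert(find_cycle_start(cycles, 0, 7, 1) == 4);
            return 0;
    }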
329 | /* | 324 | /* |
330 | * Check that the range of blocks does not contain the cycle number | 325 | * Check that the range of blocks does not contain the cycle number |
331 | * given. The scan needs to occur from front to back and the ptr into the | 326 | * given. The scan needs to occur from front to back and the ptr into the |
332 | * region must be updated since a later routine will need to perform another | 327 | * region must be updated since a later routine will need to perform another |
333 | * test. If the region is completely good, we end up returning the same | 328 | * test. If the region is completely good, we end up returning the same |
334 | * last block number. | 329 | * last block number. |
335 | * | 330 | * |
336 | * Set blkno to -1 if we encounter no errors. This is an invalid block number | 331 | * Set blkno to -1 if we encounter no errors. This is an invalid block number |
337 | * since we don't ever expect logs to get this large. | 332 | * since we don't ever expect logs to get this large. |
338 | */ | 333 | */ |
339 | STATIC int | 334 | STATIC int |
340 | xlog_find_verify_cycle( | 335 | xlog_find_verify_cycle( |
341 | xlog_t *log, | 336 | xlog_t *log, |
342 | xfs_daddr_t start_blk, | 337 | xfs_daddr_t start_blk, |
343 | int nbblks, | 338 | int nbblks, |
344 | uint stop_on_cycle_no, | 339 | uint stop_on_cycle_no, |
345 | xfs_daddr_t *new_blk) | 340 | xfs_daddr_t *new_blk) |
346 | { | 341 | { |
347 | xfs_daddr_t i, j; | 342 | xfs_daddr_t i, j; |
348 | uint cycle; | 343 | uint cycle; |
349 | xfs_buf_t *bp; | 344 | xfs_buf_t *bp; |
350 | xfs_daddr_t bufblks; | 345 | xfs_daddr_t bufblks; |
351 | xfs_caddr_t buf = NULL; | 346 | xfs_caddr_t buf = NULL; |
352 | int error = 0; | 347 | int error = 0; |
353 | 348 | ||
354 | bufblks = 1 << ffs(nbblks); | 349 | bufblks = 1 << ffs(nbblks); |
355 | 350 | ||
356 | while (!(bp = xlog_get_bp(log, bufblks))) { | 351 | while (!(bp = xlog_get_bp(log, bufblks))) { |
357 | /* can't get enough memory to do everything in one big buffer */ | 352 | /* can't get enough memory to do everything in one big buffer */ |
358 | bufblks >>= 1; | 353 | bufblks >>= 1; |
359 | if (bufblks <= log->l_sectbb_log) | 354 | if (bufblks <= log->l_sectbb_log) |
360 | return ENOMEM; | 355 | return ENOMEM; |
361 | } | 356 | } |
362 | 357 | ||
363 | for (i = start_blk; i < start_blk + nbblks; i += bufblks) { | 358 | for (i = start_blk; i < start_blk + nbblks; i += bufblks) { |
364 | int bcount; | 359 | int bcount; |
365 | 360 | ||
366 | bcount = min(bufblks, (start_blk + nbblks - i)); | 361 | bcount = min(bufblks, (start_blk + nbblks - i)); |
367 | 362 | ||
368 | if ((error = xlog_bread(log, i, bcount, bp))) | 363 | if ((error = xlog_bread(log, i, bcount, bp))) |
369 | goto out; | 364 | goto out; |
370 | 365 | ||
371 | buf = xlog_align(log, i, bcount, bp); | 366 | buf = xlog_align(log, i, bcount, bp); |
372 | for (j = 0; j < bcount; j++) { | 367 | for (j = 0; j < bcount; j++) { |
373 | cycle = xlog_get_cycle(buf); | 368 | cycle = xlog_get_cycle(buf); |
374 | if (cycle == stop_on_cycle_no) { | 369 | if (cycle == stop_on_cycle_no) { |
375 | *new_blk = i+j; | 370 | *new_blk = i+j; |
376 | goto out; | 371 | goto out; |
377 | } | 372 | } |
378 | 373 | ||
379 | buf += BBSIZE; | 374 | buf += BBSIZE; |
380 | } | 375 | } |
381 | } | 376 | } |
382 | 377 | ||
383 | *new_blk = -1; | 378 | *new_blk = -1; |
384 | 379 | ||
385 | out: | 380 | out: |
386 | xlog_put_bp(bp); | 381 | xlog_put_bp(bp); |
387 | return error; | 382 | return error; |
388 | } | 383 | } |
389 | 384 | ||
390 | /* | 385 | /* |
391 | * Potentially backup over partial log record write. | 386 | * Potentially backup over partial log record write. |
392 | * | 387 | * |
393 | * In the typical case, last_blk is the number of the block directly after | 388 | * In the typical case, last_blk is the number of the block directly after |
394 | * a good log record. Therefore, we subtract one to get the block number | 389 | * a good log record. Therefore, we subtract one to get the block number |
395 | * of the last block in the given buffer. extra_bblks contains the number | 390 | * of the last block in the given buffer. extra_bblks contains the number |
396 | * of blocks we would have read on a previous read. This happens when the | 391 | * of blocks we would have read on a previous read. This happens when the |
397 | * last log record is split over the end of the physical log. | 392 | * last log record is split over the end of the physical log. |
398 | * | 393 | * |
399 | * extra_bblks is the number of blocks potentially verified on a previous | 394 | * extra_bblks is the number of blocks potentially verified on a previous |
400 | * call to this routine. | 395 | * call to this routine. |
401 | */ | 396 | */ |
402 | STATIC int | 397 | STATIC int |
403 | xlog_find_verify_log_record( | 398 | xlog_find_verify_log_record( |
404 | xlog_t *log, | 399 | xlog_t *log, |
405 | xfs_daddr_t start_blk, | 400 | xfs_daddr_t start_blk, |
406 | xfs_daddr_t *last_blk, | 401 | xfs_daddr_t *last_blk, |
407 | int extra_bblks) | 402 | int extra_bblks) |
408 | { | 403 | { |
409 | xfs_daddr_t i; | 404 | xfs_daddr_t i; |
410 | xfs_buf_t *bp; | 405 | xfs_buf_t *bp; |
411 | xfs_caddr_t offset = NULL; | 406 | xfs_caddr_t offset = NULL; |
412 | xlog_rec_header_t *head = NULL; | 407 | xlog_rec_header_t *head = NULL; |
413 | int error = 0; | 408 | int error = 0; |
414 | int smallmem = 0; | 409 | int smallmem = 0; |
415 | int num_blks = *last_blk - start_blk; | 410 | int num_blks = *last_blk - start_blk; |
416 | int xhdrs; | 411 | int xhdrs; |
417 | 412 | ||
418 | ASSERT(start_blk != 0 || *last_blk != start_blk); | 413 | ASSERT(start_blk != 0 || *last_blk != start_blk); |
419 | 414 | ||
420 | if (!(bp = xlog_get_bp(log, num_blks))) { | 415 | if (!(bp = xlog_get_bp(log, num_blks))) { |
421 | if (!(bp = xlog_get_bp(log, 1))) | 416 | if (!(bp = xlog_get_bp(log, 1))) |
422 | return ENOMEM; | 417 | return ENOMEM; |
423 | smallmem = 1; | 418 | smallmem = 1; |
424 | } else { | 419 | } else { |
425 | if ((error = xlog_bread(log, start_blk, num_blks, bp))) | 420 | if ((error = xlog_bread(log, start_blk, num_blks, bp))) |
426 | goto out; | 421 | goto out; |
427 | offset = xlog_align(log, start_blk, num_blks, bp); | 422 | offset = xlog_align(log, start_blk, num_blks, bp); |
428 | offset += ((num_blks - 1) << BBSHIFT); | 423 | offset += ((num_blks - 1) << BBSHIFT); |
429 | } | 424 | } |
430 | 425 | ||
431 | for (i = (*last_blk) - 1; i >= 0; i--) { | 426 | for (i = (*last_blk) - 1; i >= 0; i--) { |
432 | if (i < start_blk) { | 427 | if (i < start_blk) { |
433 | /* valid log record not found */ | 428 | /* valid log record not found */ |
434 | xlog_warn( | 429 | xlog_warn( |
435 | "XFS: Log inconsistent (didn't find previous header)"); | 430 | "XFS: Log inconsistent (didn't find previous header)"); |
436 | ASSERT(0); | 431 | ASSERT(0); |
437 | error = XFS_ERROR(EIO); | 432 | error = XFS_ERROR(EIO); |
438 | goto out; | 433 | goto out; |
439 | } | 434 | } |
440 | 435 | ||
441 | if (smallmem) { | 436 | if (smallmem) { |
442 | if ((error = xlog_bread(log, i, 1, bp))) | 437 | if ((error = xlog_bread(log, i, 1, bp))) |
443 | goto out; | 438 | goto out; |
444 | offset = xlog_align(log, i, 1, bp); | 439 | offset = xlog_align(log, i, 1, bp); |
445 | } | 440 | } |
446 | 441 | ||
447 | head = (xlog_rec_header_t *)offset; | 442 | head = (xlog_rec_header_t *)offset; |
448 | 443 | ||
449 | if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(head->h_magicno)) | 444 | if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(head->h_magicno)) |
450 | break; | 445 | break; |
451 | 446 | ||
452 | if (!smallmem) | 447 | if (!smallmem) |
453 | offset -= BBSIZE; | 448 | offset -= BBSIZE; |
454 | } | 449 | } |
455 | 450 | ||
456 | /* | 451 | /* |
457 | * We hit the beginning of the physical log & still no header. Return | 452 | * We hit the beginning of the physical log & still no header. Return |
458 | * to caller. If caller can handle a return of -1, then this routine | 453 | * to caller. If caller can handle a return of -1, then this routine |
459 | * will be called again for the end of the physical log. | 454 | * will be called again for the end of the physical log. |
460 | */ | 455 | */ |
461 | if (i == -1) { | 456 | if (i == -1) { |
462 | error = -1; | 457 | error = -1; |
463 | goto out; | 458 | goto out; |
464 | } | 459 | } |
465 | 460 | ||
466 | /* | 461 | /* |
467 | * We have the final block of the good log (the first block | 462 | * We have the final block of the good log (the first block |
468 | * of the log record _before_ the head), so we check the uuid. | 463 | * of the log record _before_ the head), so we check the uuid. |
469 | */ | 464 | */ |
470 | if ((error = xlog_header_check_mount(log->l_mp, head))) | 465 | if ((error = xlog_header_check_mount(log->l_mp, head))) |
471 | goto out; | 466 | goto out; |
472 | 467 | ||
473 | /* | 468 | /* |
474 | * We may have found a log record header before we expected one. | 469 | * We may have found a log record header before we expected one. |
475 | * last_blk will be the 1st block # with a given cycle #. We may end | 470 | * last_blk will be the 1st block # with a given cycle #. We may end |
476 | * up reading an entire log record. In this case, we don't want to | 471 | * up reading an entire log record. In this case, we don't want to |
477 | * reset last_blk. Only when last_blk points in the middle of a log | 472 | * reset last_blk. Only when last_blk points in the middle of a log |
478 | * record do we update last_blk. | 473 | * record do we update last_blk. |
479 | */ | 474 | */ |
480 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { | 475 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { |
481 | uint h_size = be32_to_cpu(head->h_size); | 476 | uint h_size = be32_to_cpu(head->h_size); |
482 | 477 | ||
483 | xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE; | 478 | xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE; |
484 | if (h_size % XLOG_HEADER_CYCLE_SIZE) | 479 | if (h_size % XLOG_HEADER_CYCLE_SIZE) |
485 | xhdrs++; | 480 | xhdrs++; |
486 | } else { | 481 | } else { |
487 | xhdrs = 1; | 482 | xhdrs = 1; |
488 | } | 483 | } |
489 | 484 | ||
490 | if (*last_blk - i + extra_bblks != | 485 | if (*last_blk - i + extra_bblks != |
491 | BTOBB(be32_to_cpu(head->h_len)) + xhdrs) | 486 | BTOBB(be32_to_cpu(head->h_len)) + xhdrs) |
492 | *last_blk = i; | 487 | *last_blk = i; |
493 | 488 | ||
494 | out: | 489 | out: |
495 | xlog_put_bp(bp); | 490 | xlog_put_bp(bp); |
496 | return error; | 491 | return error; |
497 | } | 492 | } |
498 | 493 | ||
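For version-2 logs the computation near the end of this function is a ceiling division: the record is fronted by one header block per XLOG_HEADER_CYCLE_SIZE (32k) chunk of record data, rounded up. A minimal sketch of just that count, with the constant repeated here only for the demo:

    #include <assert.h>

    #define XLOG_HEADER_CYCLE_SIZE  (32 * 1024)

    /* ceil(h_size / XLOG_HEADER_CYCLE_SIZE), as computed above. */
    static int count_xhdrs(unsigned h_size)
    {
            int xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;

            if (h_size % XLOG_HEADER_CYCLE_SIZE)
                    xhdrs++;
            return xhdrs;
    }

    int main(void)
    {
            assert(count_xhdrs(32 * 1024) == 1);  /* exactly one chunk */
            assert(count_xhdrs(64 * 1024) == 2);
            assert(count_xhdrs(40 * 1024) == 2);  /* partial chunk still counts */
            return 0;
    }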
499 | /* | 494 | /* |
500 | * Head is defined to be the point of the log where the next log write | 495 | * Head is defined to be the point of the log where the next log write |
501 | * could go. This means that incomplete LR writes at the end are | 496 | * could go. This means that incomplete LR writes at the end are |
502 | * eliminated when calculating the head. We aren't guaranteed that previous | 497 | * eliminated when calculating the head. We aren't guaranteed that previous |
503 | * LRs have complete transactions. We only know that a cycle number of | 498 | * LRs have complete transactions. We only know that a cycle number of |
504 | * the current cycle number - 1 won't be present in the log if we start | 499 | * the current cycle number - 1 won't be present in the log if we start |
505 | * writing from our current block number. | 500 | * writing from our current block number. |
506 | * | 501 | * |
507 | * last_blk contains the block number of the first block with a given | 502 | * last_blk contains the block number of the first block with a given |
508 | * cycle number. | 503 | * cycle number. |
509 | * | 504 | * |
510 | * Return: zero if normal, non-zero if error. | 505 | * Return: zero if normal, non-zero if error. |
511 | */ | 506 | */ |
512 | STATIC int | 507 | STATIC int |
513 | xlog_find_head( | 508 | xlog_find_head( |
514 | xlog_t *log, | 509 | xlog_t *log, |
515 | xfs_daddr_t *return_head_blk) | 510 | xfs_daddr_t *return_head_blk) |
516 | { | 511 | { |
517 | xfs_buf_t *bp; | 512 | xfs_buf_t *bp; |
518 | xfs_caddr_t offset; | 513 | xfs_caddr_t offset; |
519 | xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; | 514 | xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; |
520 | int num_scan_bblks; | 515 | int num_scan_bblks; |
521 | uint first_half_cycle, last_half_cycle; | 516 | uint first_half_cycle, last_half_cycle; |
522 | uint stop_on_cycle; | 517 | uint stop_on_cycle; |
523 | int error, log_bbnum = log->l_logBBsize; | 518 | int error, log_bbnum = log->l_logBBsize; |
524 | 519 | ||
525 | /* Is the end of the log device zeroed? */ | 520 | /* Is the end of the log device zeroed? */ |
526 | if ((error = xlog_find_zeroed(log, &first_blk)) == -1) { | 521 | if ((error = xlog_find_zeroed(log, &first_blk)) == -1) { |
527 | *return_head_blk = first_blk; | 522 | *return_head_blk = first_blk; |
528 | 523 | ||
529 | /* Is the whole lot zeroed? */ | 524 | /* Is the whole lot zeroed? */ |
530 | if (!first_blk) { | 525 | if (!first_blk) { |
531 | /* Linux XFS shouldn't generate totally zeroed logs - | 526 | /* Linux XFS shouldn't generate totally zeroed logs - |
532 | * mkfs etc. write a dummy unmount record to a fresh | 527 | * mkfs etc. write a dummy unmount record to a fresh |
533 | * log so we can store the uuid in there | 528 | * log so we can store the uuid in there |
534 | */ | 529 | */ |
535 | xlog_warn("XFS: totally zeroed log"); | 530 | xlog_warn("XFS: totally zeroed log"); |
536 | } | 531 | } |
537 | 532 | ||
538 | return 0; | 533 | return 0; |
539 | } else if (error) { | 534 | } else if (error) { |
540 | xlog_warn("XFS: empty log check failed"); | 535 | xlog_warn("XFS: empty log check failed"); |
541 | return error; | 536 | return error; |
542 | } | 537 | } |
543 | 538 | ||
544 | first_blk = 0; /* get cycle # of 1st block */ | 539 | first_blk = 0; /* get cycle # of 1st block */ |
545 | bp = xlog_get_bp(log, 1); | 540 | bp = xlog_get_bp(log, 1); |
546 | if (!bp) | 541 | if (!bp) |
547 | return ENOMEM; | 542 | return ENOMEM; |
548 | if ((error = xlog_bread(log, 0, 1, bp))) | 543 | if ((error = xlog_bread(log, 0, 1, bp))) |
549 | goto bp_err; | 544 | goto bp_err; |
550 | offset = xlog_align(log, 0, 1, bp); | 545 | offset = xlog_align(log, 0, 1, bp); |
551 | first_half_cycle = xlog_get_cycle(offset); | 546 | first_half_cycle = xlog_get_cycle(offset); |
552 | 547 | ||
553 | last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ | 548 | last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ |
554 | if ((error = xlog_bread(log, last_blk, 1, bp))) | 549 | if ((error = xlog_bread(log, last_blk, 1, bp))) |
555 | goto bp_err; | 550 | goto bp_err; |
556 | offset = xlog_align(log, last_blk, 1, bp); | 551 | offset = xlog_align(log, last_blk, 1, bp); |
557 | last_half_cycle = xlog_get_cycle(offset); | 552 | last_half_cycle = xlog_get_cycle(offset); |
558 | ASSERT(last_half_cycle != 0); | 553 | ASSERT(last_half_cycle != 0); |
559 | 554 | ||
560 | /* | 555 | /* |
561 | * If the 1st half cycle number is equal to the last half cycle number, | 556 | * If the 1st half cycle number is equal to the last half cycle number, |
562 | * then the entire log is stamped with the same cycle number. In this | 557 | * then the entire log is stamped with the same cycle number. In this |
563 | * case, head_blk can't be set to zero (which makes sense). The below | 558 | * case, head_blk can't be set to zero (which makes sense). The below |
564 | * math doesn't work out properly with head_blk equal to zero. Instead, | 559 | * math doesn't work out properly with head_blk equal to zero. Instead, |
565 | * we set it to log_bbnum which is an invalid block number, but this | 560 | * we set it to log_bbnum which is an invalid block number, but this |
566 | * value makes the math correct. If head_blk doesn't changed through | 561 | * value makes the math correct. If head_blk doesn't changed through |
567 | * all the tests below, *head_blk is set to zero at the very end rather | 562 | * all the tests below, *head_blk is set to zero at the very end rather |
568 | * than log_bbnum. In a sense, log_bbnum and zero are the same block | 563 | * than log_bbnum. In a sense, log_bbnum and zero are the same block |
569 | * in a circular file. | 564 | * in a circular file. |
570 | */ | 565 | */ |
571 | if (first_half_cycle == last_half_cycle) { | 566 | if (first_half_cycle == last_half_cycle) { |
572 | /* | 567 | /* |
573 | * In this case we believe that the entire log should have | 568 | * In this case we believe that the entire log should have |
574 | * cycle number last_half_cycle. We need to scan backwards | 569 | * cycle number last_half_cycle. We need to scan backwards |
575 | * from the end verifying that there are no holes still | 570 | * from the end verifying that there are no holes still |
576 | * containing last_half_cycle - 1. If we find such a hole, | 571 | * containing last_half_cycle - 1. If we find such a hole, |
577 | * then the start of that hole will be the new head. The | 572 | * then the start of that hole will be the new head. The |
578 | * simple case looks like | 573 | * simple case looks like |
579 | * x | x ... | x - 1 | x | 574 | * x | x ... | x - 1 | x |
580 | * Another case that fits this picture would be | 575 | * Another case that fits this picture would be |
581 | * x | x + 1 | x ... | x | 576 | * x | x + 1 | x ... | x |
582 | * In this case the head really is somewhere at the end of the | 577 | * In this case the head really is somewhere at the end of the |
583 | * log, as one of the latest writes at the beginning was | 578 | * log, as one of the latest writes at the beginning was |
584 | * incomplete. | 579 | * incomplete. |
585 | * One more case is | 580 | * One more case is |
586 | * x | x + 1 | x ... | x - 1 | x | 581 | * x | x + 1 | x ... | x - 1 | x |
587 | * This is really the combination of the above two cases, and | 582 | * This is really the combination of the above two cases, and |
588 | * the head has to end up at the start of the x-1 hole at the | 583 | * the head has to end up at the start of the x-1 hole at the |
589 | * end of the log. | 584 | * end of the log. |
590 | * | 585 | * |
591 | * In the 256k log case, we will read from the beginning to the | 586 | * In the 256k log case, we will read from the beginning to the |
592 | * end of the log and search for cycle numbers equal to x-1. | 587 | * end of the log and search for cycle numbers equal to x-1. |
593 | * We don't worry about the x+1 blocks that we encounter, | 588 | * We don't worry about the x+1 blocks that we encounter, |
594 | * because we know that they cannot be the head since the log | 589 | * because we know that they cannot be the head since the log |
595 | * started with x. | 590 | * started with x. |
596 | */ | 591 | */ |
597 | head_blk = log_bbnum; | 592 | head_blk = log_bbnum; |
598 | stop_on_cycle = last_half_cycle - 1; | 593 | stop_on_cycle = last_half_cycle - 1; |
599 | } else { | 594 | } else { |
600 | /* | 595 | /* |
601 | * In this case we want to find the first block with cycle | 596 | * In this case we want to find the first block with cycle |
602 | * number matching last_half_cycle. We expect the log to be | 597 | * number matching last_half_cycle. We expect the log to be |
603 | * some variation on | 598 | * some variation on |
604 | * x + 1 ... | x ... | 599 | * x + 1 ... | x ... |
605 | * The first block with cycle number x (last_half_cycle) will | 600 | * The first block with cycle number x (last_half_cycle) will |
606 | * be where the new head belongs. First we do a binary search | 601 | * be where the new head belongs. First we do a binary search |
607 | * for the first occurrence of last_half_cycle. The binary | 602 | * for the first occurrence of last_half_cycle. The binary |
608 | * search may not be totally accurate, so then we scan back | 603 | * search may not be totally accurate, so then we scan back |
609 | * from there looking for occurrences of last_half_cycle before | 604 | * from there looking for occurrences of last_half_cycle before |
610 | * us. If that backwards scan wraps around the beginning of | 605 | * us. If that backwards scan wraps around the beginning of |
611 | * the log, then we look for occurrences of last_half_cycle - 1 | 606 | * the log, then we look for occurrences of last_half_cycle - 1 |
612 | * at the end of the log. The cases we're looking for look | 607 | * at the end of the log. The cases we're looking for look |
613 | * like | 608 | * like |
614 | * x + 1 ... | x | x + 1 | x ... | 609 | * x + 1 ... | x | x + 1 | x ... |
615 | * ^ binary search stopped here | 610 | * ^ binary search stopped here |
616 | * or | 611 | * or |
617 | * x + 1 ... | x ... | x - 1 | x | 612 | * x + 1 ... | x ... | x - 1 | x |
618 | * <---------> less than scan distance | 613 | * <---------> less than scan distance |
619 | */ | 614 | */ |
620 | stop_on_cycle = last_half_cycle; | 615 | stop_on_cycle = last_half_cycle; |
621 | if ((error = xlog_find_cycle_start(log, bp, first_blk, | 616 | if ((error = xlog_find_cycle_start(log, bp, first_blk, |
622 | &head_blk, last_half_cycle))) | 617 | &head_blk, last_half_cycle))) |
623 | goto bp_err; | 618 | goto bp_err; |
624 | } | 619 | } |
625 | 620 | ||
626 | /* | 621 | /* |
627 | * Now validate the answer. Scan back some number of maximum possible | 622 | * Now validate the answer. Scan back some number of maximum possible |
628 | * blocks and make sure each one has the expected cycle number. The | 623 | * blocks and make sure each one has the expected cycle number. The |
629 | * maximum is determined by the total possible amount of buffering | 624 | * maximum is determined by the total possible amount of buffering |
630 | * in the in-core log. The following number can be made tighter if | 625 | * in the in-core log. The following number can be made tighter if |
631 | * we actually look at the block size of the filesystem. | 626 | * we actually look at the block size of the filesystem. |
632 | */ | 627 | */ |
633 | num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); | 628 | num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); |
634 | if (head_blk >= num_scan_bblks) { | 629 | if (head_blk >= num_scan_bblks) { |
635 | /* | 630 | /* |
636 | * We are guaranteed that the entire check can be performed | 631 | * We are guaranteed that the entire check can be performed |
637 | * in one buffer. | 632 | * in one buffer. |
638 | */ | 633 | */ |
639 | start_blk = head_blk - num_scan_bblks; | 634 | start_blk = head_blk - num_scan_bblks; |
640 | if ((error = xlog_find_verify_cycle(log, | 635 | if ((error = xlog_find_verify_cycle(log, |
641 | start_blk, num_scan_bblks, | 636 | start_blk, num_scan_bblks, |
642 | stop_on_cycle, &new_blk))) | 637 | stop_on_cycle, &new_blk))) |
643 | goto bp_err; | 638 | goto bp_err; |
644 | if (new_blk != -1) | 639 | if (new_blk != -1) |
645 | head_blk = new_blk; | 640 | head_blk = new_blk; |
646 | } else { /* need to read 2 parts of log */ | 641 | } else { /* need to read 2 parts of log */ |
647 | /* | 642 | /* |
648 | * We are going to scan backwards in the log in two parts. | 643 | * We are going to scan backwards in the log in two parts. |
649 | * First we scan the physical end of the log. In this part | 644 | * First we scan the physical end of the log. In this part |
650 | * of the log, we are looking for blocks with cycle number | 645 | * of the log, we are looking for blocks with cycle number |
651 | * last_half_cycle - 1. | 646 | * last_half_cycle - 1. |
652 | * If we find one, then we know that the log starts there, as | 647 | * If we find one, then we know that the log starts there, as |
653 | * we've found a hole that didn't get written in going around | 648 | * we've found a hole that didn't get written in going around |
654 | * the end of the physical log. The simple case for this is | 649 | * the end of the physical log. The simple case for this is |
655 | * x + 1 ... | x ... | x - 1 | x | 650 | * x + 1 ... | x ... | x - 1 | x |
656 | * <---------> less than scan distance | 651 | * <---------> less than scan distance |
657 | * If all of the blocks at the end of the log have cycle number | 652 | * If all of the blocks at the end of the log have cycle number |
658 | * last_half_cycle, then we check the blocks at the start of | 653 | * last_half_cycle, then we check the blocks at the start of |
659 | * the log looking for occurrences of last_half_cycle. If we | 654 | * the log looking for occurrences of last_half_cycle. If we |
660 | * find one, then our current estimate for the location of the | 655 | * find one, then our current estimate for the location of the |
661 | * first occurrence of last_half_cycle is wrong and we move | 656 | * first occurrence of last_half_cycle is wrong and we move |
662 | * back to the hole we've found. This case looks like | 657 | * back to the hole we've found. This case looks like |
663 | * x + 1 ... | x | x + 1 | x ... | 658 | * x + 1 ... | x | x + 1 | x ... |
664 | * ^ binary search stopped here | 659 | * ^ binary search stopped here |
665 | * Another case we need to handle that only occurs in 256k | 660 | * Another case we need to handle that only occurs in 256k |
666 | * logs is | 661 | * logs is |
667 | * x + 1 ... | x ... | x+1 | x ... | 662 | * x + 1 ... | x ... | x+1 | x ... |
668 | * ^ binary search stops here | 663 | * ^ binary search stops here |
669 | * In a 256k log, the scan at the end of the log will see the | 664 | * In a 256k log, the scan at the end of the log will see the |
670 | * x + 1 blocks. We need to skip past those since that is | 665 | * x + 1 blocks. We need to skip past those since that is |
671 | * certainly not the head of the log. By searching for | 666 | * certainly not the head of the log. By searching for |
672 | * last_half_cycle-1 we accomplish that. | 667 | * last_half_cycle-1 we accomplish that. |
673 | */ | 668 | */ |
674 | start_blk = log_bbnum - num_scan_bblks + head_blk; | 669 | start_blk = log_bbnum - num_scan_bblks + head_blk; |
675 | ASSERT(head_blk <= INT_MAX && | 670 | ASSERT(head_blk <= INT_MAX && |
676 | (xfs_daddr_t) num_scan_bblks - head_blk >= 0); | 671 | (xfs_daddr_t) num_scan_bblks - head_blk >= 0); |
677 | if ((error = xlog_find_verify_cycle(log, start_blk, | 672 | if ((error = xlog_find_verify_cycle(log, start_blk, |
678 | num_scan_bblks - (int)head_blk, | 673 | num_scan_bblks - (int)head_blk, |
679 | (stop_on_cycle - 1), &new_blk))) | 674 | (stop_on_cycle - 1), &new_blk))) |
680 | goto bp_err; | 675 | goto bp_err; |
681 | if (new_blk != -1) { | 676 | if (new_blk != -1) { |
682 | head_blk = new_blk; | 677 | head_blk = new_blk; |
683 | goto bad_blk; | 678 | goto bad_blk; |
684 | } | 679 | } |
685 | 680 | ||
686 | /* | 681 | /* |
687 | * Scan beginning of log now. The last part of the physical | 682 | * Scan beginning of log now. The last part of the physical |
688 | * log is good. This scan needs to verify that it doesn't find | 683 | * log is good. This scan needs to verify that it doesn't find |
689 | * the last_half_cycle. | 684 | * the last_half_cycle. |
690 | */ | 685 | */ |
691 | start_blk = 0; | 686 | start_blk = 0; |
692 | ASSERT(head_blk <= INT_MAX); | 687 | ASSERT(head_blk <= INT_MAX); |
693 | if ((error = xlog_find_verify_cycle(log, | 688 | if ((error = xlog_find_verify_cycle(log, |
694 | start_blk, (int)head_blk, | 689 | start_blk, (int)head_blk, |
695 | stop_on_cycle, &new_blk))) | 690 | stop_on_cycle, &new_blk))) |
696 | goto bp_err; | 691 | goto bp_err; |
697 | if (new_blk != -1) | 692 | if (new_blk != -1) |
698 | head_blk = new_blk; | 693 | head_blk = new_blk; |
699 | } | 694 | } |
700 | 695 | ||
701 | bad_blk: | 696 | bad_blk: |
702 | /* | 697 | /* |
703 | * Now we need to make sure head_blk is not pointing to a block in | 698 | * Now we need to make sure head_blk is not pointing to a block in |
704 | * the middle of a log record. | 699 | * the middle of a log record. |
705 | */ | 700 | */ |
706 | num_scan_bblks = XLOG_REC_SHIFT(log); | 701 | num_scan_bblks = XLOG_REC_SHIFT(log); |
707 | if (head_blk >= num_scan_bblks) { | 702 | if (head_blk >= num_scan_bblks) { |
708 | start_blk = head_blk - num_scan_bblks; /* don't read head_blk */ | 703 | start_blk = head_blk - num_scan_bblks; /* don't read head_blk */ |
709 | 704 | ||
710 | /* start ptr at last block ptr before head_blk */ | 705 | /* start ptr at last block ptr before head_blk */ |
711 | if ((error = xlog_find_verify_log_record(log, start_blk, | 706 | if ((error = xlog_find_verify_log_record(log, start_blk, |
712 | &head_blk, 0)) == -1) { | 707 | &head_blk, 0)) == -1) { |
713 | error = XFS_ERROR(EIO); | 708 | error = XFS_ERROR(EIO); |
714 | goto bp_err; | 709 | goto bp_err; |
715 | } else if (error) | 710 | } else if (error) |
716 | goto bp_err; | 711 | goto bp_err; |
717 | } else { | 712 | } else { |
718 | start_blk = 0; | 713 | start_blk = 0; |
719 | ASSERT(head_blk <= INT_MAX); | 714 | ASSERT(head_blk <= INT_MAX); |
720 | if ((error = xlog_find_verify_log_record(log, start_blk, | 715 | if ((error = xlog_find_verify_log_record(log, start_blk, |
721 | &head_blk, 0)) == -1) { | 716 | &head_blk, 0)) == -1) { |
722 | /* We hit the beginning of the log during our search */ | 717 | /* We hit the beginning of the log during our search */ |
723 | start_blk = log_bbnum - num_scan_bblks + head_blk; | 718 | start_blk = log_bbnum - num_scan_bblks + head_blk; |
724 | new_blk = log_bbnum; | 719 | new_blk = log_bbnum; |
725 | ASSERT(start_blk <= INT_MAX && | 720 | ASSERT(start_blk <= INT_MAX && |
726 | (xfs_daddr_t) log_bbnum-start_blk >= 0); | 721 | (xfs_daddr_t) log_bbnum-start_blk >= 0); |
727 | ASSERT(head_blk <= INT_MAX); | 722 | ASSERT(head_blk <= INT_MAX); |
728 | if ((error = xlog_find_verify_log_record(log, | 723 | if ((error = xlog_find_verify_log_record(log, |
729 | start_blk, &new_blk, | 724 | start_blk, &new_blk, |
730 | (int)head_blk)) == -1) { | 725 | (int)head_blk)) == -1) { |
731 | error = XFS_ERROR(EIO); | 726 | error = XFS_ERROR(EIO); |
732 | goto bp_err; | 727 | goto bp_err; |
733 | } else if (error) | 728 | } else if (error) |
734 | goto bp_err; | 729 | goto bp_err; |
735 | if (new_blk != log_bbnum) | 730 | if (new_blk != log_bbnum) |
736 | head_blk = new_blk; | 731 | head_blk = new_blk; |
737 | } else if (error) | 732 | } else if (error) |
738 | goto bp_err; | 733 | goto bp_err; |
739 | } | 734 | } |
740 | 735 | ||
741 | xlog_put_bp(bp); | 736 | xlog_put_bp(bp); |
742 | if (head_blk == log_bbnum) | 737 | if (head_blk == log_bbnum) |
743 | *return_head_blk = 0; | 738 | *return_head_blk = 0; |
744 | else | 739 | else |
745 | *return_head_blk = head_blk; | 740 | *return_head_blk = head_blk; |
746 | /* | 741 | /* |
747 | * When returning here, we have a good block number. A bad block | 742 | * When returning here, we have a good block number. A bad block |
748 | * number would mean that during a previous crash, we didn't have a | 743 | * number would mean that during a previous crash, we didn't have a |
749 | * clean break from cycle number N to cycle number N-1. In this case, | 744 | * clean break from cycle number N to cycle number N-1. In this case, |
750 | * we need to find the first block with cycle number N-1. | 745 | * we need to find the first block with cycle number N-1. |
751 | */ | 746 | */ |
752 | return 0; | 747 | return 0; |
753 | 748 | ||
754 | bp_err: | 749 | bp_err: |
755 | xlog_put_bp(bp); | 750 | xlog_put_bp(bp); |
756 | 751 | ||
757 | if (error) | 752 | if (error) |
758 | xlog_warn("XFS: failed to find log head"); | 753 | xlog_warn("XFS: failed to find log head"); |
759 | return error; | 754 | return error; |
760 | } | 755 | } |
761 | 756 | ||
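When the candidate head sits closer to the start of the physical log than the scan window, the validation scan above wraps: it first checks the window's tail at the physical end of the log, starting at log_bbnum - num_scan_bblks + head_blk, and then blocks 0 through head_blk. A small model of how that window splits, with made-up sizes (a 1000-block log and a 64-block window):

    #include <assert.h>

    /* Model of how xlog_find_head() splits the backwards scan when the
     * candidate head is closer to the log start than the scan distance. */
    struct scan {
            int tail_start, tail_len;  /* range at the physical end */
            int head_start, head_len;  /* range at the physical start */
    };

    static struct scan split_scan(int log_bbnum, int num_scan_bblks,
                                  int head_blk)
    {
            struct scan s;

            if (head_blk >= num_scan_bblks) {
                    /* the whole window fits before head_blk: one range */
                    s.tail_start = head_blk - num_scan_bblks;
                    s.tail_len = num_scan_bblks;
                    s.head_start = s.head_len = 0;
            } else {
                    /* window wraps: scan the log end, then [0, head_blk) */
                    s.tail_start = log_bbnum - num_scan_bblks + head_blk;
                    s.tail_len = num_scan_bblks - head_blk;
                    s.head_start = 0;
                    s.head_len = head_blk;
            }
            return s;
    }

    int main(void)
    {
            struct scan s = split_scan(1000, 64, 10);

            assert(s.tail_start == 946 && s.tail_len == 54);
            assert(s.head_len == 10);  /* 54 + 10 covers the 64-block window */
            return 0;
    }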
762 | /* | 757 | /* |
763 | * Find the sync block number or the tail of the log. | 758 | * Find the sync block number or the tail of the log. |
764 | * | 759 | * |
765 | * This will be the block number of the last record to have its | 760 | * This will be the block number of the last record to have its |
766 | * associated buffers synced to disk. Every log record header has | 761 | * associated buffers synced to disk. Every log record header has |
767 | * a sync lsn embedded in it. LSNs hold block numbers, so it is easy | 762 | * a sync lsn embedded in it. LSNs hold block numbers, so it is easy |
768 | * to get a sync block number. The only concern is to figure out which | 763 | * to get a sync block number. The only concern is to figure out which |
769 | * log record header to believe. | 764 | * log record header to believe. |
770 | * | 765 | * |
771 | * The following algorithm uses the log record header with the largest | 766 | * The following algorithm uses the log record header with the largest |
772 | * lsn. The entire log record does not need to be valid. We only care | 767 | * lsn. The entire log record does not need to be valid. We only care |
773 | * that the header is valid. | 768 | * that the header is valid. |
774 | * | 769 | * |
775 | * We could speed up the search by using the current head_blk buffer, but | 770 | * We could speed up the search by using the current head_blk buffer, but |
776 | * it is not available. | 771 | * it is not available. |
777 | */ | 772 | */ |
778 | int | 773 | int |
779 | xlog_find_tail( | 774 | xlog_find_tail( |
780 | xlog_t *log, | 775 | xlog_t *log, |
781 | xfs_daddr_t *head_blk, | 776 | xfs_daddr_t *head_blk, |
782 | xfs_daddr_t *tail_blk) | 777 | xfs_daddr_t *tail_blk) |
783 | { | 778 | { |
784 | xlog_rec_header_t *rhead; | 779 | xlog_rec_header_t *rhead; |
785 | xlog_op_header_t *op_head; | 780 | xlog_op_header_t *op_head; |
786 | xfs_caddr_t offset = NULL; | 781 | xfs_caddr_t offset = NULL; |
787 | xfs_buf_t *bp; | 782 | xfs_buf_t *bp; |
788 | int error, i, found; | 783 | int error, i, found; |
789 | xfs_daddr_t umount_data_blk; | 784 | xfs_daddr_t umount_data_blk; |
790 | xfs_daddr_t after_umount_blk; | 785 | xfs_daddr_t after_umount_blk; |
791 | xfs_lsn_t tail_lsn; | 786 | xfs_lsn_t tail_lsn; |
792 | int hblks; | 787 | int hblks; |
793 | 788 | ||
794 | found = 0; | 789 | found = 0; |
795 | 790 | ||
796 | /* | 791 | /* |
797 | * Find previous log record | 792 | * Find previous log record |
798 | */ | 793 | */ |
799 | if ((error = xlog_find_head(log, head_blk))) | 794 | if ((error = xlog_find_head(log, head_blk))) |
800 | return error; | 795 | return error; |
801 | 796 | ||
802 | bp = xlog_get_bp(log, 1); | 797 | bp = xlog_get_bp(log, 1); |
803 | if (!bp) | 798 | if (!bp) |
804 | return ENOMEM; | 799 | return ENOMEM; |
805 | if (*head_blk == 0) { /* special case */ | 800 | if (*head_blk == 0) { /* special case */ |
806 | if ((error = xlog_bread(log, 0, 1, bp))) | 801 | if ((error = xlog_bread(log, 0, 1, bp))) |
807 | goto bread_err; | 802 | goto bread_err; |
808 | offset = xlog_align(log, 0, 1, bp); | 803 | offset = xlog_align(log, 0, 1, bp); |
809 | if (xlog_get_cycle(offset) == 0) { | 804 | if (xlog_get_cycle(offset) == 0) { |
810 | *tail_blk = 0; | 805 | *tail_blk = 0; |
811 | /* leave all other log inited values alone */ | 806 | /* leave all other log inited values alone */ |
812 | goto exit; | 807 | goto exit; |
813 | } | 808 | } |
814 | } | 809 | } |
815 | 810 | ||
816 | /* | 811 | /* |
817 | * Search backwards looking for log record header block | 812 | * Search backwards looking for log record header block |
818 | */ | 813 | */ |
819 | ASSERT(*head_blk < INT_MAX); | 814 | ASSERT(*head_blk < INT_MAX); |
820 | for (i = (int)(*head_blk) - 1; i >= 0; i--) { | 815 | for (i = (int)(*head_blk) - 1; i >= 0; i--) { |
821 | if ((error = xlog_bread(log, i, 1, bp))) | 816 | if ((error = xlog_bread(log, i, 1, bp))) |
822 | goto bread_err; | 817 | goto bread_err; |
823 | offset = xlog_align(log, i, 1, bp); | 818 | offset = xlog_align(log, i, 1, bp); |
824 | if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { | 819 | if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { |
825 | found = 1; | 820 | found = 1; |
826 | break; | 821 | break; |
827 | } | 822 | } |
828 | } | 823 | } |
829 | /* | 824 | /* |
830 | * If we haven't found the log record header block, start looking | 825 | * If we haven't found the log record header block, start looking |
831 | * again from the end of the physical log. XXXmiken: There should be | 826 | * again from the end of the physical log. XXXmiken: There should be |
832 | * a check here to make sure we didn't search more than N blocks in | 827 | * a check here to make sure we didn't search more than N blocks in |
833 | * the previous code. | 828 | * the previous code. |
834 | */ | 829 | */ |
835 | if (!found) { | 830 | if (!found) { |
836 | for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { | 831 | for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { |
837 | if ((error = xlog_bread(log, i, 1, bp))) | 832 | if ((error = xlog_bread(log, i, 1, bp))) |
838 | goto bread_err; | 833 | goto bread_err; |
839 | offset = xlog_align(log, i, 1, bp); | 834 | offset = xlog_align(log, i, 1, bp); |
840 | if (XLOG_HEADER_MAGIC_NUM == | 835 | if (XLOG_HEADER_MAGIC_NUM == |
841 | be32_to_cpu(*(__be32 *)offset)) { | 836 | be32_to_cpu(*(__be32 *)offset)) { |
842 | found = 2; | 837 | found = 2; |
843 | break; | 838 | break; |
844 | } | 839 | } |
845 | } | 840 | } |
846 | } | 841 | } |
847 | if (!found) { | 842 | if (!found) { |
848 | xlog_warn("XFS: xlog_find_tail: couldn't find sync record"); | 843 | xlog_warn("XFS: xlog_find_tail: couldn't find sync record"); |
849 | ASSERT(0); | 844 | ASSERT(0); |
850 | return XFS_ERROR(EIO); | 845 | return XFS_ERROR(EIO); |
851 | } | 846 | } |
852 | 847 | ||
853 | /* find blk_no of tail of log */ | 848 | /* find blk_no of tail of log */ |
854 | rhead = (xlog_rec_header_t *)offset; | 849 | rhead = (xlog_rec_header_t *)offset; |
855 | *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); | 850 | *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); |
856 | 851 | ||
857 | /* | 852 | /* |
858 | * Reset log values according to the state of the log when we | 853 | * Reset log values according to the state of the log when we |
859 | * crashed. In the case where head_blk == 0, we bump curr_cycle | 854 | * crashed. In the case where head_blk == 0, we bump curr_cycle |
860 | * one because the next write starts a new cycle rather than | 855 | * one because the next write starts a new cycle rather than |
861 | * continuing the cycle of the last good log record. At this | 856 | * continuing the cycle of the last good log record. At this |
862 | * point we have guaranteed that all partial log records have been | 857 | * point we have guaranteed that all partial log records have been |
863 | * accounted for. Therefore, we know that the last good log record | 858 | * accounted for. Therefore, we know that the last good log record |
864 | * written was complete and ended exactly on the end boundary | 859 | * written was complete and ended exactly on the end boundary |
865 | * of the physical log. | 860 | * of the physical log. |
866 | */ | 861 | */ |
867 | log->l_prev_block = i; | 862 | log->l_prev_block = i; |
868 | log->l_curr_block = (int)*head_blk; | 863 | log->l_curr_block = (int)*head_blk; |
869 | log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); | 864 | log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); |
870 | if (found == 2) | 865 | if (found == 2) |
871 | log->l_curr_cycle++; | 866 | log->l_curr_cycle++; |
872 | log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn); | 867 | log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn); |
873 | log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn); | 868 | log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn); |
874 | log->l_grant_reserve_cycle = log->l_curr_cycle; | 869 | log->l_grant_reserve_cycle = log->l_curr_cycle; |
875 | log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); | 870 | log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); |
876 | log->l_grant_write_cycle = log->l_curr_cycle; | 871 | log->l_grant_write_cycle = log->l_curr_cycle; |
877 | log->l_grant_write_bytes = BBTOB(log->l_curr_block); | 872 | log->l_grant_write_bytes = BBTOB(log->l_curr_block); |
878 | 873 | ||
879 | /* | 874 | /* |
880 | * Look for unmount record. If we find it, then we know there | 875 | * Look for unmount record. If we find it, then we know there |
881 | * was a clean unmount. Since 'i' could be the last block in | 876 | * was a clean unmount. Since 'i' could be the last block in |
882 | * the physical log, we convert to a log block before comparing | 877 | * the physical log, we convert to a log block before comparing |
883 | * to the head_blk. | 878 | * to the head_blk. |
884 | * | 879 | * |
885 | * Save the current tail lsn to use to pass to | 880 | * Save the current tail lsn to use to pass to |
886 | * xlog_clear_stale_blocks() below. We won't want to clear the | 881 | * xlog_clear_stale_blocks() below. We won't want to clear the |
887 | * unmount record if there is one, so we pass the lsn of the | 882 | * unmount record if there is one, so we pass the lsn of the |
888 | * unmount record rather than the block after it. | 883 | * unmount record rather than the block after it. |
889 | */ | 884 | */ |
890 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { | 885 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { |
891 | int h_size = be32_to_cpu(rhead->h_size); | 886 | int h_size = be32_to_cpu(rhead->h_size); |
892 | int h_version = be32_to_cpu(rhead->h_version); | 887 | int h_version = be32_to_cpu(rhead->h_version); |
893 | 888 | ||
894 | if ((h_version & XLOG_VERSION_2) && | 889 | if ((h_version & XLOG_VERSION_2) && |
895 | (h_size > XLOG_HEADER_CYCLE_SIZE)) { | 890 | (h_size > XLOG_HEADER_CYCLE_SIZE)) { |
896 | hblks = h_size / XLOG_HEADER_CYCLE_SIZE; | 891 | hblks = h_size / XLOG_HEADER_CYCLE_SIZE; |
897 | if (h_size % XLOG_HEADER_CYCLE_SIZE) | 892 | if (h_size % XLOG_HEADER_CYCLE_SIZE) |
898 | hblks++; | 893 | hblks++; |
899 | } else { | 894 | } else { |
900 | hblks = 1; | 895 | hblks = 1; |
901 | } | 896 | } |
902 | } else { | 897 | } else { |
903 | hblks = 1; | 898 | hblks = 1; |
904 | } | 899 | } |
905 | after_umount_blk = (i + hblks + (int) | 900 | after_umount_blk = (i + hblks + (int) |
906 | BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; | 901 | BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; |
907 | tail_lsn = log->l_tail_lsn; | 902 | tail_lsn = log->l_tail_lsn; |
908 | if (*head_blk == after_umount_blk && | 903 | if (*head_blk == after_umount_blk && |
909 | be32_to_cpu(rhead->h_num_logops) == 1) { | 904 | be32_to_cpu(rhead->h_num_logops) == 1) { |
910 | umount_data_blk = (i + hblks) % log->l_logBBsize; | 905 | umount_data_blk = (i + hblks) % log->l_logBBsize; |
911 | if ((error = xlog_bread(log, umount_data_blk, 1, bp))) { | 906 | if ((error = xlog_bread(log, umount_data_blk, 1, bp))) { |
912 | goto bread_err; | 907 | goto bread_err; |
913 | } | 908 | } |
914 | offset = xlog_align(log, umount_data_blk, 1, bp); | 909 | offset = xlog_align(log, umount_data_blk, 1, bp); |
915 | op_head = (xlog_op_header_t *)offset; | 910 | op_head = (xlog_op_header_t *)offset; |
916 | if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { | 911 | if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { |
917 | /* | 912 | /* |
918 | * Set tail and last sync so that newly written | 913 | * Set tail and last sync so that newly written |
919 | * log records will point recovery to after the | 914 | * log records will point recovery to after the |
920 | * current unmount record. | 915 | * current unmount record. |
921 | */ | 916 | */ |
922 | log->l_tail_lsn = | 917 | log->l_tail_lsn = |
923 | xlog_assign_lsn(log->l_curr_cycle, | 918 | xlog_assign_lsn(log->l_curr_cycle, |
924 | after_umount_blk); | 919 | after_umount_blk); |
925 | log->l_last_sync_lsn = | 920 | log->l_last_sync_lsn = |
926 | xlog_assign_lsn(log->l_curr_cycle, | 921 | xlog_assign_lsn(log->l_curr_cycle, |
927 | after_umount_blk); | 922 | after_umount_blk); |
928 | *tail_blk = after_umount_blk; | 923 | *tail_blk = after_umount_blk; |
929 | 924 | ||
930 | /* | 925 | /* |
931 | * Note that the unmount was clean. If the unmount | 926 | * Note that the unmount was clean. If the unmount |
932 | * was not clean, we need to know this to rebuild the | 927 | * was not clean, we need to know this to rebuild the |
933 | * superblock counters from the perag headers if we | 928 | * superblock counters from the perag headers if we |
934 | * have a filesystem using non-persistent counters. | 929 | * have a filesystem using non-persistent counters. |
935 | */ | 930 | */ |
936 | log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; | 931 | log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; |
937 | } | 932 | } |
938 | } | 933 | } |
939 | 934 | ||
940 | /* | 935 | /* |
941 | * Make sure that there are no blocks in front of the head | 936 | * Make sure that there are no blocks in front of the head |
942 | * with the same cycle number as the head. This can happen | 937 | * with the same cycle number as the head. This can happen |
943 | * because we allow multiple outstanding log writes concurrently, | 938 | * because we allow multiple outstanding log writes concurrently, |
944 | * and the later writes might make it out before earlier ones. | 939 | * and the later writes might make it out before earlier ones. |
945 | * | 940 | * |
946 | * We use the lsn from before modifying it so that we'll never | 941 | * We use the lsn from before modifying it so that we'll never |
947 | * overwrite the unmount record after a clean unmount. | 942 | * overwrite the unmount record after a clean unmount. |
948 | * | 943 | * |
949 | * Do this only if we are going to recover the filesystem | 944 | * Do this only if we are going to recover the filesystem |
950 | * | 945 | * |
951 | * NOTE: This used to say "if (!readonly)" | 946 | * NOTE: This used to say "if (!readonly)" |
952 | * However on Linux, we can & do recover a read-only filesystem. | 947 | * However on Linux, we can & do recover a read-only filesystem. |
953 | * We only skip recovery if NORECOVERY is specified on mount, | 948 | * We only skip recovery if NORECOVERY is specified on mount, |
954 | * in which case we would not be here. | 949 | * in which case we would not be here. |
955 | * | 950 | * |
956 | * But... if the -device- itself is readonly, just skip this. | 951 | * But... if the -device- itself is readonly, just skip this. |
957 | * We can't recover this device anyway, so it won't matter. | 952 | * We can't recover this device anyway, so it won't matter. |
958 | */ | 953 | */ |
959 | if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { | 954 | if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { |
960 | error = xlog_clear_stale_blocks(log, tail_lsn); | 955 | error = xlog_clear_stale_blocks(log, tail_lsn); |
961 | } | 956 | } |
962 | 957 | ||
963 | bread_err: | 958 | bread_err: |
964 | exit: | 959 | exit: |
965 | xlog_put_bp(bp); | 960 | xlog_put_bp(bp); |
966 | 961 | ||
967 | if (error) | 962 | if (error) |
968 | xlog_warn("XFS: failed to locate log tail"); | 963 | xlog_warn("XFS: failed to locate log tail"); |
969 | return error; | 964 | return error; |
970 | } | 965 | } |
971 | 966 | ||
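The v2 header sizing and the wraparound arithmetic above are the two easy things to get wrong in xlog_find_tail(), so here is a small standalone sketch of both. This is an editor's illustration, not part of the commit: XLOG_HEADER_CYCLE_SIZE is hard-coded to the 32k value used by XFS of this vintage, and every number in main() is made up.

#include <stdio.h>

#define XLOG_HEADER_CYCLE_SIZE	32768	/* assumed: one header block per 32k */

/* Ceiling division performed by xlog_find_tail() for v2 logs: how many
 * 512-byte header blocks does a record header of h_size bytes need? */
static int header_blocks(int h_size)
{
	int hblks = h_size / XLOG_HEADER_CYCLE_SIZE;

	if (h_size % XLOG_HEADER_CYCLE_SIZE)
		hblks++;
	return hblks;
}

int main(void)
{
	int i = 990;				/* block of last record header */
	int hblks = header_blocks(65536);	/* 64k iclog -> 2 header blocks */
	int h_len_bblks = 16;			/* record payload, basic blocks */
	int logBBsize = 1000;			/* physical log size */

	/* same modular wraparound used for after_umount_blk above:
	 * 990 + 2 + 16 = 1008, which wraps to block 8 */
	int after_umount_blk = (i + hblks + h_len_bblks) % logBBsize;

	printf("hblks=%d after_umount_blk=%d\n", hblks, after_umount_blk);
	return 0;
}
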
/*
 * Is the log zeroed at all?
 *
 * The last binary search should be changed to perform an X block read
 * once X becomes small enough.  You can then search linearly through
 * the X blocks.  This will cut down on the number of reads we need to do.
 *
 * If the log is partially zeroed, this routine will pass back the blkno
 * of the first block with cycle number 0.  It won't have a complete LR
 * preceding it.
 *
 * Return:
 *	0  => the log is completely written to
 *	-1 => use *blk_no as the first block of the log
 *	>0 => error has occurred
 */
STATIC int
xlog_find_zeroed(
	xlog_t		*log,
	xfs_daddr_t	*blk_no)
{
	xfs_buf_t	*bp;
	xfs_caddr_t	offset;
	uint		first_cycle, last_cycle;
	xfs_daddr_t	new_blk, last_blk, start_blk;
	xfs_daddr_t	num_scan_bblks;
	int		error, log_bbnum = log->l_logBBsize;

	*blk_no = 0;

	/* check totally zeroed log */
	bp = xlog_get_bp(log, 1);
	if (!bp)
		return ENOMEM;
	if ((error = xlog_bread(log, 0, 1, bp)))
		goto bp_err;
	offset = xlog_align(log, 0, 1, bp);
	first_cycle = xlog_get_cycle(offset);
	if (first_cycle == 0) {		/* completely zeroed log */
		*blk_no = 0;
		xlog_put_bp(bp);
		return -1;
	}

	/* check partially zeroed log */
	if ((error = xlog_bread(log, log_bbnum-1, 1, bp)))
		goto bp_err;
	offset = xlog_align(log, log_bbnum-1, 1, bp);
	last_cycle = xlog_get_cycle(offset);
	if (last_cycle != 0) {		/* log completely written to */
		xlog_put_bp(bp);
		return 0;
	} else if (first_cycle != 1) {
		/*
		 * If the cycle of the last block is zero, the cycle of
		 * the first block must be 1. If it's not, maybe we're
		 * not looking at a log... Bail out.
		 */
		xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)");
		return XFS_ERROR(EINVAL);
	}

	/* we have a partially zeroed log */
	last_blk = log_bbnum-1;
	if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0)))
		goto bp_err;

	/*
	 * Validate the answer.  Because there is no way to guarantee that
	 * the entire log is made up of log records which are the same size,
	 * we scan over the defined maximum blocks.  At this point, the maximum
	 * is not chosen to mean anything special.   XXXmiken
	 */
	num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
	ASSERT(num_scan_bblks <= INT_MAX);

	if (last_blk < num_scan_bblks)
		num_scan_bblks = last_blk;
	start_blk = last_blk - num_scan_bblks;

	/*
	 * We search for any instances of cycle number 0 that occur before
	 * our current estimate of the head.  What we're trying to detect is
	 *        1 ... | 0 | 1 | 0...
	 *                       ^ binary search ends here
	 */
	if ((error = xlog_find_verify_cycle(log, start_blk,
					 (int)num_scan_bblks, 0, &new_blk)))
		goto bp_err;
	if (new_blk != -1)
		last_blk = new_blk;

	/*
	 * Potentially back up over a partial log record write.  We don't need
	 * to search the end of the log because we know it is zero.
	 */
	if ((error = xlog_find_verify_log_record(log, start_blk,
				&last_blk, 0)) == -1) {
		error = XFS_ERROR(EIO);
		goto bp_err;
	} else if (error)
		goto bp_err;

	*blk_no = last_blk;
bp_err:
	xlog_put_bp(bp);
	if (error)
		return error;
	return -1;
}

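To make the three-way return convention documented above concrete, here is a hedged userspace sketch of how a caller distinguishes the cases. find_zeroed_stub() is an invented stand-in for xlog_find_zeroed(), and the block number it reports is arbitrary; only the 0 / -1 / positive-error protocol is taken from the comment block above.

#include <stdio.h>

/* invented stand-in: returns 0 (fully written), -1 (use *blk_no), or
 * a positive errno-style value, mirroring xlog_find_zeroed() */
static int find_zeroed_stub(long *blk_no)
{
	*blk_no = 42;		/* pretend block 42 is the first zeroed block */
	return -1;
}

int main(void)
{
	long first_blk, head_blk;
	int error = find_zeroed_stub(&first_blk);

	if (error > 0)
		return error;		/* real failure: propagate it */
	if (error == -1)
		head_blk = first_blk;	/* partially zeroed: head found */
	else
		head_blk = -1;		/* fully written: search by cycle */
	printf("head_blk = %ld\n", head_blk);
	return 0;
}
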
/*
 * These are simple subroutines used by xlog_clear_stale_blocks() below
 * to initialize a buffer full of empty log record headers and write
 * them into the log.
 */
STATIC void
xlog_add_record(
	xlog_t			*log,
	xfs_caddr_t		buf,
	int			cycle,
	int			block,
	int			tail_cycle,
	int			tail_block)
{
	xlog_rec_header_t	*recp = (xlog_rec_header_t *)buf;

	memset(buf, 0, BBSIZE);
	recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
	recp->h_cycle = cpu_to_be32(cycle);
	recp->h_version = cpu_to_be32(
			xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1);
	recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block));
	recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block));
	recp->h_fmt = cpu_to_be32(XLOG_FMT);
	memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t));
}

STATIC int
xlog_write_log_records(
	xlog_t		*log,
	int		cycle,
	int		start_block,
	int		blocks,
	int		tail_cycle,
	int		tail_block)
{
	xfs_caddr_t	offset;
	xfs_buf_t	*bp;
	int		balign, ealign;
	int		sectbb = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1);
	int		end_block = start_block + blocks;
	int		bufblks;
	int		error = 0;
	int		i, j = 0;

	bufblks = 1 << ffs(blocks);
	while (!(bp = xlog_get_bp(log, bufblks))) {
		bufblks >>= 1;
		if (bufblks <= log->l_sectbb_log)
			return ENOMEM;
	}

	/* We may need to do a read at the start to fill in part of
	 * the buffer in the starting sector not covered by the first
	 * write below.
	 */
	balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block);
	if (balign != start_block) {
		if ((error = xlog_bread(log, start_block, 1, bp))) {
			xlog_put_bp(bp);
			return error;
		}
		j = start_block - balign;
	}

	for (i = start_block; i < end_block; i += bufblks) {
		int		bcount, endcount;

		bcount = min(bufblks, end_block - start_block);
		endcount = bcount - j;

		/* We may need to do a read at the end to fill in part of
		 * the buffer in the final sector not covered by the write.
		 * If this is the same sector as the above read, skip it.
		 */
		ealign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, end_block);
		if (j == 0 && (start_block + endcount > ealign)) {
			offset = XFS_BUF_PTR(bp);
			balign = BBTOB(ealign - start_block);
			error = XFS_BUF_SET_PTR(bp, offset + balign,
						BBTOB(sectbb));
			if (!error)
				error = xlog_bread(log, ealign, sectbb, bp);
			if (!error)
				error = XFS_BUF_SET_PTR(bp, offset, bufblks);
			if (error)
				break;
		}

		offset = xlog_align(log, start_block, endcount, bp);
		for (; j < endcount; j++) {
			xlog_add_record(log, offset, cycle, i+j,
					tail_cycle, tail_block);
			offset += BBSIZE;
		}
		error = xlog_bwrite(log, start_block, endcount, bp);
		if (error)
			break;
		start_block += endcount;
		j = 0;
	}
	xlog_put_bp(bp);
	return error;
}

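The read-modify-write dance around balign/ealign above exists because the log device may have sectors larger than a basic block, and a write must never clobber the parts of a sector it does not own. A minimal standalone sketch of the rounding, assuming the round-down macro behaves as a power-of-two mask (an assumption about XLOG_SECTOR_ROUNDDOWN_BLKNO, which is defined outside this hunk) and using arbitrary numbers:

#include <stdio.h>

/* assumed behaviour of XLOG_SECTOR_ROUNDDOWN_BLKNO(): mask the block
 * number down to the start of its sector (sectbb is a power of two) */
static int rounddown_blkno(int blkno, int sectbb)
{
	return blkno & ~(sectbb - 1);
}

int main(void)
{
	int sectbb = 4;		/* 2k sectors = 4 basic blocks */
	int start_block = 5;	/* first block we want to stamp */

	int balign = rounddown_blkno(start_block, sectbb);
	int j = start_block - balign;

	/* balign=4, j=1: the sector holding block 5 also holds block 4,
	 * so block 4 must be read back first and preserved by the write */
	printf("balign=%d, preserve %d leading block(s)\n", balign, j);
	return 0;
}
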
/*
 * This routine is called to blow away any incomplete log writes out
 * in front of the log head.  We do this so that we won't become confused
 * if we come up, write only a little bit more, and then crash again.
 * If we leave the partial log records out there, this situation could
 * cause us to think those partial writes are valid blocks since they
 * have the current cycle number.  We get rid of them by overwriting them
 * with empty log records with the old cycle number rather than the
 * current one.
 *
 * The tail lsn is passed in rather than taken from
 * the log so that we will not write over the unmount record after a
 * clean unmount in a 512 block log.  Doing so would leave the log without
 * any valid log records in it until a new one was written.  If we crashed
 * during that time we would not be able to recover.
 */
STATIC int
xlog_clear_stale_blocks(
	xlog_t		*log,
	xfs_lsn_t	tail_lsn)
{
	int		tail_cycle, head_cycle;
	int		tail_block, head_block;
	int		tail_distance, max_distance;
	int		distance;
	int		error;

	tail_cycle = CYCLE_LSN(tail_lsn);
	tail_block = BLOCK_LSN(tail_lsn);
	head_cycle = log->l_curr_cycle;
	head_block = log->l_curr_block;

	/*
	 * Figure out the distance between the new head of the log
	 * and the tail.  We want to write over any blocks beyond the
	 * head that we may have written just before the crash, but
	 * we don't want to overwrite the tail of the log.
	 */
	if (head_cycle == tail_cycle) {
		/*
		 * The tail is behind the head in the physical log,
		 * so the distance from the head to the tail is the
		 * distance from the head to the end of the log plus
		 * the distance from the beginning of the log to the
		 * tail.
		 */
		if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) {
			XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)",
					 XFS_ERRLEVEL_LOW, log->l_mp);
			return XFS_ERROR(EFSCORRUPTED);
		}
		tail_distance = tail_block + (log->l_logBBsize - head_block);
	} else {
		/*
		 * The head is behind the tail in the physical log,
		 * so the distance from the head to the tail is just
		 * the tail block minus the head block.
		 */
		if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){
			XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)",
					 XFS_ERRLEVEL_LOW, log->l_mp);
			return XFS_ERROR(EFSCORRUPTED);
		}
		tail_distance = tail_block - head_block;
	}

	/*
	 * If the head is right up against the tail, we can't clear
	 * anything.
	 */
	if (tail_distance <= 0) {
		ASSERT(tail_distance == 0);
		return 0;
	}

	max_distance = XLOG_TOTAL_REC_SHIFT(log);
	/*
	 * Take the smaller of the maximum amount of outstanding I/O
	 * we could have and the distance to the tail to clear out.
	 * We take the smaller so that we don't overwrite the tail and
	 * we don't waste all day writing from the head to the tail
	 * for no reason.
	 */
	max_distance = MIN(max_distance, tail_distance);

	if ((head_block + max_distance) <= log->l_logBBsize) {
		/*
		 * We can stomp all the blocks we need to without
		 * wrapping around the end of the log.  Just do it
		 * in a single write.  Use the cycle number of the
		 * current cycle minus one so that the log will look like:
		 *     n ... | n - 1 ...
		 */
		error = xlog_write_log_records(log, (head_cycle - 1),
				head_block, max_distance, tail_cycle,
				tail_block);
		if (error)
			return error;
	} else {
		/*
		 * We need to wrap around the end of the physical log in
		 * order to clear all the blocks.  Do it in two separate
		 * I/Os.  The first write should be from the head to the
		 * end of the physical log, and it should use the current
		 * cycle number minus one just like above.
		 */
		distance = log->l_logBBsize - head_block;
		error = xlog_write_log_records(log, (head_cycle - 1),
				head_block, distance, tail_cycle,
				tail_block);

		if (error)
			return error;

		/*
		 * Now write the blocks at the start of the physical log.
		 * This writes the remainder of the blocks we want to clear.
		 * It uses the current cycle number since we're now on the
		 * same cycle as the head so that we get:
		 *    n ... n ... | n - 1 ...
		 *    ^^^^^ blocks we're writing
		 */
		distance = max_distance - (log->l_logBBsize - head_block);
		error = xlog_write_log_records(log, head_cycle, 0, distance,
				tail_cycle, tail_block);
		if (error)
			return error;
	}

	return 0;
}

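The two distance formulas in xlog_clear_stale_blocks() fall straight out of whether the tail sits physically behind or ahead of the head. A self-contained sketch of the same arithmetic, with made-up block numbers (the helper name and the values in main() are the editor's, not the kernel's):

#include <stdio.h>

/* distance from head to tail, as computed by xlog_clear_stale_blocks() */
static int tail_distance(int head_cycle, int head_block,
			 int tail_cycle, int tail_block, int logBBsize)
{
	if (head_cycle == tail_cycle)
		/* tail physically behind head: go through the end of the log */
		return tail_block + (logBBsize - head_block);
	/* head physically behind tail (head_cycle == tail_cycle + 1) */
	return tail_block - head_block;
}

int main(void)
{
	int logBBsize = 1000;

	/* same cycle: head at 900, tail at 100 -> 100 + (1000 - 900) = 200 */
	printf("%d\n", tail_distance(7, 900, 7, 100, logBBsize));
	/* head wrapped one cycle ahead: head at 100, tail at 900 -> 800 */
	printf("%d\n", tail_distance(8, 100, 7, 900, logBBsize));
	return 0;
}
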
/******************************************************************************
 *
 *		Log recover routines
 *
 ******************************************************************************
 */

STATIC xlog_recover_t *
xlog_recover_find_tid(
	xlog_recover_t	*q,
	xlog_tid_t	tid)
{
	xlog_recover_t	*p = q;

	while (p != NULL) {
		if (p->r_log_tid == tid)
			break;
		p = p->r_next;
	}
	return p;
}

STATIC void
xlog_recover_put_hashq(
	xlog_recover_t	**q,
	xlog_recover_t	*trans)
{
	trans->r_next = *q;
	*q = trans;
}

STATIC void
xlog_recover_add_item(
	xlog_recover_item_t	**itemq)
{
	xlog_recover_item_t	*item;

	item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
	xlog_recover_insert_item_backq(itemq, item);
}

STATIC int
xlog_recover_add_to_cont_trans(
	xlog_recover_t	*trans,
	xfs_caddr_t	dp,
	int		len)
{
	xlog_recover_item_t	*item;
	xfs_caddr_t		ptr, old_ptr;
	int			old_len;

	item = trans->r_itemq;
	if (item == NULL) {
		/* finish copying rest of trans header */
		xlog_recover_add_item(&trans->r_itemq);
		ptr = (xfs_caddr_t) &trans->r_theader +
				sizeof(xfs_trans_header_t) - len;
		memcpy(ptr, dp, len); /* d, s, l */
		return 0;
	}
	item = item->ri_prev;

	old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
	old_len = item->ri_buf[item->ri_cnt-1].i_len;

	ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u);
	memcpy(&ptr[old_len], dp, len); /* d, s, l */
	item->ri_buf[item->ri_cnt-1].i_len += len;
	item->ri_buf[item->ri_cnt-1].i_addr = ptr;
	return 0;
}

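The continuation path above is a grow-and-append: the last region of the last item is reallocated and the new bytes are copied onto its tail. A userspace sketch of just that pattern, with realloc() standing in for kmem_realloc() (that substitution, the helper name, and the strings are the editor's assumptions):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* grow the last region and append the continuation bytes, the way
 * xlog_recover_add_to_cont_trans() does */
static char *append_region(char *old_ptr, int old_len, const char *dp, int len)
{
	char *ptr = realloc(old_ptr, old_len + len);

	if (!ptr)
		exit(1);
	memcpy(&ptr[old_len], dp, len);
	return ptr;
}

int main(void)
{
	char *region = malloc(5);

	memcpy(region, "hello", 5);
	region = append_region(region, 5, " world", 6);
	printf("%.11s\n", region);	/* -> "hello world" */
	free(region);
	return 0;
}
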
/*
 * The next region to add is the start of a new region.  It could be
 * a whole region or it could be the first part of a new region.  Because
 * of this, the assumption here is that the type and size fields of all
 * format structures fit into the first 32 bits of the structure.
 *
 * This works because all regions must be 32 bit aligned.  Therefore, we
 * either have both fields or we have neither field.  In the case we have
 * neither field, the data part of the region is zero length.  We only have
 * a log_op_header and can throw away the header since a new one will appear
 * later.  If we have at least 4 bytes, then we can determine how many regions
 * will appear in the current log item.
 */
STATIC int
xlog_recover_add_to_trans(
	xlog_recover_t	*trans,
	xfs_caddr_t	dp,
	int		len)
{
	xfs_inode_log_format_t	*in_f;			/* any will do */
	xlog_recover_item_t	*item;
	xfs_caddr_t		ptr;

	if (!len)
		return 0;
	item = trans->r_itemq;
	if (item == NULL) {
		/* we need to catch log corruptions here */
		if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
			xlog_warn("XFS: xlog_recover_add_to_trans: "
				  "bad header magic number");
			ASSERT(0);
			return XFS_ERROR(EIO);
		}
		if (len == sizeof(xfs_trans_header_t))
			xlog_recover_add_item(&trans->r_itemq);
		memcpy(&trans->r_theader, dp, len); /* d, s, l */
		return 0;
	}

	ptr = kmem_alloc(len, KM_SLEEP);
	memcpy(ptr, dp, len);
	in_f = (xfs_inode_log_format_t *)ptr;

	if (item->ri_prev->ri_total != 0 &&
	    item->ri_prev->ri_total == item->ri_prev->ri_cnt) {
		xlog_recover_add_item(&trans->r_itemq);
	}
	item = trans->r_itemq;
	item = item->ri_prev;

	if (item->ri_total == 0) {		/* first region to be added */
		item->ri_total = in_f->ilf_size;
		ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM);
		item->ri_buf = kmem_zalloc((item->ri_total *
					    sizeof(xfs_log_iovec_t)), KM_SLEEP);
	}
	ASSERT(item->ri_total > item->ri_cnt);
	/* Description region is ri_buf[0] */
	item->ri_buf[item->ri_cnt].i_addr = ptr;
	item->ri_buf[item->ri_cnt].i_len = len;
	item->ri_cnt++;
	return 0;
}

STATIC void
xlog_recover_new_tid(
	xlog_recover_t	**q,
	xlog_tid_t	tid,
	xfs_lsn_t	lsn)
{
	xlog_recover_t	*trans;

	trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
	trans->r_log_tid = tid;
	trans->r_lsn = lsn;
	xlog_recover_put_hashq(q, trans);
}

STATIC int
xlog_recover_unlink_tid(
	xlog_recover_t	**q,
	xlog_recover_t	*trans)
{
	xlog_recover_t	*tp;
	int		found = 0;

	ASSERT(trans != NULL);
	if (trans == *q) {
		*q = (*q)->r_next;
	} else {
		tp = *q;
		while (tp) {
			if (tp->r_next == trans) {
				found = 1;
				break;
			}
			tp = tp->r_next;
		}
		if (!found) {
			xlog_warn(
			     "XFS: xlog_recover_unlink_tid: trans not found");
			ASSERT(0);
			return XFS_ERROR(EIO);
		}
		tp->r_next = tp->r_next->r_next;
	}
	return 0;
}

STATIC void
xlog_recover_insert_item_backq(
	xlog_recover_item_t	**q,
	xlog_recover_item_t	*item)
{
	if (*q == NULL) {
		item->ri_prev = item->ri_next = item;
		*q = item;
	} else {
		item->ri_next = *q;
		item->ri_prev = (*q)->ri_prev;
		(*q)->ri_prev = item;
		item->ri_prev->ri_next = item;
	}
}

STATIC void
xlog_recover_insert_item_frontq(
	xlog_recover_item_t	**q,
	xlog_recover_item_t	*item)
{
	xlog_recover_insert_item_backq(q, item);
	*q = item;
}

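The two insert helpers above maintain a circular doubly linked ring whose head is *q; the front variant is simply the back variant plus a head rotation. Here is a self-contained copy of the same pointer logic with an invented struct item, to show the resulting traversal order (only the struct and main() are the editor's):

#include <stdio.h>

struct item {
	struct item	*prev, *next;
	int		id;
};

/* same pointer gymnastics as xlog_recover_insert_item_backq() */
static void insert_backq(struct item **q, struct item *item)
{
	if (*q == NULL) {
		item->prev = item->next = item;
		*q = item;
	} else {
		item->next = *q;
		item->prev = (*q)->prev;
		(*q)->prev = item;
		item->prev->next = item;
	}
}

/* ...and xlog_recover_insert_item_frontq(): append, then rotate the head */
static void insert_frontq(struct item **q, struct item *item)
{
	insert_backq(q, item);
	*q = item;
}

int main(void)
{
	struct item a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };
	struct item *q = NULL, *p;

	insert_backq(&q, &a);
	insert_backq(&q, &b);	/* ring order from head: 1 2 */
	insert_frontq(&q, &c);	/* ring order from head: 3 1 2 */

	p = q;
	do {
		printf("%d ", p->id);
		p = p->next;
	} while (p != q);
	printf("\n");		/* prints: 3 1 2 */
	return 0;
}
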
STATIC int
xlog_recover_reorder_trans(
	xlog_recover_t	*trans)
{
	xlog_recover_item_t	*first_item, *itemq, *itemq_next;
	xfs_buf_log_format_t	*buf_f;
	ushort			flags = 0;

	first_item = itemq = trans->r_itemq;
	trans->r_itemq = NULL;
	do {
		itemq_next = itemq->ri_next;
		buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr;

		switch (ITEM_TYPE(itemq)) {
		case XFS_LI_BUF:
			flags = buf_f->blf_flags;
			if (!(flags & XFS_BLI_CANCEL)) {
				xlog_recover_insert_item_frontq(&trans->r_itemq,
								itemq);
				break;
			}
			/* cancelled buffers fall through to the back queue */
		case XFS_LI_INODE:
		case XFS_LI_DQUOT:
		case XFS_LI_QUOTAOFF:
		case XFS_LI_EFD:
		case XFS_LI_EFI:
			xlog_recover_insert_item_backq(&trans->r_itemq, itemq);
			break;
		default:
			xlog_warn(
	"XFS: xlog_recover_reorder_trans: unrecognized type of log operation");
			ASSERT(0);
			return XFS_ERROR(EIO);
		}
		itemq = itemq_next;
	} while (first_item != itemq);
	return 0;
}

/*
 * Build up the table of buf cancel records so that we don't replay
 * cancelled data in the second pass.  For buffer records that are
 * not cancel records, there is nothing to do here so we just return.
 *
 * If we get a cancel record which is already in the table, this indicates
 * that the buffer was cancelled multiple times.  In order to ensure
 * that during pass 2 we keep the record in the table until we reach its
 * last occurrence in the log, we keep a reference count in the cancel
 * record in the table to tell us how many times we expect to see this
 * record during the second pass.
 */
STATIC void
xlog_recover_do_buffer_pass1(
	xlog_t			*log,
	xfs_buf_log_format_t	*buf_f)
{
	xfs_buf_cancel_t	*bcp;
	xfs_buf_cancel_t	*nextp;
	xfs_buf_cancel_t	*prevp;
	xfs_buf_cancel_t	**bucket;
	xfs_daddr_t		blkno = 0;
	uint			len = 0;
	ushort			flags = 0;

	switch (buf_f->blf_type) {
	case XFS_LI_BUF:
		blkno = buf_f->blf_blkno;
		len = buf_f->blf_len;
		flags = buf_f->blf_flags;
		break;
	}

	/*
	 * If this isn't a cancel buffer item, then just return.
	 */
	if (!(flags & XFS_BLI_CANCEL))
		return;

	/*
	 * Insert an xfs_buf_cancel record into the hash table of
	 * them.  If there is already an identical record, bump
	 * its reference count.
	 */
	bucket = &log->l_buf_cancel_table[(__uint64_t)blkno %
					  XLOG_BC_TABLE_SIZE];
	/*
	 * If the hash bucket is empty then just insert a new record into
	 * the bucket.
	 */
	if (*bucket == NULL) {
		bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t),
						     KM_SLEEP);
		bcp->bc_blkno = blkno;
		bcp->bc_len = len;
		bcp->bc_refcount = 1;
		bcp->bc_next = NULL;
		*bucket = bcp;
		return;
	}

	/*
	 * The hash bucket is not empty, so search for duplicates of our
	 * record.  If we find one, just bump its refcount.  If not
	 * then add us at the end of the list.
	 */
	prevp = NULL;
	nextp = *bucket;
	while (nextp != NULL) {
		if (nextp->bc_blkno == blkno && nextp->bc_len == len) {
			nextp->bc_refcount++;
			return;
		}
		prevp = nextp;
		nextp = nextp->bc_next;
	}
	ASSERT(prevp != NULL);
	bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t),
					     KM_SLEEP);
	bcp->bc_blkno = blkno;
	bcp->bc_len = len;
	bcp->bc_refcount = 1;
	bcp->bc_next = NULL;
	prevp->bc_next = bcp;
}

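A compressed userspace model of the pass-1 cancel table may help: hash on block number, refcount exact duplicates. XLOG_BC_TABLE_SIZE's real value is not visible in this hunk, so the 64 below is an assumption, as are the block numbers in main(); unlike the kernel code, the sketch prepends new records to the bucket, which is harmless here since bucket order carries no meaning:

#include <stdio.h>
#include <stdlib.h>

#define BC_TABLE_SIZE	64	/* assumed; stands in for XLOG_BC_TABLE_SIZE */

struct buf_cancel {
	long			blkno;
	unsigned int		len;
	int			refcount;
	struct buf_cancel	*next;
};

static struct buf_cancel *table[BC_TABLE_SIZE];

/* mirror of xlog_recover_do_buffer_pass1()'s insert-or-bump logic */
static void add_cancel(long blkno, unsigned int len)
{
	struct buf_cancel **bucket = &table[blkno % BC_TABLE_SIZE];
	struct buf_cancel *bcp;

	for (bcp = *bucket; bcp != NULL; bcp = bcp->next) {
		if (bcp->blkno == blkno && bcp->len == len) {
			bcp->refcount++;	/* cancelled again: count it */
			return;
		}
	}
	bcp = calloc(1, sizeof(*bcp));
	bcp->blkno = blkno;
	bcp->len = len;
	bcp->refcount = 1;
	bcp->next = *bucket;
	*bucket = bcp;
}

int main(void)
{
	add_cancel(128, 8);
	add_cancel(128, 8);	/* same buffer cancelled twice */
	add_cancel(200, 8);
	printf("refcount(128) = %d\n", table[128 % BC_TABLE_SIZE]->refcount);
	return 0;			/* prints: refcount(128) = 2 */
}
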
/*
 * Check to see whether the buffer being recovered has a corresponding
 * entry in the buffer cancel record table.  If it does then return 1
 * so that it will be cancelled, otherwise return 0.  If the buffer is
 * actually a buffer cancel item (XFS_BLI_CANCEL is set), then decrement
 * the refcount on the entry in the table and remove it from the table
 * if this is the last reference.
 *
 * We remove the cancel record from the table when we encounter its
 * last occurrence in the log so that if the same buffer is re-used
 * again after its last cancellation we actually replay the changes
 * made at that point.
 */
STATIC int
xlog_check_buffer_cancelled(
	xlog_t			*log,
	xfs_daddr_t		blkno,
	uint			len,
	ushort			flags)
{
	xfs_buf_cancel_t	*bcp;
	xfs_buf_cancel_t	*prevp;
	xfs_buf_cancel_t	**bucket;

	if (log->l_buf_cancel_table == NULL) {
		/*
		 * There is nothing in the table built in pass one,
		 * so this buffer must not be cancelled.
		 */
		ASSERT(!(flags & XFS_BLI_CANCEL));
		return 0;
	}

	bucket = &log->l_buf_cancel_table[(__uint64_t)blkno %
					  XLOG_BC_TABLE_SIZE];
	bcp = *bucket;
	if (bcp == NULL) {
		/*
		 * There is no corresponding entry in the table built
		 * in pass one, so this buffer has not been cancelled.
		 */
		ASSERT(!(flags & XFS_BLI_CANCEL));
		return 0;
	}

	/*
	 * Search for an entry in the buffer cancel table that
	 * matches our buffer.
	 */
	prevp = NULL;
	while (bcp != NULL) {
		if (bcp->bc_blkno == blkno && bcp->bc_len == len) {
1705 | /* | 1700 | /* |
1706 | * We've got a match, so return 1 so that the | 1701 | * We've got a match, so return 1 so that the |
1707 | * recovery of this buffer is cancelled. | 1702 | * recovery of this buffer is cancelled. |
1708 | * If this buffer is actually a buffer cancel | 1703 | * If this buffer is actually a buffer cancel |
1709 | * log item, then decrement the refcount on the | 1704 | * log item, then decrement the refcount on the |
1710 | * one in the table and remove it if this is the | 1705 | * one in the table and remove it if this is the |
1711 | * last reference. | 1706 | * last reference. |
1712 | */ | 1707 | */ |
1713 | if (flags & XFS_BLI_CANCEL) { | 1708 | if (flags & XFS_BLI_CANCEL) { |
1714 | bcp->bc_refcount--; | 1709 | bcp->bc_refcount--; |
1715 | if (bcp->bc_refcount == 0) { | 1710 | if (bcp->bc_refcount == 0) { |
1716 | if (prevp == NULL) { | 1711 | if (prevp == NULL) { |
1717 | *bucket = bcp->bc_next; | 1712 | *bucket = bcp->bc_next; |
1718 | } else { | 1713 | } else { |
1719 | prevp->bc_next = bcp->bc_next; | 1714 | prevp->bc_next = bcp->bc_next; |
1720 | } | 1715 | } |
1721 | kmem_free(bcp); | 1716 | kmem_free(bcp); |
1722 | } | 1717 | } |
1723 | } | 1718 | } |
1724 | return 1; | 1719 | return 1; |
1725 | } | 1720 | } |
1726 | prevp = bcp; | 1721 | prevp = bcp; |
1727 | bcp = bcp->bc_next; | 1722 | bcp = bcp->bc_next; |
1728 | } | 1723 | } |
1729 | /* | 1724 | /* |
1730 | * We didn't find a corresponding entry in the table, so | 1725 | * We didn't find a corresponding entry in the table, so |
1731 | * return 0 so that the buffer is NOT cancelled. | 1726 | * return 0 so that the buffer is NOT cancelled. |
1732 | */ | 1727 | */ |
1733 | ASSERT(!(flags & XFS_BLI_CANCEL)); | 1728 | ASSERT(!(flags & XFS_BLI_CANCEL)); |
1734 | return 0; | 1729 | return 0; |
1735 | } | 1730 | } |
1736 | 1731 | ||
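The matching pass-2 lookup, sketched against the structures from the insert sketch above. Walking with a pointer-to-pointer is an alternative to the explicit prevp bookkeeping in the kernel code: *linkp is either the bucket head or the previous record's bc_next field, so unlinking the final reference is a single assignment in both cases.

static int bc_check_cancelled(uint64_t blkno, unsigned int len, int is_cancel)
{
	struct buf_cancel **linkp = &bc_table[blkno % BC_TABLE_SIZE];
	struct buf_cancel *bcp;

	while ((bcp = *linkp) != NULL) {
		if (bcp->bc_blkno == blkno && bcp->bc_len == len) {
			/*
			 * A cancel item consumes one reference; drop the
			 * record once the last reference goes away so a
			 * later reuse of the blocks is replayed normally.
			 */
			if (is_cancel && --bcp->bc_refcount == 0) {
				*linkp = bcp->bc_next;
				free(bcp);
			}
			return 1;	/* cancelled: skip this buffer */
		}
		linkp = &bcp->bc_next;
	}
	return 0;			/* not cancelled: replay it */
}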
1737 | STATIC int | 1732 | STATIC int |
1738 | xlog_recover_do_buffer_pass2( | 1733 | xlog_recover_do_buffer_pass2( |
1739 | xlog_t *log, | 1734 | xlog_t *log, |
1740 | xfs_buf_log_format_t *buf_f) | 1735 | xfs_buf_log_format_t *buf_f) |
1741 | { | 1736 | { |
1742 | xfs_daddr_t blkno = 0; | 1737 | xfs_daddr_t blkno = 0; |
1743 | ushort flags = 0; | 1738 | ushort flags = 0; |
1744 | uint len = 0; | 1739 | uint len = 0; |
1745 | 1740 | ||
1746 | switch (buf_f->blf_type) { | 1741 | switch (buf_f->blf_type) { |
1747 | case XFS_LI_BUF: | 1742 | case XFS_LI_BUF: |
1748 | blkno = buf_f->blf_blkno; | 1743 | blkno = buf_f->blf_blkno; |
1749 | flags = buf_f->blf_flags; | 1744 | flags = buf_f->blf_flags; |
1750 | len = buf_f->blf_len; | 1745 | len = buf_f->blf_len; |
1751 | break; | 1746 | break; |
1752 | } | 1747 | } |
1753 | 1748 | ||
1754 | return xlog_check_buffer_cancelled(log, blkno, len, flags); | 1749 | return xlog_check_buffer_cancelled(log, blkno, len, flags); |
1755 | } | 1750 | } |
1756 | 1751 | ||
1757 | /* | 1752 | /* |
1758 | * Perform recovery for a buffer full of inodes. In these buffers, | 1753 | * Perform recovery for a buffer full of inodes. In these buffers, |
1759 | * the only data which should be recovered is that which corresponds | 1754 | * the only data which should be recovered is that which corresponds |
1760 | * to the di_next_unlinked pointers in the on-disk inode structures. | 1755 | * to the di_next_unlinked pointers in the on-disk inode structures. |
1761 | * The rest of the data for the inodes is always logged through the | 1756 | * The rest of the data for the inodes is always logged through the |
1762 | * inodes themselves rather than the inode buffer and is recovered | 1757 | * inodes themselves rather than the inode buffer and is recovered |
1763 | * in xlog_recover_do_inode_trans(). | 1758 | * in xlog_recover_do_inode_trans(). |
1764 | * | 1759 | * |
1765 | * The only time when buffers full of inodes are fully recovered is | 1760 | * The only time when buffers full of inodes are fully recovered is |
1766 | * when the buffer is full of newly allocated inodes. In this case | 1761 | * when the buffer is full of newly allocated inodes. In this case |
1767 | * the buffer will not be marked as an inode buffer and so will be | 1762 | * the buffer will not be marked as an inode buffer and so will be |
1768 | * sent to xlog_recover_do_reg_buffer() below during recovery. | 1763 | * sent to xlog_recover_do_reg_buffer() below during recovery. |
1769 | */ | 1764 | */ |
1770 | STATIC int | 1765 | STATIC int |
1771 | xlog_recover_do_inode_buffer( | 1766 | xlog_recover_do_inode_buffer( |
1772 | xfs_mount_t *mp, | 1767 | xfs_mount_t *mp, |
1773 | xlog_recover_item_t *item, | 1768 | xlog_recover_item_t *item, |
1774 | xfs_buf_t *bp, | 1769 | xfs_buf_t *bp, |
1775 | xfs_buf_log_format_t *buf_f) | 1770 | xfs_buf_log_format_t *buf_f) |
1776 | { | 1771 | { |
1777 | int i; | 1772 | int i; |
1778 | int item_index; | 1773 | int item_index; |
1779 | int bit; | 1774 | int bit; |
1780 | int nbits; | 1775 | int nbits; |
1781 | int reg_buf_offset; | 1776 | int reg_buf_offset; |
1782 | int reg_buf_bytes; | 1777 | int reg_buf_bytes; |
1783 | int next_unlinked_offset; | 1778 | int next_unlinked_offset; |
1784 | int inodes_per_buf; | 1779 | int inodes_per_buf; |
1785 | xfs_agino_t *logged_nextp; | 1780 | xfs_agino_t *logged_nextp; |
1786 | xfs_agino_t *buffer_nextp; | 1781 | xfs_agino_t *buffer_nextp; |
1787 | unsigned int *data_map = NULL; | 1782 | unsigned int *data_map = NULL; |
1788 | unsigned int map_size = 0; | 1783 | unsigned int map_size = 0; |
1789 | 1784 | ||
1790 | switch (buf_f->blf_type) { | 1785 | switch (buf_f->blf_type) { |
1791 | case XFS_LI_BUF: | 1786 | case XFS_LI_BUF: |
1792 | data_map = buf_f->blf_data_map; | 1787 | data_map = buf_f->blf_data_map; |
1793 | map_size = buf_f->blf_map_size; | 1788 | map_size = buf_f->blf_map_size; |
1794 | break; | 1789 | break; |
1795 | } | 1790 | } |
1796 | /* | 1791 | /* |
1797 | * Set the variables corresponding to the current region to | 1792 | * Set the variables corresponding to the current region to |
1798 | * 0 so that we'll initialize them on the first pass through | 1793 | * 0 so that we'll initialize them on the first pass through |
1799 | * the loop. | 1794 | * the loop. |
1800 | */ | 1795 | */ |
1801 | reg_buf_offset = 0; | 1796 | reg_buf_offset = 0; |
1802 | reg_buf_bytes = 0; | 1797 | reg_buf_bytes = 0; |
1803 | bit = 0; | 1798 | bit = 0; |
1804 | nbits = 0; | 1799 | nbits = 0; |
1805 | item_index = 0; | 1800 | item_index = 0; |
1806 | inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog; | 1801 | inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog; |
1807 | for (i = 0; i < inodes_per_buf; i++) { | 1802 | for (i = 0; i < inodes_per_buf; i++) { |
1808 | next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + | 1803 | next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + |
1809 | offsetof(xfs_dinode_t, di_next_unlinked); | 1804 | offsetof(xfs_dinode_t, di_next_unlinked); |
1810 | 1805 | ||
1811 | while (next_unlinked_offset >= | 1806 | while (next_unlinked_offset >= |
1812 | (reg_buf_offset + reg_buf_bytes)) { | 1807 | (reg_buf_offset + reg_buf_bytes)) { |
1813 | /* | 1808 | /* |
1814 | * The next di_next_unlinked field is beyond | 1809 | * The next di_next_unlinked field is beyond |
1815 | * the current logged region. Find the next | 1810 | * the current logged region. Find the next |
1816 | * logged region that contains or is beyond | 1811 | * logged region that contains or is beyond |
1817 | * the current di_next_unlinked field. | 1812 | * the current di_next_unlinked field. |
1818 | */ | 1813 | */ |
1819 | bit += nbits; | 1814 | bit += nbits; |
1820 | bit = xfs_next_bit(data_map, map_size, bit); | 1815 | bit = xfs_next_bit(data_map, map_size, bit); |
1821 | 1816 | ||
1822 | /* | 1817 | /* |
1823 | * If there are no more logged regions in the | 1818 | * If there are no more logged regions in the |
1824 | * buffer, then we're done. | 1819 | * buffer, then we're done. |
1825 | */ | 1820 | */ |
1826 | if (bit == -1) { | 1821 | if (bit == -1) { |
1827 | return 0; | 1822 | return 0; |
1828 | } | 1823 | } |
1829 | 1824 | ||
1830 | nbits = xfs_contig_bits(data_map, map_size, | 1825 | nbits = xfs_contig_bits(data_map, map_size, |
1831 | bit); | 1826 | bit); |
1832 | ASSERT(nbits > 0); | 1827 | ASSERT(nbits > 0); |
1833 | reg_buf_offset = bit << XFS_BLI_SHIFT; | 1828 | reg_buf_offset = bit << XFS_BLI_SHIFT; |
1834 | reg_buf_bytes = nbits << XFS_BLI_SHIFT; | 1829 | reg_buf_bytes = nbits << XFS_BLI_SHIFT; |
1835 | item_index++; | 1830 | item_index++; |
1836 | } | 1831 | } |
1837 | 1832 | ||
1838 | /* | 1833 | /* |
1839 | * If the current logged region starts after the current | 1834 | * If the current logged region starts after the current |
1840 | * di_next_unlinked field, then move on to the next | 1835 | * di_next_unlinked field, then move on to the next |
1841 | * di_next_unlinked field. | 1836 | * di_next_unlinked field. |
1842 | */ | 1837 | */ |
1843 | if (next_unlinked_offset < reg_buf_offset) { | 1838 | if (next_unlinked_offset < reg_buf_offset) { |
1844 | continue; | 1839 | continue; |
1845 | } | 1840 | } |
1846 | 1841 | ||
1847 | ASSERT(item->ri_buf[item_index].i_addr != NULL); | 1842 | ASSERT(item->ri_buf[item_index].i_addr != NULL); |
1848 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLI_CHUNK) == 0); | 1843 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLI_CHUNK) == 0); |
1849 | ASSERT((reg_buf_offset + reg_buf_bytes) <= XFS_BUF_COUNT(bp)); | 1844 | ASSERT((reg_buf_offset + reg_buf_bytes) <= XFS_BUF_COUNT(bp)); |
1850 | 1845 | ||
1851 | /* | 1846 | /* |
1852 | * The current logged region contains a copy of the | 1847 | * The current logged region contains a copy of the |
1853 | * current di_next_unlinked field. Extract its value | 1848 | * current di_next_unlinked field. Extract its value |
1854 | * and copy it to the buffer copy. | 1849 | * and copy it to the buffer copy. |
1855 | */ | 1850 | */ |
1856 | logged_nextp = (xfs_agino_t *) | 1851 | logged_nextp = (xfs_agino_t *) |
1857 | ((char *)(item->ri_buf[item_index].i_addr) + | 1852 | ((char *)(item->ri_buf[item_index].i_addr) + |
1858 | (next_unlinked_offset - reg_buf_offset)); | 1853 | (next_unlinked_offset - reg_buf_offset)); |
1859 | if (unlikely(*logged_nextp == 0)) { | 1854 | if (unlikely(*logged_nextp == 0)) { |
1860 | xfs_fs_cmn_err(CE_ALERT, mp, | 1855 | xfs_fs_cmn_err(CE_ALERT, mp, |
1861 | "bad inode buffer log record (ptr = 0x%p, bp = 0x%p). XFS trying to replay bad (0) inode di_next_unlinked field", | 1856 | "bad inode buffer log record (ptr = 0x%p, bp = 0x%p). XFS trying to replay bad (0) inode di_next_unlinked field", |
1862 | item, bp); | 1857 | item, bp); |
1863 | XFS_ERROR_REPORT("xlog_recover_do_inode_buf", | 1858 | XFS_ERROR_REPORT("xlog_recover_do_inode_buf", |
1864 | XFS_ERRLEVEL_LOW, mp); | 1859 | XFS_ERRLEVEL_LOW, mp); |
1865 | return XFS_ERROR(EFSCORRUPTED); | 1860 | return XFS_ERROR(EFSCORRUPTED); |
1866 | } | 1861 | } |
1867 | 1862 | ||
1868 | buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, | 1863 | buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, |
1869 | next_unlinked_offset); | 1864 | next_unlinked_offset); |
1870 | *buffer_nextp = *logged_nextp; | 1865 | *buffer_nextp = *logged_nextp; |
1871 | } | 1866 | } |
1872 | 1867 | ||
1873 | return 0; | 1868 | return 0; |
1874 | } | 1869 | } |
1875 | 1870 | ||
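The loop above, like xlog_recover_do_reg_buffer() below, is driven by a dirty-chunk bitmap. A standalone sketch of that walk follows; it assumes xfs_next_bit() returns the first set bit at or after the start index (-1 if none) and xfs_contig_bits() returns the length of the run of set bits from there, and it assumes a 128-byte chunk (an XFS_BLI_SHIFT of 7), the granularity implied by XFS_BLI_CHUNK.

#include <stdio.h>

#define BITS_PER_WORD	(8 * sizeof(unsigned int))
#define BLI_SHIFT	7	/* assumed: log2 of the 128-byte chunk */

static int test_bit(const unsigned int *map, int bit)
{
	return (map[bit / BITS_PER_WORD] >> (bit % BITS_PER_WORD)) & 1;
}

/* first set bit at or after start, or -1; size is in words */
static int next_bit(const unsigned int *map, int size, int start)
{
	for (int bit = start; bit < (int)(size * BITS_PER_WORD); bit++)
		if (test_bit(map, bit))
			return bit;
	return -1;
}

/* number of contiguous set bits starting at start */
static int contig_bits(const unsigned int *map, int size, int start)
{
	int bit = start;

	while (bit < (int)(size * BITS_PER_WORD) && test_bit(map, bit))
		bit++;
	return bit - start;
}

int main(void)
{
	unsigned int map[1] = { 0xd8 };		/* bits 3,4 and 6,7 set */
	int bit = 0, nbits;

	while ((bit = next_bit(map, 1, bit)) != -1) {
		nbits = contig_bits(map, 1, bit);
		printf("region at byte %d, %d bytes\n",
		       bit << BLI_SHIFT, nbits << BLI_SHIFT);
		bit += nbits;
	}
	return 0;
}

Run standalone this prints regions at bytes 384 and 768, 256 bytes each — the same (offset, length) pairs the recovery loops derive via bit << XFS_BLI_SHIFT and nbits << XFS_BLI_SHIFT.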
1876 | /* | 1871 | /* |
1877 | * Perform a 'normal' buffer recovery. Each logged region of the | 1872 | * Perform a 'normal' buffer recovery. Each logged region of the |
1878 | * buffer should be copied over the corresponding region in the | 1873 | * buffer should be copied over the corresponding region in the |
1879 | * given buffer. The bitmap in the buf log format structure indicates | 1874 | * given buffer. The bitmap in the buf log format structure indicates |
1880 | * where to place the logged data. | 1875 | * where to place the logged data. |
1881 | */ | 1876 | */ |
1882 | /*ARGSUSED*/ | 1877 | /*ARGSUSED*/ |
1883 | STATIC void | 1878 | STATIC void |
1884 | xlog_recover_do_reg_buffer( | 1879 | xlog_recover_do_reg_buffer( |
1885 | xlog_recover_item_t *item, | 1880 | xlog_recover_item_t *item, |
1886 | xfs_buf_t *bp, | 1881 | xfs_buf_t *bp, |
1887 | xfs_buf_log_format_t *buf_f) | 1882 | xfs_buf_log_format_t *buf_f) |
1888 | { | 1883 | { |
1889 | int i; | 1884 | int i; |
1890 | int bit; | 1885 | int bit; |
1891 | int nbits; | 1886 | int nbits; |
1892 | unsigned int *data_map = NULL; | 1887 | unsigned int *data_map = NULL; |
1893 | unsigned int map_size = 0; | 1888 | unsigned int map_size = 0; |
1894 | int error; | 1889 | int error; |
1895 | 1890 | ||
1896 | switch (buf_f->blf_type) { | 1891 | switch (buf_f->blf_type) { |
1897 | case XFS_LI_BUF: | 1892 | case XFS_LI_BUF: |
1898 | data_map = buf_f->blf_data_map; | 1893 | data_map = buf_f->blf_data_map; |
1899 | map_size = buf_f->blf_map_size; | 1894 | map_size = buf_f->blf_map_size; |
1900 | break; | 1895 | break; |
1901 | } | 1896 | } |
1902 | bit = 0; | 1897 | bit = 0; |
1903 | i = 1; /* 0 is the buf format structure */ | 1898 | i = 1; /* 0 is the buf format structure */ |
1904 | while (1) { | 1899 | while (1) { |
1905 | bit = xfs_next_bit(data_map, map_size, bit); | 1900 | bit = xfs_next_bit(data_map, map_size, bit); |
1906 | if (bit == -1) | 1901 | if (bit == -1) |
1907 | break; | 1902 | break; |
1908 | nbits = xfs_contig_bits(data_map, map_size, bit); | 1903 | nbits = xfs_contig_bits(data_map, map_size, bit); |
1909 | ASSERT(nbits > 0); | 1904 | ASSERT(nbits > 0); |
1910 | ASSERT(item->ri_buf[i].i_addr != NULL); | 1905 | ASSERT(item->ri_buf[i].i_addr != NULL); |
1911 | ASSERT(item->ri_buf[i].i_len % XFS_BLI_CHUNK == 0); | 1906 | ASSERT(item->ri_buf[i].i_len % XFS_BLI_CHUNK == 0); |
1912 | ASSERT(XFS_BUF_COUNT(bp) >= | 1907 | ASSERT(XFS_BUF_COUNT(bp) >= |
1913 | ((uint)bit << XFS_BLI_SHIFT)+(nbits<<XFS_BLI_SHIFT)); | 1908 | ((uint)bit << XFS_BLI_SHIFT)+(nbits<<XFS_BLI_SHIFT)); |
1914 | 1909 | ||
1915 | /* | 1910 | /* |
1916 | * Do a sanity check if this is a dquot buffer. Just checking | 1911 | * Do a sanity check if this is a dquot buffer. Just checking |
1917 | * the first dquot in the buffer should do. XXX: This is | 1912 | * the first dquot in the buffer should do. XXX: This is |
1918 | * probably a good thing to do for other buf types also. | 1913 | * probably a good thing to do for other buf types also. |
1919 | */ | 1914 | */ |
1920 | error = 0; | 1915 | error = 0; |
1921 | if (buf_f->blf_flags & | 1916 | if (buf_f->blf_flags & |
1922 | (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { | 1917 | (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { |
1923 | error = xfs_qm_dqcheck((xfs_disk_dquot_t *) | 1918 | error = xfs_qm_dqcheck((xfs_disk_dquot_t *) |
1924 | item->ri_buf[i].i_addr, | 1919 | item->ri_buf[i].i_addr, |
1925 | -1, 0, XFS_QMOPT_DOWARN, | 1920 | -1, 0, XFS_QMOPT_DOWARN, |
1926 | "dquot_buf_recover"); | 1921 | "dquot_buf_recover"); |
1927 | } | 1922 | } |
1928 | if (!error) | 1923 | if (!error) |
1929 | memcpy(xfs_buf_offset(bp, | 1924 | memcpy(xfs_buf_offset(bp, |
1930 | (uint)bit << XFS_BLI_SHIFT), /* dest */ | 1925 | (uint)bit << XFS_BLI_SHIFT), /* dest */ |
1931 | item->ri_buf[i].i_addr, /* source */ | 1926 | item->ri_buf[i].i_addr, /* source */ |
1932 | nbits<<XFS_BLI_SHIFT); /* length */ | 1927 | nbits<<XFS_BLI_SHIFT); /* length */ |
1933 | i++; | 1928 | i++; |
1934 | bit += nbits; | 1929 | bit += nbits; |
1935 | } | 1930 | } |
1936 | 1931 | ||
1937 | /* Shouldn't be any more regions */ | 1932 | /* Shouldn't be any more regions */ |
1938 | ASSERT(i == item->ri_total); | 1933 | ASSERT(i == item->ri_total); |
1939 | } | 1934 | } |
1940 | 1935 | ||
1941 | /* | 1936 | /* |
1942 | * Do some primitive error checking on ondisk dquot data structures. | 1937 | * Do some primitive error checking on ondisk dquot data structures. |
1943 | */ | 1938 | */ |
1944 | int | 1939 | int |
1945 | xfs_qm_dqcheck( | 1940 | xfs_qm_dqcheck( |
1946 | xfs_disk_dquot_t *ddq, | 1941 | xfs_disk_dquot_t *ddq, |
1947 | xfs_dqid_t id, | 1942 | xfs_dqid_t id, |
1948 | uint type, /* used only when IO_dorepair is true */ | 1943 | uint type, /* used only when IO_dorepair is true */ |
1949 | uint flags, | 1944 | uint flags, |
1950 | char *str) | 1945 | char *str) |
1951 | { | 1946 | { |
1952 | xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; | 1947 | xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; |
1953 | int errs = 0; | 1948 | int errs = 0; |
1954 | 1949 | ||
1955 | /* | 1950 | /* |
1956 | * We can encounter an uninitialized dquot buffer for 2 reasons: | 1951 | * We can encounter an uninitialized dquot buffer for 2 reasons: |
1957 | * 1. If we crash while deleting the quotainode(s), and those blks got | 1952 | * 1. If we crash while deleting the quotainode(s), and those blks got |
1958 | * used for user data. This is because we take the path of regular | 1953 | * used for user data. This is because we take the path of regular |
1959 | * file deletion; however, the size field of quotainodes is never | 1954 | * file deletion; however, the size field of quotainodes is never |
1960 | * updated, so all the tricks that we play in itruncate_finish | 1955 | * updated, so all the tricks that we play in itruncate_finish |
1961 | * don't quite matter. | 1956 | * don't quite matter. |
1962 | * | 1957 | * |
1963 | * 2. We don't replay the quota buffers when there's a quotaoff logitem. | 1958 | * 2. We don't replay the quota buffers when there's a quotaoff logitem. |
1964 | * But the allocation will be replayed so we'll end up with an | 1959 | * But the allocation will be replayed so we'll end up with an |
1965 | * uninitialized quota block. | 1960 | * uninitialized quota block. |
1966 | * | 1961 | * |
1967 | * This is all fine; things are still consistent, and we haven't lost | 1962 | * This is all fine; things are still consistent, and we haven't lost |
1968 | * any quota information. Just don't complain about bad dquot blks. | 1963 | * any quota information. Just don't complain about bad dquot blks. |
1969 | */ | 1964 | */ |
1970 | if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) { | 1965 | if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) { |
1971 | if (flags & XFS_QMOPT_DOWARN) | 1966 | if (flags & XFS_QMOPT_DOWARN) |
1972 | cmn_err(CE_ALERT, | 1967 | cmn_err(CE_ALERT, |
1973 | "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", | 1968 | "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", |
1974 | str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); | 1969 | str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); |
1975 | errs++; | 1970 | errs++; |
1976 | } | 1971 | } |
1977 | if (ddq->d_version != XFS_DQUOT_VERSION) { | 1972 | if (ddq->d_version != XFS_DQUOT_VERSION) { |
1978 | if (flags & XFS_QMOPT_DOWARN) | 1973 | if (flags & XFS_QMOPT_DOWARN) |
1979 | cmn_err(CE_ALERT, | 1974 | cmn_err(CE_ALERT, |
1980 | "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", | 1975 | "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", |
1981 | str, id, ddq->d_version, XFS_DQUOT_VERSION); | 1976 | str, id, ddq->d_version, XFS_DQUOT_VERSION); |
1982 | errs++; | 1977 | errs++; |
1983 | } | 1978 | } |
1984 | 1979 | ||
1985 | if (ddq->d_flags != XFS_DQ_USER && | 1980 | if (ddq->d_flags != XFS_DQ_USER && |
1986 | ddq->d_flags != XFS_DQ_PROJ && | 1981 | ddq->d_flags != XFS_DQ_PROJ && |
1987 | ddq->d_flags != XFS_DQ_GROUP) { | 1982 | ddq->d_flags != XFS_DQ_GROUP) { |
1988 | if (flags & XFS_QMOPT_DOWARN) | 1983 | if (flags & XFS_QMOPT_DOWARN) |
1989 | cmn_err(CE_ALERT, | 1984 | cmn_err(CE_ALERT, |
1990 | "%s : XFS dquot ID 0x%x, unknown flags 0x%x", | 1985 | "%s : XFS dquot ID 0x%x, unknown flags 0x%x", |
1991 | str, id, ddq->d_flags); | 1986 | str, id, ddq->d_flags); |
1992 | errs++; | 1987 | errs++; |
1993 | } | 1988 | } |
1994 | 1989 | ||
1995 | if (id != -1 && id != be32_to_cpu(ddq->d_id)) { | 1990 | if (id != -1 && id != be32_to_cpu(ddq->d_id)) { |
1996 | if (flags & XFS_QMOPT_DOWARN) | 1991 | if (flags & XFS_QMOPT_DOWARN) |
1997 | cmn_err(CE_ALERT, | 1992 | cmn_err(CE_ALERT, |
1998 | "%s : ondisk-dquot 0x%p, ID mismatch: " | 1993 | "%s : ondisk-dquot 0x%p, ID mismatch: " |
1999 | "0x%x expected, found id 0x%x", | 1994 | "0x%x expected, found id 0x%x", |
2000 | str, ddq, id, be32_to_cpu(ddq->d_id)); | 1995 | str, ddq, id, be32_to_cpu(ddq->d_id)); |
2001 | errs++; | 1996 | errs++; |
2002 | } | 1997 | } |
2003 | 1998 | ||
2004 | if (!errs && ddq->d_id) { | 1999 | if (!errs && ddq->d_id) { |
2005 | if (ddq->d_blk_softlimit && | 2000 | if (ddq->d_blk_softlimit && |
2006 | be64_to_cpu(ddq->d_bcount) >= | 2001 | be64_to_cpu(ddq->d_bcount) >= |
2007 | be64_to_cpu(ddq->d_blk_softlimit)) { | 2002 | be64_to_cpu(ddq->d_blk_softlimit)) { |
2008 | if (!ddq->d_btimer) { | 2003 | if (!ddq->d_btimer) { |
2009 | if (flags & XFS_QMOPT_DOWARN) | 2004 | if (flags & XFS_QMOPT_DOWARN) |
2010 | cmn_err(CE_ALERT, | 2005 | cmn_err(CE_ALERT, |
2011 | "%s : Dquot ID 0x%x (0x%p) " | 2006 | "%s : Dquot ID 0x%x (0x%p) " |
2012 | "BLK TIMER NOT STARTED", | 2007 | "BLK TIMER NOT STARTED", |
2013 | str, (int)be32_to_cpu(ddq->d_id), ddq); | 2008 | str, (int)be32_to_cpu(ddq->d_id), ddq); |
2014 | errs++; | 2009 | errs++; |
2015 | } | 2010 | } |
2016 | } | 2011 | } |
2017 | if (ddq->d_ino_softlimit && | 2012 | if (ddq->d_ino_softlimit && |
2018 | be64_to_cpu(ddq->d_icount) >= | 2013 | be64_to_cpu(ddq->d_icount) >= |
2019 | be64_to_cpu(ddq->d_ino_softlimit)) { | 2014 | be64_to_cpu(ddq->d_ino_softlimit)) { |
2020 | if (!ddq->d_itimer) { | 2015 | if (!ddq->d_itimer) { |
2021 | if (flags & XFS_QMOPT_DOWARN) | 2016 | if (flags & XFS_QMOPT_DOWARN) |
2022 | cmn_err(CE_ALERT, | 2017 | cmn_err(CE_ALERT, |
2023 | "%s : Dquot ID 0x%x (0x%p) " | 2018 | "%s : Dquot ID 0x%x (0x%p) " |
2024 | "INODE TIMER NOT STARTED", | 2019 | "INODE TIMER NOT STARTED", |
2025 | str, (int)be32_to_cpu(ddq->d_id), ddq); | 2020 | str, (int)be32_to_cpu(ddq->d_id), ddq); |
2026 | errs++; | 2021 | errs++; |
2027 | } | 2022 | } |
2028 | } | 2023 | } |
2029 | if (ddq->d_rtb_softlimit && | 2024 | if (ddq->d_rtb_softlimit && |
2030 | be64_to_cpu(ddq->d_rtbcount) >= | 2025 | be64_to_cpu(ddq->d_rtbcount) >= |
2031 | be64_to_cpu(ddq->d_rtb_softlimit)) { | 2026 | be64_to_cpu(ddq->d_rtb_softlimit)) { |
2032 | if (!ddq->d_rtbtimer) { | 2027 | if (!ddq->d_rtbtimer) { |
2033 | if (flags & XFS_QMOPT_DOWARN) | 2028 | if (flags & XFS_QMOPT_DOWARN) |
2034 | cmn_err(CE_ALERT, | 2029 | cmn_err(CE_ALERT, |
2035 | "%s : Dquot ID 0x%x (0x%p) " | 2030 | "%s : Dquot ID 0x%x (0x%p) " |
2036 | "RTBLK TIMER NOT STARTED", | 2031 | "RTBLK TIMER NOT STARTED", |
2037 | str, (int)be32_to_cpu(ddq->d_id), ddq); | 2032 | str, (int)be32_to_cpu(ddq->d_id), ddq); |
2038 | errs++; | 2033 | errs++; |
2039 | } | 2034 | } |
2040 | } | 2035 | } |
2041 | } | 2036 | } |
2042 | 2037 | ||
2043 | if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) | 2038 | if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) |
2044 | return errs; | 2039 | return errs; |
2045 | 2040 | ||
2046 | if (flags & XFS_QMOPT_DOWARN) | 2041 | if (flags & XFS_QMOPT_DOWARN) |
2047 | cmn_err(CE_NOTE, "Re-initializing dquot ID 0x%x", id); | 2042 | cmn_err(CE_NOTE, "Re-initializing dquot ID 0x%x", id); |
2048 | 2043 | ||
2049 | /* | 2044 | /* |
2050 | * Typically, a repair is only requested by quotacheck. | 2045 | * Typically, a repair is only requested by quotacheck. |
2051 | */ | 2046 | */ |
2052 | ASSERT(id != -1); | 2047 | ASSERT(id != -1); |
2053 | ASSERT(flags & XFS_QMOPT_DQREPAIR); | 2048 | ASSERT(flags & XFS_QMOPT_DQREPAIR); |
2054 | memset(d, 0, sizeof(xfs_dqblk_t)); | 2049 | memset(d, 0, sizeof(xfs_dqblk_t)); |
2055 | 2050 | ||
2056 | d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); | 2051 | d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); |
2057 | d->dd_diskdq.d_version = XFS_DQUOT_VERSION; | 2052 | d->dd_diskdq.d_version = XFS_DQUOT_VERSION; |
2058 | d->dd_diskdq.d_flags = type; | 2053 | d->dd_diskdq.d_flags = type; |
2059 | d->dd_diskdq.d_id = cpu_to_be32(id); | 2054 | d->dd_diskdq.d_id = cpu_to_be32(id); |
2060 | 2055 | ||
2061 | return errs; | 2056 | return errs; |
2062 | } | 2057 | } |
2063 | 2058 | ||
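xfs_qm_dqcheck() serves two callers, selected by flags. Recovery, as in xlog_recover_do_reg_buffer() above, passes id = -1, type = 0 and XFS_QMOPT_DOWARN so a bad dquot is only reported; the ASSERTs at the end of the function imply that repair mode additionally requires a real id, a dquot type and XFS_QMOPT_DQREPAIR. The second call below is an illustrative quotacheck-style invocation, not a quote of that code.

/* verify only: warn about a bad dquot, never rewrite it (recovery) */
error = xfs_qm_dqcheck(ddq, -1, 0, XFS_QMOPT_DOWARN,
			"dquot_buf_recover");

/* verify and repair: reinitialize the dquot block on failure */
error = xfs_qm_dqcheck(ddq, id, XFS_DQ_USER,
			XFS_QMOPT_DOWARN | XFS_QMOPT_DQREPAIR,
			"quotacheck");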
2064 | /* | 2059 | /* |
2065 | * Perform a dquot buffer recovery. | 2060 | * Perform a dquot buffer recovery. |
2066 | * Simple algorithm: if we have found a QUOTAOFF logitem of the same type | 2061 | * Simple algorithm: if we have found a QUOTAOFF logitem of the same type |
2067 | * (i.e. USR or GRP), then just toss this buffer away; don't recover it. | 2062 | * (i.e. USR or GRP), then just toss this buffer away; don't recover it. |
2068 | * Else, treat it as a regular buffer and do recovery. | 2063 | * Else, treat it as a regular buffer and do recovery. |
2069 | */ | 2064 | */ |
2070 | STATIC void | 2065 | STATIC void |
2071 | xlog_recover_do_dquot_buffer( | 2066 | xlog_recover_do_dquot_buffer( |
2072 | xfs_mount_t *mp, | 2067 | xfs_mount_t *mp, |
2073 | xlog_t *log, | 2068 | xlog_t *log, |
2074 | xlog_recover_item_t *item, | 2069 | xlog_recover_item_t *item, |
2075 | xfs_buf_t *bp, | 2070 | xfs_buf_t *bp, |
2076 | xfs_buf_log_format_t *buf_f) | 2071 | xfs_buf_log_format_t *buf_f) |
2077 | { | 2072 | { |
2078 | uint type; | 2073 | uint type; |
2079 | 2074 | ||
2080 | /* | 2075 | /* |
2081 | * Filesystems are required to send in quota flags at mount time. | 2076 | * Filesystems are required to send in quota flags at mount time. |
2082 | */ | 2077 | */ |
2083 | if (mp->m_qflags == 0) { | 2078 | if (mp->m_qflags == 0) { |
2084 | return; | 2079 | return; |
2085 | } | 2080 | } |
2086 | 2081 | ||
2087 | type = 0; | 2082 | type = 0; |
2088 | if (buf_f->blf_flags & XFS_BLI_UDQUOT_BUF) | 2083 | if (buf_f->blf_flags & XFS_BLI_UDQUOT_BUF) |
2089 | type |= XFS_DQ_USER; | 2084 | type |= XFS_DQ_USER; |
2090 | if (buf_f->blf_flags & XFS_BLI_PDQUOT_BUF) | 2085 | if (buf_f->blf_flags & XFS_BLI_PDQUOT_BUF) |
2091 | type |= XFS_DQ_PROJ; | 2086 | type |= XFS_DQ_PROJ; |
2092 | if (buf_f->blf_flags & XFS_BLI_GDQUOT_BUF) | 2087 | if (buf_f->blf_flags & XFS_BLI_GDQUOT_BUF) |
2093 | type |= XFS_DQ_GROUP; | 2088 | type |= XFS_DQ_GROUP; |
2094 | /* | 2089 | /* |
2095 | * This type of quota was turned off, so ignore this buffer | 2090 | * This type of quota was turned off, so ignore this buffer |
2096 | */ | 2091 | */ |
2097 | if (log->l_quotaoffs_flag & type) | 2092 | if (log->l_quotaoffs_flag & type) |
2098 | return; | 2093 | return; |
2099 | 2094 | ||
2100 | xlog_recover_do_reg_buffer(item, bp, buf_f); | 2095 | xlog_recover_do_reg_buffer(item, bp, buf_f); |
2101 | } | 2096 | } |
2102 | 2097 | ||
2103 | /* | 2098 | /* |
2104 | * This routine replays a modification made to a buffer at runtime. | 2099 | * This routine replays a modification made to a buffer at runtime. |
2105 | * There are actually two types of buffer, regular and inode, which | 2100 | * There are actually two types of buffer, regular and inode, which |
2106 | * are handled differently. Inode buffers are special | 2101 | * are handled differently. Inode buffers are special |
2107 | * in that we only recover a specific set of data from them, namely | 2102 | * in that we only recover a specific set of data from them, namely |
2108 | * the inode di_next_unlinked fields. This is because all other inode | 2103 | * the inode di_next_unlinked fields. This is because all other inode |
2109 | * data is actually logged via inode records and any data we replay | 2104 | * data is actually logged via inode records and any data we replay |
2110 | * here which overlaps that may be stale. | 2105 | * here which overlaps that may be stale. |
2111 | * | 2106 | * |
2112 | * When meta-data buffers are freed at run time we log a buffer item | 2107 | * When meta-data buffers are freed at run time we log a buffer item |
2113 | * with the XFS_BLI_CANCEL bit set to indicate that previous copies | 2108 | * with the XFS_BLI_CANCEL bit set to indicate that previous copies |
2114 | * of the buffer in the log should not be replayed at recovery time. | 2109 | * of the buffer in the log should not be replayed at recovery time. |
2115 | * This is so that if the blocks covered by the buffer are reused for | 2110 | * This is so that if the blocks covered by the buffer are reused for |
2116 | * file data before we crash we don't end up replaying old, freed | 2111 | * file data before we crash we don't end up replaying old, freed |
2117 | * meta-data into a user's file. | 2112 | * meta-data into a user's file. |
2118 | * | 2113 | * |
2119 | * To handle the cancellation of buffer log items, we make two passes | 2114 | * To handle the cancellation of buffer log items, we make two passes |
2120 | * over the log during recovery. During the first we build a table of | 2115 | * over the log during recovery. During the first we build a table of |
2121 | * those buffers which have been cancelled, and during the second we | 2116 | * those buffers which have been cancelled, and during the second we |
2122 | * only replay those buffers which do not have corresponding cancel | 2117 | * only replay those buffers which do not have corresponding cancel |
2123 | * records in the table. See xlog_recover_do_buffer_pass[1,2] above | 2118 | * records in the table. See xlog_recover_do_buffer_pass[1,2] above |
2124 | * for more details on the implementation of the table of cancel records. | 2119 | * for more details on the implementation of the table of cancel records. |
2125 | */ | 2120 | */ |
2126 | STATIC int | 2121 | STATIC int |
2127 | xlog_recover_do_buffer_trans( | 2122 | xlog_recover_do_buffer_trans( |
2128 | xlog_t *log, | 2123 | xlog_t *log, |
2129 | xlog_recover_item_t *item, | 2124 | xlog_recover_item_t *item, |
2130 | int pass) | 2125 | int pass) |
2131 | { | 2126 | { |
2132 | xfs_buf_log_format_t *buf_f; | 2127 | xfs_buf_log_format_t *buf_f; |
2133 | xfs_mount_t *mp; | 2128 | xfs_mount_t *mp; |
2134 | xfs_buf_t *bp; | 2129 | xfs_buf_t *bp; |
2135 | int error; | 2130 | int error; |
2136 | int cancel; | 2131 | int cancel; |
2137 | xfs_daddr_t blkno; | 2132 | xfs_daddr_t blkno; |
2138 | int len; | 2133 | int len; |
2139 | ushort flags; | 2134 | ushort flags; |
2140 | 2135 | ||
2141 | buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr; | 2136 | buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr; |
2142 | 2137 | ||
2143 | if (pass == XLOG_RECOVER_PASS1) { | 2138 | if (pass == XLOG_RECOVER_PASS1) { |
2144 | /* | 2139 | /* |
2145 | * In this pass we're only looking for buf items | 2140 | * In this pass we're only looking for buf items |
2146 | * with the XFS_BLI_CANCEL bit set. | 2141 | * with the XFS_BLI_CANCEL bit set. |
2147 | */ | 2142 | */ |
2148 | xlog_recover_do_buffer_pass1(log, buf_f); | 2143 | xlog_recover_do_buffer_pass1(log, buf_f); |
2149 | return 0; | 2144 | return 0; |
2150 | } else { | 2145 | } else { |
2151 | /* | 2146 | /* |
2152 | * In this pass we want to recover all the buffers | 2147 | * In this pass we want to recover all the buffers |
2153 | * which have not been cancelled and are not | 2148 | * which have not been cancelled and are not |
2154 | * cancellation buffers themselves. The routine | 2149 | * cancellation buffers themselves. The routine |
2155 | * we call here will tell us whether or not to | 2150 | * we call here will tell us whether or not to |
2156 | * continue with the replay of this buffer. | 2151 | * continue with the replay of this buffer. |
2157 | */ | 2152 | */ |
2158 | cancel = xlog_recover_do_buffer_pass2(log, buf_f); | 2153 | cancel = xlog_recover_do_buffer_pass2(log, buf_f); |
2159 | if (cancel) { | 2154 | if (cancel) { |
2160 | return 0; | 2155 | return 0; |
2161 | } | 2156 | } |
2162 | } | 2157 | } |
2163 | switch (buf_f->blf_type) { | 2158 | switch (buf_f->blf_type) { |
2164 | case XFS_LI_BUF: | 2159 | case XFS_LI_BUF: |
2165 | blkno = buf_f->blf_blkno; | 2160 | blkno = buf_f->blf_blkno; |
2166 | len = buf_f->blf_len; | 2161 | len = buf_f->blf_len; |
2167 | flags = buf_f->blf_flags; | 2162 | flags = buf_f->blf_flags; |
2168 | break; | 2163 | break; |
2169 | default: | 2164 | default: |
2170 | xfs_fs_cmn_err(CE_ALERT, log->l_mp, | 2165 | xfs_fs_cmn_err(CE_ALERT, log->l_mp, |
2171 | "xfs_log_recover: unknown buffer type 0x%x, logdev %s", | 2166 | "xfs_log_recover: unknown buffer type 0x%x, logdev %s", |
2172 | buf_f->blf_type, log->l_mp->m_logname ? | 2167 | buf_f->blf_type, log->l_mp->m_logname ? |
2173 | log->l_mp->m_logname : "internal"); | 2168 | log->l_mp->m_logname : "internal"); |
2174 | XFS_ERROR_REPORT("xlog_recover_do_buffer_trans", | 2169 | XFS_ERROR_REPORT("xlog_recover_do_buffer_trans", |
2175 | XFS_ERRLEVEL_LOW, log->l_mp); | 2170 | XFS_ERRLEVEL_LOW, log->l_mp); |
2176 | return XFS_ERROR(EFSCORRUPTED); | 2171 | return XFS_ERROR(EFSCORRUPTED); |
2177 | } | 2172 | } |
2178 | 2173 | ||
2179 | mp = log->l_mp; | 2174 | mp = log->l_mp; |
2180 | if (flags & XFS_BLI_INODE_BUF) { | 2175 | if (flags & XFS_BLI_INODE_BUF) { |
2181 | bp = xfs_buf_read_flags(mp->m_ddev_targp, blkno, len, | 2176 | bp = xfs_buf_read_flags(mp->m_ddev_targp, blkno, len, |
2182 | XFS_BUF_LOCK); | 2177 | XFS_BUF_LOCK); |
2183 | } else { | 2178 | } else { |
2184 | bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, 0); | 2179 | bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, 0); |
2185 | } | 2180 | } |
2186 | if (XFS_BUF_ISERROR(bp)) { | 2181 | if (XFS_BUF_ISERROR(bp)) { |
2187 | xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp, | 2182 | xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp, |
2188 | bp, blkno); | 2183 | bp, blkno); |
2189 | error = XFS_BUF_GETERROR(bp); | 2184 | error = XFS_BUF_GETERROR(bp); |
2190 | xfs_buf_relse(bp); | 2185 | xfs_buf_relse(bp); |
2191 | return error; | 2186 | return error; |
2192 | } | 2187 | } |
2193 | 2188 | ||
2194 | error = 0; | 2189 | error = 0; |
2195 | if (flags & XFS_BLI_INODE_BUF) { | 2190 | if (flags & XFS_BLI_INODE_BUF) { |
2196 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); | 2191 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); |
2197 | } else if (flags & | 2192 | } else if (flags & |
2198 | (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { | 2193 | (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { |
2199 | xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); | 2194 | xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); |
2200 | } else { | 2195 | } else { |
2201 | xlog_recover_do_reg_buffer(item, bp, buf_f); | 2196 | xlog_recover_do_reg_buffer(item, bp, buf_f); |
2202 | } | 2197 | } |
2203 | if (error) | 2198 | if (error) |
2204 | return XFS_ERROR(error); | 2199 | return XFS_ERROR(error); |
2205 | 2200 | ||
2206 | /* | 2201 | /* |
2207 | * Perform a delayed write on the buffer. Asynchronous writes would be | 2202 | * Perform a delayed write on the buffer. Asynchronous writes would be |
2208 | * slower once the number of buffers to be flushed is taken into account. | 2203 | * slower once the number of buffers to be flushed is taken into account. |
2209 | * | 2204 | * |
2210 | * Also make sure that only inode buffers with good sizes stay in | 2205 | * Also make sure that only inode buffers with good sizes stay in |
2211 | * the buffer cache. The kernel moves inodes in buffers of 1 block | 2206 | * the buffer cache. The kernel moves inodes in buffers of 1 block |
2212 | * or XFS_INODE_CLUSTER_SIZE bytes, whichever is bigger. The inode | 2207 | * or XFS_INODE_CLUSTER_SIZE bytes, whichever is bigger. The inode |
2213 | * buffers in the log can be a different size if the log was generated | 2208 | * buffers in the log can be a different size if the log was generated |
2214 | * by an older kernel using unclustered inode buffers or a newer kernel | 2209 | * by an older kernel using unclustered inode buffers or a newer kernel |
2215 | * running with a different inode cluster size. Regardless, if the | 2210 | * running with a different inode cluster size. Regardless, if the |
2216 | * inode buffer size isn't MAX(blocksize, XFS_INODE_CLUSTER_SIZE) | 2211 | * inode buffer size isn't MAX(blocksize, XFS_INODE_CLUSTER_SIZE) |
2217 | * for *our* value of XFS_INODE_CLUSTER_SIZE, then we need to keep | 2212 | * for *our* value of XFS_INODE_CLUSTER_SIZE, then we need to keep |
2218 | * the buffer out of the buffer cache so that the buffer won't | 2213 | * the buffer out of the buffer cache so that the buffer won't |
2219 | * overlap with future reads of those inodes. | 2214 | * overlap with future reads of those inodes. |
2220 | */ | 2215 | */ |
2221 | if (XFS_DINODE_MAGIC == | 2216 | if (XFS_DINODE_MAGIC == |
2222 | be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && | 2217 | be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && |
2223 | (XFS_BUF_COUNT(bp) != MAX(log->l_mp->m_sb.sb_blocksize, | 2218 | (XFS_BUF_COUNT(bp) != MAX(log->l_mp->m_sb.sb_blocksize, |
2224 | (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { | 2219 | (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { |
2225 | XFS_BUF_STALE(bp); | 2220 | XFS_BUF_STALE(bp); |
2226 | error = xfs_bwrite(mp, bp); | 2221 | error = xfs_bwrite(mp, bp); |
2227 | } else { | 2222 | } else { |
2228 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || | 2223 | ASSERT(bp->b_mount == NULL || bp->b_mount == mp); |
2229 | XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); | 2224 | bp->b_mount = mp; |
2230 | XFS_BUF_SET_FSPRIVATE(bp, mp); | ||
2231 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); | 2225 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); |
2232 | xfs_bdwrite(mp, bp); | 2226 | xfs_bdwrite(mp, bp); |
2233 | } | 2227 | } |
2234 | 2228 | ||
2235 | return (error); | 2229 | return (error); |
2236 | } | 2230 | } |
2237 | 2231 | ||
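Both delayed-write paths touched by this commit trade the same pattern: the untyped b_fspriv slot and its casting accessor macros for the new, properly typed b_mount field. Side by side:

/* before: the mount pointer hides behind the untyped b_fspriv slot */
ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL ||
       XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp);
XFS_BUF_SET_FSPRIVATE(bp, mp);

/* after: a plain field the compiler can type-check */
ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
bp->b_mount = mp;

The store is the same either way; what the change buys is type safety and an assignment that is easy to grep for.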
2238 | STATIC int | 2232 | STATIC int |
2239 | xlog_recover_do_inode_trans( | 2233 | xlog_recover_do_inode_trans( |
2240 | xlog_t *log, | 2234 | xlog_t *log, |
2241 | xlog_recover_item_t *item, | 2235 | xlog_recover_item_t *item, |
2242 | int pass) | 2236 | int pass) |
2243 | { | 2237 | { |
2244 | xfs_inode_log_format_t *in_f; | 2238 | xfs_inode_log_format_t *in_f; |
2245 | xfs_mount_t *mp; | 2239 | xfs_mount_t *mp; |
2246 | xfs_buf_t *bp; | 2240 | xfs_buf_t *bp; |
2247 | xfs_dinode_t *dip; | 2241 | xfs_dinode_t *dip; |
2248 | xfs_ino_t ino; | 2242 | xfs_ino_t ino; |
2249 | int len; | 2243 | int len; |
2250 | xfs_caddr_t src; | 2244 | xfs_caddr_t src; |
2251 | xfs_caddr_t dest; | 2245 | xfs_caddr_t dest; |
2252 | int error; | 2246 | int error; |
2253 | int attr_index; | 2247 | int attr_index; |
2254 | uint fields; | 2248 | uint fields; |
2255 | xfs_icdinode_t *dicp; | 2249 | xfs_icdinode_t *dicp; |
2256 | int need_free = 0; | 2250 | int need_free = 0; |
2257 | 2251 | ||
2258 | if (pass == XLOG_RECOVER_PASS1) { | 2252 | if (pass == XLOG_RECOVER_PASS1) { |
2259 | return 0; | 2253 | return 0; |
2260 | } | 2254 | } |
2261 | 2255 | ||
2262 | if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { | 2256 | if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { |
2263 | in_f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr; | 2257 | in_f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr; |
2264 | } else { | 2258 | } else { |
2265 | in_f = (xfs_inode_log_format_t *)kmem_alloc( | 2259 | in_f = (xfs_inode_log_format_t *)kmem_alloc( |
2266 | sizeof(xfs_inode_log_format_t), KM_SLEEP); | 2260 | sizeof(xfs_inode_log_format_t), KM_SLEEP); |
2267 | need_free = 1; | 2261 | need_free = 1; |
2268 | error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f); | 2262 | error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f); |
2269 | if (error) | 2263 | if (error) |
2270 | goto error; | 2264 | goto error; |
2271 | } | 2265 | } |
2272 | ino = in_f->ilf_ino; | 2266 | ino = in_f->ilf_ino; |
2273 | mp = log->l_mp; | 2267 | mp = log->l_mp; |
2274 | 2268 | ||
2275 | /* | 2269 | /* |
2276 | * Inode buffers can be freed; look out for that case | 2270 | * Inode buffers can be freed; look out for that case |
2277 | * and do not replay the inode. | 2271 | * and do not replay the inode. |
2278 | */ | 2272 | */ |
2279 | if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno, | 2273 | if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno, |
2280 | in_f->ilf_len, 0)) { | 2274 | in_f->ilf_len, 0)) { |
2281 | error = 0; | 2275 | error = 0; |
2282 | goto error; | 2276 | goto error; |
2283 | } | 2277 | } |
2284 | 2278 | ||
2285 | bp = xfs_buf_read_flags(mp->m_ddev_targp, in_f->ilf_blkno, | 2279 | bp = xfs_buf_read_flags(mp->m_ddev_targp, in_f->ilf_blkno, |
2286 | in_f->ilf_len, XFS_BUF_LOCK); | 2280 | in_f->ilf_len, XFS_BUF_LOCK); |
2287 | if (XFS_BUF_ISERROR(bp)) { | 2281 | if (XFS_BUF_ISERROR(bp)) { |
2288 | xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, | 2282 | xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, |
2289 | bp, in_f->ilf_blkno); | 2283 | bp, in_f->ilf_blkno); |
2290 | error = XFS_BUF_GETERROR(bp); | 2284 | error = XFS_BUF_GETERROR(bp); |
2291 | xfs_buf_relse(bp); | 2285 | xfs_buf_relse(bp); |
2292 | goto error; | 2286 | goto error; |
2293 | } | 2287 | } |
2294 | error = 0; | 2288 | error = 0; |
2295 | ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); | 2289 | ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); |
2296 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); | 2290 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); |
2297 | 2291 | ||
2298 | /* | 2292 | /* |
2299 | * Make sure the place we're flushing out to really looks | 2293 | * Make sure the place we're flushing out to really looks |
2300 | * like an inode! | 2294 | * like an inode! |
2301 | */ | 2295 | */ |
2302 | if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) { | 2296 | if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) { |
2303 | xfs_buf_relse(bp); | 2297 | xfs_buf_relse(bp); |
2304 | xfs_fs_cmn_err(CE_ALERT, mp, | 2298 | xfs_fs_cmn_err(CE_ALERT, mp, |
2305 | "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", | 2299 | "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", |
2306 | dip, bp, ino); | 2300 | dip, bp, ino); |
2307 | XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)", | 2301 | XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)", |
2308 | XFS_ERRLEVEL_LOW, mp); | 2302 | XFS_ERRLEVEL_LOW, mp); |
2309 | error = EFSCORRUPTED; | 2303 | error = EFSCORRUPTED; |
2310 | goto error; | 2304 | goto error; |
2311 | } | 2305 | } |
2312 | dicp = (xfs_icdinode_t *)(item->ri_buf[1].i_addr); | 2306 | dicp = (xfs_icdinode_t *)(item->ri_buf[1].i_addr); |
2313 | if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { | 2307 | if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { |
2314 | xfs_buf_relse(bp); | 2308 | xfs_buf_relse(bp); |
2315 | xfs_fs_cmn_err(CE_ALERT, mp, | 2309 | xfs_fs_cmn_err(CE_ALERT, mp, |
2316 | "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld", | 2310 | "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld", |
2317 | item, ino); | 2311 | item, ino); |
2318 | XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)", | 2312 | XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)", |
2319 | XFS_ERRLEVEL_LOW, mp); | 2313 | XFS_ERRLEVEL_LOW, mp); |
2320 | error = EFSCORRUPTED; | 2314 | error = EFSCORRUPTED; |
2321 | goto error; | 2315 | goto error; |
2322 | } | 2316 | } |
2323 | 2317 | ||
2324 | /* Skip replay when the on-disk inode is newer than the log one */ | 2318 | /* Skip replay when the on-disk inode is newer than the log one */ |
2325 | if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { | 2319 | if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { |
2326 | /* | 2320 | /* |
2327 | * Deal with the wrap case: after a wrap, DI_MAX_FLUSH is | 2321 | * Deal with the wrap case: after a wrap, DI_MAX_FLUSH is |
2328 | * effectively less than small values | 2322 | * effectively less than small values |
2329 | */ | 2323 | */ |
2330 | if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH && | 2324 | if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH && |
2331 | dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) { | 2325 | dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) { |
2332 | /* do nothing */ | 2326 | /* do nothing */ |
2333 | } else { | 2327 | } else { |
2334 | xfs_buf_relse(bp); | 2328 | xfs_buf_relse(bp); |
2335 | error = 0; | 2329 | error = 0; |
2336 | goto error; | 2330 | goto error; |
2337 | } | 2331 | } |
2338 | } | 2332 | } |
2339 | /* Take the opportunity to reset the flush iteration count */ | 2333 | /* Take the opportunity to reset the flush iteration count */ |
2340 | dicp->di_flushiter = 0; | 2334 | dicp->di_flushiter = 0; |
2341 | 2335 | ||
2342 | if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) { | 2336 | if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) { |
2343 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && | 2337 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && |
2344 | (dicp->di_format != XFS_DINODE_FMT_BTREE)) { | 2338 | (dicp->di_format != XFS_DINODE_FMT_BTREE)) { |
2345 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(3)", | 2339 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(3)", |
2346 | XFS_ERRLEVEL_LOW, mp, dicp); | 2340 | XFS_ERRLEVEL_LOW, mp, dicp); |
2347 | xfs_buf_relse(bp); | 2341 | xfs_buf_relse(bp); |
2348 | xfs_fs_cmn_err(CE_ALERT, mp, | 2342 | xfs_fs_cmn_err(CE_ALERT, mp, |
2349 | "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", | 2343 | "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", |
2350 | item, dip, bp, ino); | 2344 | item, dip, bp, ino); |
2351 | error = EFSCORRUPTED; | 2345 | error = EFSCORRUPTED; |
2352 | goto error; | 2346 | goto error; |
2353 | } | 2347 | } |
2354 | } else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) { | 2348 | } else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) { |
2355 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && | 2349 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && |
2356 | (dicp->di_format != XFS_DINODE_FMT_BTREE) && | 2350 | (dicp->di_format != XFS_DINODE_FMT_BTREE) && |
2357 | (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { | 2351 | (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { |
2358 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(4)", | 2352 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(4)", |
2359 | XFS_ERRLEVEL_LOW, mp, dicp); | 2353 | XFS_ERRLEVEL_LOW, mp, dicp); |
2360 | xfs_buf_relse(bp); | 2354 | xfs_buf_relse(bp); |
2361 | xfs_fs_cmn_err(CE_ALERT, mp, | 2355 | xfs_fs_cmn_err(CE_ALERT, mp, |
2362 | "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", | 2356 | "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", |
2363 | item, dip, bp, ino); | 2357 | item, dip, bp, ino); |
2364 | error = EFSCORRUPTED; | 2358 | error = EFSCORRUPTED; |
2365 | goto error; | 2359 | goto error; |
2366 | } | 2360 | } |
2367 | } | 2361 | } |
2368 | if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ | 2362 | if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ |
2369 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(5)", | 2363 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(5)", |
2370 | XFS_ERRLEVEL_LOW, mp, dicp); | 2364 | XFS_ERRLEVEL_LOW, mp, dicp); |
2371 | xfs_buf_relse(bp); | 2365 | xfs_buf_relse(bp); |
2372 | xfs_fs_cmn_err(CE_ALERT, mp, | 2366 | xfs_fs_cmn_err(CE_ALERT, mp, |
2373 | "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", | 2367 | "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", |
2374 | item, dip, bp, ino, | 2368 | item, dip, bp, ino, |
2375 | dicp->di_nextents + dicp->di_anextents, | 2369 | dicp->di_nextents + dicp->di_anextents, |
2376 | dicp->di_nblocks); | 2370 | dicp->di_nblocks); |
2377 | error = EFSCORRUPTED; | 2371 | error = EFSCORRUPTED; |
2378 | goto error; | 2372 | goto error; |
2379 | } | 2373 | } |
2380 | if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { | 2374 | if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { |
2381 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)", | 2375 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)", |
2382 | XFS_ERRLEVEL_LOW, mp, dicp); | 2376 | XFS_ERRLEVEL_LOW, mp, dicp); |
2383 | xfs_buf_relse(bp); | 2377 | xfs_buf_relse(bp); |
2384 | xfs_fs_cmn_err(CE_ALERT, mp, | 2378 | xfs_fs_cmn_err(CE_ALERT, mp, |
2385 | "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x", | 2379 | "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x", |
2386 | item, dip, bp, ino, dicp->di_forkoff); | 2380 | item, dip, bp, ino, dicp->di_forkoff); |
2387 | error = EFSCORRUPTED; | 2381 | error = EFSCORRUPTED; |
2388 | goto error; | 2382 | goto error; |
2389 | } | 2383 | } |
2390 | if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { | 2384 | if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { |
2391 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)", | 2385 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)", |
2392 | XFS_ERRLEVEL_LOW, mp, dicp); | 2386 | XFS_ERRLEVEL_LOW, mp, dicp); |
2393 | xfs_buf_relse(bp); | 2387 | xfs_buf_relse(bp); |
2394 | xfs_fs_cmn_err(CE_ALERT, mp, | 2388 | xfs_fs_cmn_err(CE_ALERT, mp, |
2395 | "xfs_inode_recover: Bad inode log record length %d, rec ptr 0x%p", | 2389 | "xfs_inode_recover: Bad inode log record length %d, rec ptr 0x%p", |
2396 | item->ri_buf[1].i_len, item); | 2390 | item->ri_buf[1].i_len, item); |
2397 | error = EFSCORRUPTED; | 2391 | error = EFSCORRUPTED; |
2398 | goto error; | 2392 | goto error; |
2399 | } | 2393 | } |
2400 | 2394 | ||
2401 | /* The core is in in-core format */ | 2395 | /* The core is in in-core format */ |
2402 | xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr); | 2396 | xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr); |
2403 | 2397 | ||
2404 | /* the rest is in on-disk format */ | 2398 | /* the rest is in on-disk format */ |
2405 | if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) { | 2399 | if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) { |
2406 | memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode), | 2400 | memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode), |
2407 | item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode), | 2401 | item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode), |
2408 | item->ri_buf[1].i_len - sizeof(struct xfs_icdinode)); | 2402 | item->ri_buf[1].i_len - sizeof(struct xfs_icdinode)); |
2409 | } | 2403 | } |
2410 | 2404 | ||
2411 | fields = in_f->ilf_fields; | 2405 | fields = in_f->ilf_fields; |
2412 | switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) { | 2406 | switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) { |
2413 | case XFS_ILOG_DEV: | 2407 | case XFS_ILOG_DEV: |
2414 | xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev); | 2408 | xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev); |
2415 | break; | 2409 | break; |
2416 | case XFS_ILOG_UUID: | 2410 | case XFS_ILOG_UUID: |
2417 | memcpy(XFS_DFORK_DPTR(dip), | 2411 | memcpy(XFS_DFORK_DPTR(dip), |
2418 | &in_f->ilf_u.ilfu_uuid, | 2412 | &in_f->ilf_u.ilfu_uuid, |
2419 | sizeof(uuid_t)); | 2413 | sizeof(uuid_t)); |
2420 | break; | 2414 | break; |
2421 | } | 2415 | } |
2422 | 2416 | ||
2423 | if (in_f->ilf_size == 2) | 2417 | if (in_f->ilf_size == 2) |
2424 | goto write_inode_buffer; | 2418 | goto write_inode_buffer; |
2425 | len = item->ri_buf[2].i_len; | 2419 | len = item->ri_buf[2].i_len; |
2426 | src = item->ri_buf[2].i_addr; | 2420 | src = item->ri_buf[2].i_addr; |
2427 | ASSERT(in_f->ilf_size <= 4); | 2421 | ASSERT(in_f->ilf_size <= 4); |
2428 | ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK)); | 2422 | ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK)); |
2429 | ASSERT(!(fields & XFS_ILOG_DFORK) || | 2423 | ASSERT(!(fields & XFS_ILOG_DFORK) || |
2430 | (len == in_f->ilf_dsize)); | 2424 | (len == in_f->ilf_dsize)); |
2431 | 2425 | ||
2432 | switch (fields & XFS_ILOG_DFORK) { | 2426 | switch (fields & XFS_ILOG_DFORK) { |
2433 | case XFS_ILOG_DDATA: | 2427 | case XFS_ILOG_DDATA: |
2434 | case XFS_ILOG_DEXT: | 2428 | case XFS_ILOG_DEXT: |
2435 | memcpy(XFS_DFORK_DPTR(dip), src, len); | 2429 | memcpy(XFS_DFORK_DPTR(dip), src, len); |
2436 | break; | 2430 | break; |
2437 | 2431 | ||
2438 | case XFS_ILOG_DBROOT: | 2432 | case XFS_ILOG_DBROOT: |
2439 | xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len, | 2433 | xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len, |
2440 | (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip), | 2434 | (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip), |
2441 | XFS_DFORK_DSIZE(dip, mp)); | 2435 | XFS_DFORK_DSIZE(dip, mp)); |
2442 | break; | 2436 | break; |
2443 | 2437 | ||
2444 | default: | 2438 | default: |
2445 | /* | 2439 | /* |
2446 | * There are no data fork flags set. | 2440 | * There are no data fork flags set. |
2447 | */ | 2441 | */ |
2448 | ASSERT((fields & XFS_ILOG_DFORK) == 0); | 2442 | ASSERT((fields & XFS_ILOG_DFORK) == 0); |
2449 | break; | 2443 | break; |
2450 | } | 2444 | } |
2451 | 2445 | ||
2452 | /* | 2446 | /* |
2453 | * If we logged any attribute data, recover it. There may or | 2447 | * If we logged any attribute data, recover it. There may or |
2454 | * may not have been any other non-core data logged in this | 2448 | * may not have been any other non-core data logged in this |
2455 | * transaction. | 2449 | * transaction. |
2456 | */ | 2450 | */ |
2457 | if (in_f->ilf_fields & XFS_ILOG_AFORK) { | 2451 | if (in_f->ilf_fields & XFS_ILOG_AFORK) { |
2458 | if (in_f->ilf_fields & XFS_ILOG_DFORK) { | 2452 | if (in_f->ilf_fields & XFS_ILOG_DFORK) { |
2459 | attr_index = 3; | 2453 | attr_index = 3; |
2460 | } else { | 2454 | } else { |
2461 | attr_index = 2; | 2455 | attr_index = 2; |
2462 | } | 2456 | } |
2463 | len = item->ri_buf[attr_index].i_len; | 2457 | len = item->ri_buf[attr_index].i_len; |
2464 | src = item->ri_buf[attr_index].i_addr; | 2458 | src = item->ri_buf[attr_index].i_addr; |
2465 | ASSERT(len == in_f->ilf_asize); | 2459 | ASSERT(len == in_f->ilf_asize); |
2466 | 2460 | ||
2467 | switch (in_f->ilf_fields & XFS_ILOG_AFORK) { | 2461 | switch (in_f->ilf_fields & XFS_ILOG_AFORK) { |
2468 | case XFS_ILOG_ADATA: | 2462 | case XFS_ILOG_ADATA: |
2469 | case XFS_ILOG_AEXT: | 2463 | case XFS_ILOG_AEXT: |
2470 | dest = XFS_DFORK_APTR(dip); | 2464 | dest = XFS_DFORK_APTR(dip); |
2471 | ASSERT(len <= XFS_DFORK_ASIZE(dip, mp)); | 2465 | ASSERT(len <= XFS_DFORK_ASIZE(dip, mp)); |
2472 | memcpy(dest, src, len); | 2466 | memcpy(dest, src, len); |
2473 | break; | 2467 | break; |
2474 | 2468 | ||
2475 | case XFS_ILOG_ABROOT: | 2469 | case XFS_ILOG_ABROOT: |
2476 | dest = XFS_DFORK_APTR(dip); | 2470 | dest = XFS_DFORK_APTR(dip); |
2477 | xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, | 2471 | xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, |
2478 | len, (xfs_bmdr_block_t*)dest, | 2472 | len, (xfs_bmdr_block_t*)dest, |
2479 | XFS_DFORK_ASIZE(dip, mp)); | 2473 | XFS_DFORK_ASIZE(dip, mp)); |
2480 | break; | 2474 | break; |
2481 | 2475 | ||
2482 | default: | 2476 | default: |
2483 | xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag"); | 2477 | xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag"); |
2484 | ASSERT(0); | 2478 | ASSERT(0); |
2485 | xfs_buf_relse(bp); | 2479 | xfs_buf_relse(bp); |
2486 | error = EIO; | 2480 | error = EIO; |
2487 | goto error; | 2481 | goto error; |
2488 | } | 2482 | } |
2489 | } | 2483 | } |
2490 | 2484 | ||
2491 | write_inode_buffer: | 2485 | write_inode_buffer: |
2492 | if (ITEM_TYPE(item) == XFS_LI_INODE) { | 2486 | if (ITEM_TYPE(item) == XFS_LI_INODE) { |
2493 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || | 2487 | ASSERT(bp->b_mount == NULL || bp->b_mount == mp); |
2494 | XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); | 2488 | bp->b_mount = mp; |
2495 | XFS_BUF_SET_FSPRIVATE(bp, mp); | ||
2496 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); | 2489 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); |
2497 | xfs_bdwrite(mp, bp); | 2490 | xfs_bdwrite(mp, bp); |
2498 | } else { | 2491 | } else { |
2499 | XFS_BUF_STALE(bp); | 2492 | XFS_BUF_STALE(bp); |
2500 | error = xfs_bwrite(mp, bp); | 2493 | error = xfs_bwrite(mp, bp); |
2501 | } | 2494 | } |
2502 | 2495 | ||
2503 | error: | 2496 | error: |
2504 | if (need_free) | 2497 | if (need_free) |
2505 | kmem_free(in_f); | 2498 | kmem_free(in_f); |
2506 | return XFS_ERROR(error); | 2499 | return XFS_ERROR(error); |
2507 | } | 2500 | } |
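
The hunk above is the core of this change: instead of stashing the mount pointer in the untyped b_fspriv field via XFS_BUF_SET_FSPRIVATE(), recovery now asserts and assigns the typed b_mount pointer directly before installing the I/O completion handler. A minimal standalone sketch of that claim-and-stamp pattern, with simplified stand-in structs (not the real kernel definitions) and a hypothetical helper name:

    #include <assert.h>
    #include <stddef.h>

    struct xfs_mount { int dummy; };        /* stand-in, not the real struct */

    struct xfs_buf {
            struct xfs_mount *b_mount;      /* typed owner, replaces b_fspriv */
            void (*b_iodone)(struct xfs_buf *);
    };

    /* hypothetical helper mirroring the write_inode_buffer site above */
    static void claim_buf_for_recovery(struct xfs_buf *bp, struct xfs_mount *mp,
                                       void (*iodone)(struct xfs_buf *))
    {
            /* a buffer may be written several times during replay, but it
             * must always belong to the same mount */
            assert(bp->b_mount == NULL || bp->b_mount == mp);
            bp->b_mount = mp;
            bp->b_iodone = iodone;
    }

    int main(void)
    {
            static struct xfs_mount m;
            struct xfs_buf b = { NULL, NULL };

            claim_buf_for_recovery(&b, &m, NULL);
            claim_buf_for_recovery(&b, &m, NULL);   /* re-claiming is fine */
            return 0;
    }
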
2508 | 2501 | ||
2509 | /* | 2502 | /* |
2510 | * Recover QUOTAOFF records. We simply make a note of them in the xlog_t | 2503 | * Recover QUOTAOFF records. We simply make a note of them in the xlog_t |
2511 | * structure, so that we know not to do any dquot item or dquot buffer recovery | 2504 | * structure, so that we know not to do any dquot item or dquot buffer recovery |
2512 | * of that type. | 2505 | * of that type. |
2513 | */ | 2506 | */ |
2514 | STATIC int | 2507 | STATIC int |
2515 | xlog_recover_do_quotaoff_trans( | 2508 | xlog_recover_do_quotaoff_trans( |
2516 | xlog_t *log, | 2509 | xlog_t *log, |
2517 | xlog_recover_item_t *item, | 2510 | xlog_recover_item_t *item, |
2518 | int pass) | 2511 | int pass) |
2519 | { | 2512 | { |
2520 | xfs_qoff_logformat_t *qoff_f; | 2513 | xfs_qoff_logformat_t *qoff_f; |
2521 | 2514 | ||
2522 | if (pass == XLOG_RECOVER_PASS2) { | 2515 | if (pass == XLOG_RECOVER_PASS2) { |
2523 | return (0); | 2516 | return (0); |
2524 | } | 2517 | } |
2525 | 2518 | ||
2526 | qoff_f = (xfs_qoff_logformat_t *)item->ri_buf[0].i_addr; | 2519 | qoff_f = (xfs_qoff_logformat_t *)item->ri_buf[0].i_addr; |
2527 | ASSERT(qoff_f); | 2520 | ASSERT(qoff_f); |
2528 | 2521 | ||
2529 | /* | 2522 | /* |
2530 | * The logitem format's flag tells us if this was user quotaoff, | 2523 | * The logitem format's flag tells us if this was user quotaoff, |
2531 | * group/project quotaoff or both. | 2524 | * group/project quotaoff or both. |
2532 | */ | 2525 | */ |
2533 | if (qoff_f->qf_flags & XFS_UQUOTA_ACCT) | 2526 | if (qoff_f->qf_flags & XFS_UQUOTA_ACCT) |
2534 | log->l_quotaoffs_flag |= XFS_DQ_USER; | 2527 | log->l_quotaoffs_flag |= XFS_DQ_USER; |
2535 | if (qoff_f->qf_flags & XFS_PQUOTA_ACCT) | 2528 | if (qoff_f->qf_flags & XFS_PQUOTA_ACCT) |
2536 | log->l_quotaoffs_flag |= XFS_DQ_PROJ; | 2529 | log->l_quotaoffs_flag |= XFS_DQ_PROJ; |
2537 | if (qoff_f->qf_flags & XFS_GQUOTA_ACCT) | 2530 | if (qoff_f->qf_flags & XFS_GQUOTA_ACCT) |
2538 | log->l_quotaoffs_flag |= XFS_DQ_GROUP; | 2531 | log->l_quotaoffs_flag |= XFS_DQ_GROUP; |
2539 | 2532 | ||
2540 | return (0); | 2533 | return (0); |
2541 | } | 2534 | } |
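
Taken together, the quotaoff routine above and the dquot routine below reduce to a small bitmask handshake: pass 1 records which quota types were turned off in log->l_quotaoffs_flag, and dquot replay later skips any record whose type bit is set there. A compressed sketch of that handshake; the bit values here are illustrative only, not the real XFS constants:

    #include <stdio.h>

    /* illustrative bit values only, not the real XFS constants */
    #define XFS_UQUOTA_ACCT 0x1
    #define XFS_PQUOTA_ACCT 0x2
    #define XFS_GQUOTA_ACCT 0x4
    #define XFS_DQ_USER     0x1
    #define XFS_DQ_PROJ     0x2
    #define XFS_DQ_GROUP    0x4

    static unsigned quotaoffs_flag;     /* mirrors log->l_quotaoffs_flag */

    /* pass 1: remember which quota types were switched off */
    static void note_quotaoff(unsigned qf_flags)
    {
            if (qf_flags & XFS_UQUOTA_ACCT)
                    quotaoffs_flag |= XFS_DQ_USER;
            if (qf_flags & XFS_PQUOTA_ACCT)
                    quotaoffs_flag |= XFS_DQ_PROJ;
            if (qf_flags & XFS_GQUOTA_ACCT)
                    quotaoffs_flag |= XFS_DQ_GROUP;
    }

    /* pass 2: a dquot record is replayed only if its type is still on */
    static int should_replay_dquot(unsigned d_flags)
    {
            unsigned type = d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);

            return (quotaoffs_flag & type) == 0;
    }

    int main(void)
    {
            note_quotaoff(XFS_UQUOTA_ACCT);                     /* user quota off */
            printf("%d\n", should_replay_dquot(XFS_DQ_USER));   /* 0: skipped */
            printf("%d\n", should_replay_dquot(XFS_DQ_GROUP));  /* 1: replayed */
            return 0;
    }
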
2542 | 2535 | ||
2543 | /* | 2536 | /* |
2544 | * Recover a dquot record | 2537 | * Recover a dquot record |
2545 | */ | 2538 | */ |
2546 | STATIC int | 2539 | STATIC int |
2547 | xlog_recover_do_dquot_trans( | 2540 | xlog_recover_do_dquot_trans( |
2548 | xlog_t *log, | 2541 | xlog_t *log, |
2549 | xlog_recover_item_t *item, | 2542 | xlog_recover_item_t *item, |
2550 | int pass) | 2543 | int pass) |
2551 | { | 2544 | { |
2552 | xfs_mount_t *mp; | 2545 | xfs_mount_t *mp; |
2553 | xfs_buf_t *bp; | 2546 | xfs_buf_t *bp; |
2554 | struct xfs_disk_dquot *ddq, *recddq; | 2547 | struct xfs_disk_dquot *ddq, *recddq; |
2555 | int error; | 2548 | int error; |
2556 | xfs_dq_logformat_t *dq_f; | 2549 | xfs_dq_logformat_t *dq_f; |
2557 | uint type; | 2550 | uint type; |
2558 | 2551 | ||
2559 | if (pass == XLOG_RECOVER_PASS1) { | 2552 | if (pass == XLOG_RECOVER_PASS1) { |
2560 | return 0; | 2553 | return 0; |
2561 | } | 2554 | } |
2562 | mp = log->l_mp; | 2555 | mp = log->l_mp; |
2563 | 2556 | ||
2564 | /* | 2557 | /* |
2565 | * Filesystems are required to send in quota flags at mount time. | 2558 | * Filesystems are required to send in quota flags at mount time. |
2566 | */ | 2559 | */ |
2567 | if (mp->m_qflags == 0) | 2560 | if (mp->m_qflags == 0) |
2568 | return (0); | 2561 | return (0); |
2569 | 2562 | ||
2570 | recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr; | 2563 | recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr; |
2571 | ASSERT(recddq); | 2564 | ASSERT(recddq); |
2572 | /* | 2565 | /* |
2573 | * This type of quota was turned off, so ignore this record. | 2566 | * This type of quota was turned off, so ignore this record. |
2574 | */ | 2567 | */ |
2575 | type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); | 2568 | type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); |
2576 | ASSERT(type); | 2569 | ASSERT(type); |
2577 | if (log->l_quotaoffs_flag & type) | 2570 | if (log->l_quotaoffs_flag & type) |
2578 | return (0); | 2571 | return (0); |
2579 | 2572 | ||
2580 | /* | 2573 | /* |
2581 | * At this point we know that quota was _not_ turned off. | 2574 | * At this point we know that quota was _not_ turned off. |
2582 | * Since the mount flags do not indicate otherwise, this | 2575 | * Since the mount flags do not indicate otherwise, this |
2583 | * must mean that quota is on, and the dquot needs to be replayed. | 2576 | * must mean that quota is on, and the dquot needs to be replayed. |
2584 | * Remember that we may not have fully recovered the superblock yet, | 2577 | * Remember that we may not have fully recovered the superblock yet, |
2585 | * so we can't do the usual trick of looking at the SB quota bits. | 2578 | * so we can't do the usual trick of looking at the SB quota bits. |
2586 | * | 2579 | * |
2587 | * The other possibility, of course, is that the quota subsystem was | 2580 | * The other possibility, of course, is that the quota subsystem was |
2588 | * removed since the last mount - ENOSYS. | 2581 | * removed since the last mount - ENOSYS. |
2589 | */ | 2582 | */ |
2590 | dq_f = (xfs_dq_logformat_t *)item->ri_buf[0].i_addr; | 2583 | dq_f = (xfs_dq_logformat_t *)item->ri_buf[0].i_addr; |
2591 | ASSERT(dq_f); | 2584 | ASSERT(dq_f); |
2592 | if ((error = xfs_qm_dqcheck(recddq, | 2585 | if ((error = xfs_qm_dqcheck(recddq, |
2593 | dq_f->qlf_id, | 2586 | dq_f->qlf_id, |
2594 | 0, XFS_QMOPT_DOWARN, | 2587 | 0, XFS_QMOPT_DOWARN, |
2595 | "xlog_recover_do_dquot_trans (log copy)"))) { | 2588 | "xlog_recover_do_dquot_trans (log copy)"))) { |
2596 | return XFS_ERROR(EIO); | 2589 | return XFS_ERROR(EIO); |
2597 | } | 2590 | } |
2598 | ASSERT(dq_f->qlf_len == 1); | 2591 | ASSERT(dq_f->qlf_len == 1); |
2599 | 2592 | ||
2600 | error = xfs_read_buf(mp, mp->m_ddev_targp, | 2593 | error = xfs_read_buf(mp, mp->m_ddev_targp, |
2601 | dq_f->qlf_blkno, | 2594 | dq_f->qlf_blkno, |
2602 | XFS_FSB_TO_BB(mp, dq_f->qlf_len), | 2595 | XFS_FSB_TO_BB(mp, dq_f->qlf_len), |
2603 | 0, &bp); | 2596 | 0, &bp); |
2604 | if (error) { | 2597 | if (error) { |
2605 | xfs_ioerror_alert("xlog_recover_do..(read#3)", mp, | 2598 | xfs_ioerror_alert("xlog_recover_do..(read#3)", mp, |
2606 | bp, dq_f->qlf_blkno); | 2599 | bp, dq_f->qlf_blkno); |
2607 | return error; | 2600 | return error; |
2608 | } | 2601 | } |
2609 | ASSERT(bp); | 2602 | ASSERT(bp); |
2610 | ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset); | 2603 | ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset); |
2611 | 2604 | ||
2612 | /* | 2605 | /* |
2613 | * At least the magic num portion should be on disk because this | 2606 | * At least the magic num portion should be on disk because this |
2614 | * was among a chunk of dquots created earlier, and we did some | 2607 | * was among a chunk of dquots created earlier, and we did some |
2615 | * minimal initialization then. | 2608 | * minimal initialization then. |
2616 | */ | 2609 | */ |
2617 | if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, | 2610 | if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, |
2618 | "xlog_recover_do_dquot_trans")) { | 2611 | "xlog_recover_do_dquot_trans")) { |
2619 | xfs_buf_relse(bp); | 2612 | xfs_buf_relse(bp); |
2620 | return XFS_ERROR(EIO); | 2613 | return XFS_ERROR(EIO); |
2621 | } | 2614 | } |
2622 | 2615 | ||
2623 | memcpy(ddq, recddq, item->ri_buf[1].i_len); | 2616 | memcpy(ddq, recddq, item->ri_buf[1].i_len); |
2624 | 2617 | ||
2625 | ASSERT(dq_f->qlf_size == 2); | 2618 | ASSERT(dq_f->qlf_size == 2); |
2626 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || | 2619 | ASSERT(bp->b_mount == NULL || bp->b_mount == mp); |
2627 | XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); | 2620 | bp->b_mount = mp; |
2628 | XFS_BUF_SET_FSPRIVATE(bp, mp); | ||
2629 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); | 2621 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); |
2630 | xfs_bdwrite(mp, bp); | 2622 | xfs_bdwrite(mp, bp); |
2631 | 2623 | ||
2632 | return (0); | 2624 | return (0); |
2633 | } | 2625 | } |
2634 | 2626 | ||
2635 | /* | 2627 | /* |
2636 | * This routine is called to create an in-core extent free intent | 2628 | * This routine is called to create an in-core extent free intent |
2637 | * item from the efi format structure which was logged on disk. | 2629 | * item from the efi format structure which was logged on disk. |
2638 | * It allocates an in-core efi, copies the extents from the format | 2630 | * It allocates an in-core efi, copies the extents from the format |
2639 | * structure into it, and adds the efi to the AIL with the given | 2631 | * structure into it, and adds the efi to the AIL with the given |
2640 | * LSN. | 2632 | * LSN. |
2641 | */ | 2633 | */ |
2642 | STATIC int | 2634 | STATIC int |
2643 | xlog_recover_do_efi_trans( | 2635 | xlog_recover_do_efi_trans( |
2644 | xlog_t *log, | 2636 | xlog_t *log, |
2645 | xlog_recover_item_t *item, | 2637 | xlog_recover_item_t *item, |
2646 | xfs_lsn_t lsn, | 2638 | xfs_lsn_t lsn, |
2647 | int pass) | 2639 | int pass) |
2648 | { | 2640 | { |
2649 | int error; | 2641 | int error; |
2650 | xfs_mount_t *mp; | 2642 | xfs_mount_t *mp; |
2651 | xfs_efi_log_item_t *efip; | 2643 | xfs_efi_log_item_t *efip; |
2652 | xfs_efi_log_format_t *efi_formatp; | 2644 | xfs_efi_log_format_t *efi_formatp; |
2653 | 2645 | ||
2654 | if (pass == XLOG_RECOVER_PASS1) { | 2646 | if (pass == XLOG_RECOVER_PASS1) { |
2655 | return 0; | 2647 | return 0; |
2656 | } | 2648 | } |
2657 | 2649 | ||
2658 | efi_formatp = (xfs_efi_log_format_t *)item->ri_buf[0].i_addr; | 2650 | efi_formatp = (xfs_efi_log_format_t *)item->ri_buf[0].i_addr; |
2659 | 2651 | ||
2660 | mp = log->l_mp; | 2652 | mp = log->l_mp; |
2661 | efip = xfs_efi_init(mp, efi_formatp->efi_nextents); | 2653 | efip = xfs_efi_init(mp, efi_formatp->efi_nextents); |
2662 | if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), | 2654 | if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), |
2663 | &(efip->efi_format)))) { | 2655 | &(efip->efi_format)))) { |
2664 | xfs_efi_item_free(efip); | 2656 | xfs_efi_item_free(efip); |
2665 | return error; | 2657 | return error; |
2666 | } | 2658 | } |
2667 | efip->efi_next_extent = efi_formatp->efi_nextents; | 2659 | efip->efi_next_extent = efi_formatp->efi_nextents; |
2668 | efip->efi_flags |= XFS_EFI_COMMITTED; | 2660 | efip->efi_flags |= XFS_EFI_COMMITTED; |
2669 | 2661 | ||
2670 | spin_lock(&log->l_ailp->xa_lock); | 2662 | spin_lock(&log->l_ailp->xa_lock); |
2671 | /* | 2663 | /* |
2672 | * xfs_trans_ail_update() drops the AIL lock. | 2664 | * xfs_trans_ail_update() drops the AIL lock. |
2673 | */ | 2665 | */ |
2674 | xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn); | 2666 | xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn); |
2675 | return 0; | 2667 | return 0; |
2676 | } | 2668 | } |
2677 | 2669 | ||
2678 | 2670 | ||
2679 | /* | 2671 | /* |
2680 | * This routine is called when an efd format structure is found in | 2672 | * This routine is called when an efd format structure is found in |
2681 | * a committed transaction in the log. Its purpose is to cancel | 2673 | * a committed transaction in the log. Its purpose is to cancel |
2682 | * the corresponding efi if it was still in the log. To do this | 2674 | * the corresponding efi if it was still in the log. To do this |
2683 | * it searches the AIL for the efi with an id equal to that in the | 2675 | * it searches the AIL for the efi with an id equal to that in the |
2684 | * efd format structure. If we find it, we remove the efi from the | 2676 | * efd format structure. If we find it, we remove the efi from the |
2685 | * AIL and free it. | 2677 | * AIL and free it. |
2686 | */ | 2678 | */ |
2687 | STATIC void | 2679 | STATIC void |
2688 | xlog_recover_do_efd_trans( | 2680 | xlog_recover_do_efd_trans( |
2689 | xlog_t *log, | 2681 | xlog_t *log, |
2690 | xlog_recover_item_t *item, | 2682 | xlog_recover_item_t *item, |
2691 | int pass) | 2683 | int pass) |
2692 | { | 2684 | { |
2693 | xfs_efd_log_format_t *efd_formatp; | 2685 | xfs_efd_log_format_t *efd_formatp; |
2694 | xfs_efi_log_item_t *efip = NULL; | 2686 | xfs_efi_log_item_t *efip = NULL; |
2695 | xfs_log_item_t *lip; | 2687 | xfs_log_item_t *lip; |
2696 | __uint64_t efi_id; | 2688 | __uint64_t efi_id; |
2697 | struct xfs_ail_cursor cur; | 2689 | struct xfs_ail_cursor cur; |
2698 | struct xfs_ail *ailp = log->l_ailp; | 2690 | struct xfs_ail *ailp = log->l_ailp; |
2699 | 2691 | ||
2700 | if (pass == XLOG_RECOVER_PASS1) { | 2692 | if (pass == XLOG_RECOVER_PASS1) { |
2701 | return; | 2693 | return; |
2702 | } | 2694 | } |
2703 | 2695 | ||
2704 | efd_formatp = (xfs_efd_log_format_t *)item->ri_buf[0].i_addr; | 2696 | efd_formatp = (xfs_efd_log_format_t *)item->ri_buf[0].i_addr; |
2705 | ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + | 2697 | ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + |
2706 | ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || | 2698 | ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || |
2707 | (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + | 2699 | (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + |
2708 | ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t))))); | 2700 | ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t))))); |
2709 | efi_id = efd_formatp->efd_efi_id; | 2701 | efi_id = efd_formatp->efd_efi_id; |
2710 | 2702 | ||
2711 | /* | 2703 | /* |
2712 | * Search for the efi with the id in the efd format structure | 2704 | * Search for the efi with the id in the efd format structure |
2713 | * in the AIL. | 2705 | * in the AIL. |
2714 | */ | 2706 | */ |
2715 | spin_lock(&ailp->xa_lock); | 2707 | spin_lock(&ailp->xa_lock); |
2716 | lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); | 2708 | lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); |
2717 | while (lip != NULL) { | 2709 | while (lip != NULL) { |
2718 | if (lip->li_type == XFS_LI_EFI) { | 2710 | if (lip->li_type == XFS_LI_EFI) { |
2719 | efip = (xfs_efi_log_item_t *)lip; | 2711 | efip = (xfs_efi_log_item_t *)lip; |
2720 | if (efip->efi_format.efi_id == efi_id) { | 2712 | if (efip->efi_format.efi_id == efi_id) { |
2721 | /* | 2713 | /* |
2722 | * xfs_trans_ail_delete() drops the | 2714 | * xfs_trans_ail_delete() drops the |
2723 | * AIL lock. | 2715 | * AIL lock. |
2724 | */ | 2716 | */ |
2725 | xfs_trans_ail_delete(ailp, lip); | 2717 | xfs_trans_ail_delete(ailp, lip); |
2726 | xfs_efi_item_free(efip); | 2718 | xfs_efi_item_free(efip); |
2727 | spin_lock(&ailp->xa_lock); | 2719 | spin_lock(&ailp->xa_lock); |
2728 | break; | 2720 | break; |
2729 | } | 2721 | } |
2730 | } | 2722 | } |
2731 | lip = xfs_trans_ail_cursor_next(ailp, &cur); | 2723 | lip = xfs_trans_ail_cursor_next(ailp, &cur); |
2732 | } | 2724 | } |
2733 | xfs_trans_ail_cursor_done(ailp, &cur); | 2725 | xfs_trans_ail_cursor_done(ailp, &cur); |
2734 | spin_unlock(&ailp->xa_lock); | 2726 | spin_unlock(&ailp->xa_lock); |
2735 | } | 2727 | } |
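
The EFI/EFD pair implements an intent/done protocol: an extent-free intent stays pending across a crash unless a done record carrying the same id also made it to the log, in which case the routine above cancels it. Stripped of the AIL locking and cursor machinery, the search is a linear scan for a matching id, sketched here over a plain array (types invented for illustration):

    #include <stddef.h>
    #include <stdint.h>

    struct efi {                        /* stand-in for xfs_efi_log_item_t */
            uint64_t id;
            int live;
    };

    /* cancel the pending intent named by a done record, if still present;
     * mirrors the xfs_trans_ail_delete()/xfs_efi_item_free() step above */
    static void cancel_intent(struct efi *table, size_t n, uint64_t done_id)
    {
            for (size_t i = 0; i < n; i++) {
                    if (table[i].live && table[i].id == done_id) {
                            table[i].live = 0;
                            break;
                    }
            }
    }

    int main(void)
    {
            struct efi ail[2] = { { .id = 7, .live = 1 }, { .id = 9, .live = 1 } };

            cancel_intent(ail, 2, 9);   /* EFD for id 9 cancels that EFI */
            return ail[1].live;         /* 0: intent no longer pending */
    }
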
2736 | 2728 | ||
2737 | /* | 2729 | /* |
2738 | * Perform the transaction | 2730 | * Perform the transaction |
2739 | * | 2731 | * |
2740 | * If the transaction modifies a buffer or inode, do it now. Otherwise, | 2732 | * If the transaction modifies a buffer or inode, do it now. Otherwise, |
2741 | * EFIs and EFDs get queued up by adding entries into the AIL for them. | 2733 | * EFIs and EFDs get queued up by adding entries into the AIL for them. |
2742 | */ | 2734 | */ |
2743 | STATIC int | 2735 | STATIC int |
2744 | xlog_recover_do_trans( | 2736 | xlog_recover_do_trans( |
2745 | xlog_t *log, | 2737 | xlog_t *log, |
2746 | xlog_recover_t *trans, | 2738 | xlog_recover_t *trans, |
2747 | int pass) | 2739 | int pass) |
2748 | { | 2740 | { |
2749 | int error = 0; | 2741 | int error = 0; |
2750 | xlog_recover_item_t *item, *first_item; | 2742 | xlog_recover_item_t *item, *first_item; |
2751 | 2743 | ||
2752 | if ((error = xlog_recover_reorder_trans(trans))) | 2744 | if ((error = xlog_recover_reorder_trans(trans))) |
2753 | return error; | 2745 | return error; |
2754 | first_item = item = trans->r_itemq; | 2746 | first_item = item = trans->r_itemq; |
2755 | do { | 2747 | do { |
2756 | /* | 2748 | /* |
2757 | * we don't need to worry about the block number being | 2749 | * we don't need to worry about the block number being |
2758 | * truncated in > 1 TB buffers because in user-land, | 2750 | * truncated in > 1 TB buffers because in user-land, |
2759 | * we're now n32 or 64-bit, so xfs_daddr_t is 64 bits and | 2751 | * we're now n32 or 64-bit, so xfs_daddr_t is 64 bits and |
2760 | * the blknos will get through the user-mode buffer | 2752 | * the blknos will get through the user-mode buffer |
2761 | * cache properly. The only bad case is o32 kernels | 2753 | * cache properly. The only bad case is o32 kernels |
2762 | * where xfs_daddr_t is 32-bits but mount will warn us | 2754 | * where xfs_daddr_t is 32-bits but mount will warn us |
2763 | * off a > 1 TB filesystem before we get here. | 2755 | * off a > 1 TB filesystem before we get here. |
2764 | */ | 2756 | */ |
2765 | if ((ITEM_TYPE(item) == XFS_LI_BUF)) { | 2757 | if ((ITEM_TYPE(item) == XFS_LI_BUF)) { |
2766 | if ((error = xlog_recover_do_buffer_trans(log, item, | 2758 | if ((error = xlog_recover_do_buffer_trans(log, item, |
2767 | pass))) | 2759 | pass))) |
2768 | break; | 2760 | break; |
2769 | } else if ((ITEM_TYPE(item) == XFS_LI_INODE)) { | 2761 | } else if ((ITEM_TYPE(item) == XFS_LI_INODE)) { |
2770 | if ((error = xlog_recover_do_inode_trans(log, item, | 2762 | if ((error = xlog_recover_do_inode_trans(log, item, |
2771 | pass))) | 2763 | pass))) |
2772 | break; | 2764 | break; |
2773 | } else if (ITEM_TYPE(item) == XFS_LI_EFI) { | 2765 | } else if (ITEM_TYPE(item) == XFS_LI_EFI) { |
2774 | if ((error = xlog_recover_do_efi_trans(log, item, trans->r_lsn, | 2766 | if ((error = xlog_recover_do_efi_trans(log, item, trans->r_lsn, |
2775 | pass))) | 2767 | pass))) |
2776 | break; | 2768 | break; |
2777 | } else if (ITEM_TYPE(item) == XFS_LI_EFD) { | 2769 | } else if (ITEM_TYPE(item) == XFS_LI_EFD) { |
2778 | xlog_recover_do_efd_trans(log, item, pass); | 2770 | xlog_recover_do_efd_trans(log, item, pass); |
2779 | } else if (ITEM_TYPE(item) == XFS_LI_DQUOT) { | 2771 | } else if (ITEM_TYPE(item) == XFS_LI_DQUOT) { |
2780 | if ((error = xlog_recover_do_dquot_trans(log, item, | 2772 | if ((error = xlog_recover_do_dquot_trans(log, item, |
2781 | pass))) | 2773 | pass))) |
2782 | break; | 2774 | break; |
2783 | } else if ((ITEM_TYPE(item) == XFS_LI_QUOTAOFF)) { | 2775 | } else if ((ITEM_TYPE(item) == XFS_LI_QUOTAOFF)) { |
2784 | if ((error = xlog_recover_do_quotaoff_trans(log, item, | 2776 | if ((error = xlog_recover_do_quotaoff_trans(log, item, |
2785 | pass))) | 2777 | pass))) |
2786 | break; | 2778 | break; |
2787 | } else { | 2779 | } else { |
2788 | xlog_warn("XFS: xlog_recover_do_trans"); | 2780 | xlog_warn("XFS: xlog_recover_do_trans"); |
2789 | ASSERT(0); | 2781 | ASSERT(0); |
2790 | error = XFS_ERROR(EIO); | 2782 | error = XFS_ERROR(EIO); |
2791 | break; | 2783 | break; |
2792 | } | 2784 | } |
2793 | item = item->ri_next; | 2785 | item = item->ri_next; |
2794 | } while (first_item != item); | 2786 | } while (first_item != item); |
2795 | 2787 | ||
2796 | return error; | 2788 | return error; |
2797 | } | 2789 | } |
2798 | 2790 | ||
2799 | /* | 2791 | /* |
2800 | * Free up any resources allocated by the transaction | 2792 | * Free up any resources allocated by the transaction |
2801 | * | 2793 | * |
2802 | * Remember that EFIs, EFDs, and IUNLINKs are handled later. | 2794 | * Remember that EFIs, EFDs, and IUNLINKs are handled later. |
2803 | */ | 2795 | */ |
2804 | STATIC void | 2796 | STATIC void |
2805 | xlog_recover_free_trans( | 2797 | xlog_recover_free_trans( |
2806 | xlog_recover_t *trans) | 2798 | xlog_recover_t *trans) |
2807 | { | 2799 | { |
2808 | xlog_recover_item_t *first_item, *item, *free_item; | 2800 | xlog_recover_item_t *first_item, *item, *free_item; |
2809 | int i; | 2801 | int i; |
2810 | 2802 | ||
2811 | item = first_item = trans->r_itemq; | 2803 | item = first_item = trans->r_itemq; |
2812 | do { | 2804 | do { |
2813 | free_item = item; | 2805 | free_item = item; |
2814 | item = item->ri_next; | 2806 | item = item->ri_next; |
2815 | /* Free the regions in the item. */ | 2807 | /* Free the regions in the item. */ |
2816 | for (i = 0; i < free_item->ri_cnt; i++) { | 2808 | for (i = 0; i < free_item->ri_cnt; i++) { |
2817 | kmem_free(free_item->ri_buf[i].i_addr); | 2809 | kmem_free(free_item->ri_buf[i].i_addr); |
2818 | } | 2810 | } |
2819 | /* Free the item itself */ | 2811 | /* Free the item itself */ |
2820 | kmem_free(free_item->ri_buf); | 2812 | kmem_free(free_item->ri_buf); |
2821 | kmem_free(free_item); | 2813 | kmem_free(free_item); |
2822 | } while (first_item != item); | 2814 | } while (first_item != item); |
2823 | /* Free the transaction recover structure */ | 2815 | /* Free the transaction recover structure */ |
2824 | kmem_free(trans); | 2816 | kmem_free(trans); |
2825 | } | 2817 | } |
2826 | 2818 | ||
2827 | STATIC int | 2819 | STATIC int |
2828 | xlog_recover_commit_trans( | 2820 | xlog_recover_commit_trans( |
2829 | xlog_t *log, | 2821 | xlog_t *log, |
2830 | xlog_recover_t **q, | 2822 | xlog_recover_t **q, |
2831 | xlog_recover_t *trans, | 2823 | xlog_recover_t *trans, |
2832 | int pass) | 2824 | int pass) |
2833 | { | 2825 | { |
2834 | int error; | 2826 | int error; |
2835 | 2827 | ||
2836 | if ((error = xlog_recover_unlink_tid(q, trans))) | 2828 | if ((error = xlog_recover_unlink_tid(q, trans))) |
2837 | return error; | 2829 | return error; |
2838 | if ((error = xlog_recover_do_trans(log, trans, pass))) | 2830 | if ((error = xlog_recover_do_trans(log, trans, pass))) |
2839 | return error; | 2831 | return error; |
2840 | xlog_recover_free_trans(trans); /* no error */ | 2832 | xlog_recover_free_trans(trans); /* no error */ |
2841 | return 0; | 2833 | return 0; |
2842 | } | 2834 | } |
2843 | 2835 | ||
2844 | STATIC int | 2836 | STATIC int |
2845 | xlog_recover_unmount_trans( | 2837 | xlog_recover_unmount_trans( |
2846 | xlog_recover_t *trans) | 2838 | xlog_recover_t *trans) |
2847 | { | 2839 | { |
2848 | /* Do nothing now */ | 2840 | /* Do nothing now */ |
2849 | xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR"); | 2841 | xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR"); |
2850 | return 0; | 2842 | return 0; |
2851 | } | 2843 | } |
2852 | 2844 | ||
2853 | /* | 2845 | /* |
2854 | * There are two valid states of the r_state field. 0 indicates that the | 2846 | * There are two valid states of the r_state field. 0 indicates that the |
2855 | * transaction structure is in a normal state. We have either seen the | 2847 | * transaction structure is in a normal state. We have either seen the |
2856 | * start of the transaction or the last operation we added was not a partial | 2848 | * start of the transaction or the last operation we added was not a partial |
2857 | * operation. If the last operation we added to the transaction was a | 2849 | * operation. If the last operation we added to the transaction was a |
2858 | * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS. | 2850 | * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS. |
2859 | * | 2851 | * |
2860 | * NOTE: skip LRs with 0 data length. | 2852 | * NOTE: skip LRs with 0 data length. |
2861 | */ | 2853 | */ |
2862 | STATIC int | 2854 | STATIC int |
2863 | xlog_recover_process_data( | 2855 | xlog_recover_process_data( |
2864 | xlog_t *log, | 2856 | xlog_t *log, |
2865 | xlog_recover_t *rhash[], | 2857 | xlog_recover_t *rhash[], |
2866 | xlog_rec_header_t *rhead, | 2858 | xlog_rec_header_t *rhead, |
2867 | xfs_caddr_t dp, | 2859 | xfs_caddr_t dp, |
2868 | int pass) | 2860 | int pass) |
2869 | { | 2861 | { |
2870 | xfs_caddr_t lp; | 2862 | xfs_caddr_t lp; |
2871 | int num_logops; | 2863 | int num_logops; |
2872 | xlog_op_header_t *ohead; | 2864 | xlog_op_header_t *ohead; |
2873 | xlog_recover_t *trans; | 2865 | xlog_recover_t *trans; |
2874 | xlog_tid_t tid; | 2866 | xlog_tid_t tid; |
2875 | int error; | 2867 | int error; |
2876 | unsigned long hash; | 2868 | unsigned long hash; |
2877 | uint flags; | 2869 | uint flags; |
2878 | 2870 | ||
2879 | lp = dp + be32_to_cpu(rhead->h_len); | 2871 | lp = dp + be32_to_cpu(rhead->h_len); |
2880 | num_logops = be32_to_cpu(rhead->h_num_logops); | 2872 | num_logops = be32_to_cpu(rhead->h_num_logops); |
2881 | 2873 | ||
2882 | /* check the log format matches our own - else we can't recover */ | 2874 | /* check the log format matches our own - else we can't recover */ |
2883 | if (xlog_header_check_recover(log->l_mp, rhead)) | 2875 | if (xlog_header_check_recover(log->l_mp, rhead)) |
2884 | return (XFS_ERROR(EIO)); | 2876 | return (XFS_ERROR(EIO)); |
2885 | 2877 | ||
2886 | while ((dp < lp) && num_logops) { | 2878 | while ((dp < lp) && num_logops) { |
2887 | ASSERT(dp + sizeof(xlog_op_header_t) <= lp); | 2879 | ASSERT(dp + sizeof(xlog_op_header_t) <= lp); |
2888 | ohead = (xlog_op_header_t *)dp; | 2880 | ohead = (xlog_op_header_t *)dp; |
2889 | dp += sizeof(xlog_op_header_t); | 2881 | dp += sizeof(xlog_op_header_t); |
2890 | if (ohead->oh_clientid != XFS_TRANSACTION && | 2882 | if (ohead->oh_clientid != XFS_TRANSACTION && |
2891 | ohead->oh_clientid != XFS_LOG) { | 2883 | ohead->oh_clientid != XFS_LOG) { |
2892 | xlog_warn( | 2884 | xlog_warn( |
2893 | "XFS: xlog_recover_process_data: bad clientid"); | 2885 | "XFS: xlog_recover_process_data: bad clientid"); |
2894 | ASSERT(0); | 2886 | ASSERT(0); |
2895 | return (XFS_ERROR(EIO)); | 2887 | return (XFS_ERROR(EIO)); |
2896 | } | 2888 | } |
2897 | tid = be32_to_cpu(ohead->oh_tid); | 2889 | tid = be32_to_cpu(ohead->oh_tid); |
2898 | hash = XLOG_RHASH(tid); | 2890 | hash = XLOG_RHASH(tid); |
2899 | trans = xlog_recover_find_tid(rhash[hash], tid); | 2891 | trans = xlog_recover_find_tid(rhash[hash], tid); |
2900 | if (trans == NULL) { /* not found; add new tid */ | 2892 | if (trans == NULL) { /* not found; add new tid */ |
2901 | if (ohead->oh_flags & XLOG_START_TRANS) | 2893 | if (ohead->oh_flags & XLOG_START_TRANS) |
2902 | xlog_recover_new_tid(&rhash[hash], tid, | 2894 | xlog_recover_new_tid(&rhash[hash], tid, |
2903 | be64_to_cpu(rhead->h_lsn)); | 2895 | be64_to_cpu(rhead->h_lsn)); |
2904 | } else { | 2896 | } else { |
2905 | if (dp + be32_to_cpu(ohead->oh_len) > lp) { | 2897 | if (dp + be32_to_cpu(ohead->oh_len) > lp) { |
2906 | xlog_warn( | 2898 | xlog_warn( |
2907 | "XFS: xlog_recover_process_data: bad length"); | 2899 | "XFS: xlog_recover_process_data: bad length"); |
2908 | WARN_ON(1); | 2900 | WARN_ON(1); |
2909 | return (XFS_ERROR(EIO)); | 2901 | return (XFS_ERROR(EIO)); |
2910 | } | 2902 | } |
2911 | flags = ohead->oh_flags & ~XLOG_END_TRANS; | 2903 | flags = ohead->oh_flags & ~XLOG_END_TRANS; |
2912 | if (flags & XLOG_WAS_CONT_TRANS) | 2904 | if (flags & XLOG_WAS_CONT_TRANS) |
2913 | flags &= ~XLOG_CONTINUE_TRANS; | 2905 | flags &= ~XLOG_CONTINUE_TRANS; |
2914 | switch (flags) { | 2906 | switch (flags) { |
2915 | case XLOG_COMMIT_TRANS: | 2907 | case XLOG_COMMIT_TRANS: |
2916 | error = xlog_recover_commit_trans(log, | 2908 | error = xlog_recover_commit_trans(log, |
2917 | &rhash[hash], trans, pass); | 2909 | &rhash[hash], trans, pass); |
2918 | break; | 2910 | break; |
2919 | case XLOG_UNMOUNT_TRANS: | 2911 | case XLOG_UNMOUNT_TRANS: |
2920 | error = xlog_recover_unmount_trans(trans); | 2912 | error = xlog_recover_unmount_trans(trans); |
2921 | break; | 2913 | break; |
2922 | case XLOG_WAS_CONT_TRANS: | 2914 | case XLOG_WAS_CONT_TRANS: |
2923 | error = xlog_recover_add_to_cont_trans(trans, | 2915 | error = xlog_recover_add_to_cont_trans(trans, |
2924 | dp, be32_to_cpu(ohead->oh_len)); | 2916 | dp, be32_to_cpu(ohead->oh_len)); |
2925 | break; | 2917 | break; |
2926 | case XLOG_START_TRANS: | 2918 | case XLOG_START_TRANS: |
2927 | xlog_warn( | 2919 | xlog_warn( |
2928 | "XFS: xlog_recover_process_data: bad transaction"); | 2920 | "XFS: xlog_recover_process_data: bad transaction"); |
2929 | ASSERT(0); | 2921 | ASSERT(0); |
2930 | error = XFS_ERROR(EIO); | 2922 | error = XFS_ERROR(EIO); |
2931 | break; | 2923 | break; |
2932 | case 0: | 2924 | case 0: |
2933 | case XLOG_CONTINUE_TRANS: | 2925 | case XLOG_CONTINUE_TRANS: |
2934 | error = xlog_recover_add_to_trans(trans, | 2926 | error = xlog_recover_add_to_trans(trans, |
2935 | dp, be32_to_cpu(ohead->oh_len)); | 2927 | dp, be32_to_cpu(ohead->oh_len)); |
2936 | break; | 2928 | break; |
2937 | default: | 2929 | default: |
2938 | xlog_warn( | 2930 | xlog_warn( |
2939 | "XFS: xlog_recover_process_data: bad flag"); | 2931 | "XFS: xlog_recover_process_data: bad flag"); |
2940 | ASSERT(0); | 2932 | ASSERT(0); |
2941 | error = XFS_ERROR(EIO); | 2933 | error = XFS_ERROR(EIO); |
2942 | break; | 2934 | break; |
2943 | } | 2935 | } |
2944 | if (error) | 2936 | if (error) |
2945 | return error; | 2937 | return error; |
2946 | } | 2938 | } |
2947 | dp += be32_to_cpu(ohead->oh_len); | 2939 | dp += be32_to_cpu(ohead->oh_len); |
2948 | num_logops--; | 2940 | num_logops--; |
2949 | } | 2941 | } |
2950 | return 0; | 2942 | return 0; |
2951 | } | 2943 | } |
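
The loop in xlog_recover_process_data() above is a walk over variable-length records: each xlog_op_header_t is followed by oh_len payload bytes, the length is validated against the record end (lp), and the flags decide whether the payload starts, continues, or commits a transaction. A self-contained sketch of the same walk, with an invented header layout and flag values:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* simplified stand-in for xlog_op_header_t; layout and flags invented */
    struct op_header {
            uint32_t oh_len;            /* payload bytes following this header */
            uint8_t  oh_flags;          /* start/commit markers */
    };

    #define OP_START  0x01
    #define OP_COMMIT 0x02

    static int walk_ops(const uint8_t *dp, const uint8_t *lp, int num_logops)
    {
            while (dp < lp && num_logops) {
                    struct op_header oh;

                    memcpy(&oh, dp, sizeof(oh));    /* header may be unaligned */
                    dp += sizeof(oh);
                    if (dp + oh.oh_len > lp)
                            return -1;              /* bad length, as checked above */
                    if (oh.oh_flags & OP_START)
                            puts("start transaction");
                    else if (oh.oh_flags & OP_COMMIT)
                            puts("commit transaction");
                    else
                            puts("add payload to current transaction");
                    dp += oh.oh_len;                /* skip payload to next header */
                    num_logops--;
            }
            return 0;
    }

    int main(void)
    {
            uint8_t buf[64];
            struct op_header oh = { .oh_len = 4, .oh_flags = OP_START };

            memcpy(buf, &oh, sizeof(oh));
            memcpy(buf + sizeof(oh), "data", 4);
            return walk_ops(buf, buf + sizeof(oh) + 4, 1);
    }
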
2952 | 2944 | ||
2953 | /* | 2945 | /* |
2954 | * Process an extent free intent item that was recovered from | 2946 | * Process an extent free intent item that was recovered from |
2955 | * the log. We need to free the extents that it describes. | 2947 | * the log. We need to free the extents that it describes. |
2956 | */ | 2948 | */ |
2957 | STATIC int | 2949 | STATIC int |
2958 | xlog_recover_process_efi( | 2950 | xlog_recover_process_efi( |
2959 | xfs_mount_t *mp, | 2951 | xfs_mount_t *mp, |
2960 | xfs_efi_log_item_t *efip) | 2952 | xfs_efi_log_item_t *efip) |
2961 | { | 2953 | { |
2962 | xfs_efd_log_item_t *efdp; | 2954 | xfs_efd_log_item_t *efdp; |
2963 | xfs_trans_t *tp; | 2955 | xfs_trans_t *tp; |
2964 | int i; | 2956 | int i; |
2965 | int error = 0; | 2957 | int error = 0; |
2966 | xfs_extent_t *extp; | 2958 | xfs_extent_t *extp; |
2967 | xfs_fsblock_t startblock_fsb; | 2959 | xfs_fsblock_t startblock_fsb; |
2968 | 2960 | ||
2969 | ASSERT(!(efip->efi_flags & XFS_EFI_RECOVERED)); | 2961 | ASSERT(!(efip->efi_flags & XFS_EFI_RECOVERED)); |
2970 | 2962 | ||
2971 | /* | 2963 | /* |
2972 | * First check the validity of the extents described by the | 2964 | * First check the validity of the extents described by the |
2973 | * EFI. If any are bad, then assume that all are bad and | 2965 | * EFI. If any are bad, then assume that all are bad and |
2974 | * just toss the EFI. | 2966 | * just toss the EFI. |
2975 | */ | 2967 | */ |
2976 | for (i = 0; i < efip->efi_format.efi_nextents; i++) { | 2968 | for (i = 0; i < efip->efi_format.efi_nextents; i++) { |
2977 | extp = &(efip->efi_format.efi_extents[i]); | 2969 | extp = &(efip->efi_format.efi_extents[i]); |
2978 | startblock_fsb = XFS_BB_TO_FSB(mp, | 2970 | startblock_fsb = XFS_BB_TO_FSB(mp, |
2979 | XFS_FSB_TO_DADDR(mp, extp->ext_start)); | 2971 | XFS_FSB_TO_DADDR(mp, extp->ext_start)); |
2980 | if ((startblock_fsb == 0) || | 2972 | if ((startblock_fsb == 0) || |
2981 | (extp->ext_len == 0) || | 2973 | (extp->ext_len == 0) || |
2982 | (startblock_fsb >= mp->m_sb.sb_dblocks) || | 2974 | (startblock_fsb >= mp->m_sb.sb_dblocks) || |
2983 | (extp->ext_len >= mp->m_sb.sb_agblocks)) { | 2975 | (extp->ext_len >= mp->m_sb.sb_agblocks)) { |
2984 | /* | 2976 | /* |
2985 | * This will pull the EFI from the AIL and | 2977 | * This will pull the EFI from the AIL and |
2986 | * free the memory associated with it. | 2978 | * free the memory associated with it. |
2987 | */ | 2979 | */ |
2988 | xfs_efi_release(efip, efip->efi_format.efi_nextents); | 2980 | xfs_efi_release(efip, efip->efi_format.efi_nextents); |
2989 | return XFS_ERROR(EIO); | 2981 | return XFS_ERROR(EIO); |
2990 | } | 2982 | } |
2991 | } | 2983 | } |
2992 | 2984 | ||
2993 | tp = xfs_trans_alloc(mp, 0); | 2985 | tp = xfs_trans_alloc(mp, 0); |
2994 | error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0); | 2986 | error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0); |
2995 | if (error) | 2987 | if (error) |
2996 | goto abort_error; | 2988 | goto abort_error; |
2997 | efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); | 2989 | efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); |
2998 | 2990 | ||
2999 | for (i = 0; i < efip->efi_format.efi_nextents; i++) { | 2991 | for (i = 0; i < efip->efi_format.efi_nextents; i++) { |
3000 | extp = &(efip->efi_format.efi_extents[i]); | 2992 | extp = &(efip->efi_format.efi_extents[i]); |
3001 | error = xfs_free_extent(tp, extp->ext_start, extp->ext_len); | 2993 | error = xfs_free_extent(tp, extp->ext_start, extp->ext_len); |
3002 | if (error) | 2994 | if (error) |
3003 | goto abort_error; | 2995 | goto abort_error; |
3004 | xfs_trans_log_efd_extent(tp, efdp, extp->ext_start, | 2996 | xfs_trans_log_efd_extent(tp, efdp, extp->ext_start, |
3005 | extp->ext_len); | 2997 | extp->ext_len); |
3006 | } | 2998 | } |
3007 | 2999 | ||
3008 | efip->efi_flags |= XFS_EFI_RECOVERED; | 3000 | efip->efi_flags |= XFS_EFI_RECOVERED; |
3009 | error = xfs_trans_commit(tp, 0); | 3001 | error = xfs_trans_commit(tp, 0); |
3010 | return error; | 3002 | return error; |
3011 | 3003 | ||
3012 | abort_error: | 3004 | abort_error: |
3013 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); | 3005 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); |
3014 | return error; | 3006 | return error; |
3015 | } | 3007 | } |
3016 | 3008 | ||
3017 | /* | 3009 | /* |
3018 | * When this is called, all of the EFIs which did not have | 3010 | * When this is called, all of the EFIs which did not have |
3019 | * corresponding EFDs should be in the AIL. What we do now | 3011 | * corresponding EFDs should be in the AIL. What we do now |
3020 | * is free the extents associated with each one. | 3012 | * is free the extents associated with each one. |
3021 | * | 3013 | * |
3022 | * Since we process the EFIs in normal transactions, they | 3014 | * Since we process the EFIs in normal transactions, they |
3023 | * will be removed at some point after the commit. This prevents | 3015 | * will be removed at some point after the commit. This prevents |
3024 | * us from just walking down the list processing each one. | 3016 | * us from just walking down the list processing each one. |
3025 | * We'll use a flag in the EFI to skip those that we've already | 3017 | * We'll use a flag in the EFI to skip those that we've already |
3026 | * processed and use the AIL iteration mechanism's generation | 3018 | * processed and use the AIL iteration mechanism's generation |
3027 | * count to try to speed this up at least a bit. | 3019 | * count to try to speed this up at least a bit. |
3028 | * | 3020 | * |
3029 | * When we start, we know that the EFIs are the only things in | 3021 | * When we start, we know that the EFIs are the only things in |
3030 | * the AIL. As we process them, however, other items are added | 3022 | * the AIL. As we process them, however, other items are added |
3031 | * to the AIL. Since everything added to the AIL must come after | 3023 | * to the AIL. Since everything added to the AIL must come after |
3032 | * everything already in the AIL, we stop processing as soon as | 3024 | * everything already in the AIL, we stop processing as soon as |
3033 | * we see something other than an EFI in the AIL. | 3025 | * we see something other than an EFI in the AIL. |
3034 | */ | 3026 | */ |
3035 | STATIC int | 3027 | STATIC int |
3036 | xlog_recover_process_efis( | 3028 | xlog_recover_process_efis( |
3037 | xlog_t *log) | 3029 | xlog_t *log) |
3038 | { | 3030 | { |
3039 | xfs_log_item_t *lip; | 3031 | xfs_log_item_t *lip; |
3040 | xfs_efi_log_item_t *efip; | 3032 | xfs_efi_log_item_t *efip; |
3041 | int error = 0; | 3033 | int error = 0; |
3042 | struct xfs_ail_cursor cur; | 3034 | struct xfs_ail_cursor cur; |
3043 | struct xfs_ail *ailp; | 3035 | struct xfs_ail *ailp; |
3044 | 3036 | ||
3045 | ailp = log->l_ailp; | 3037 | ailp = log->l_ailp; |
3046 | spin_lock(&ailp->xa_lock); | 3038 | spin_lock(&ailp->xa_lock); |
3047 | lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); | 3039 | lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); |
3048 | while (lip != NULL) { | 3040 | while (lip != NULL) { |
3049 | /* | 3041 | /* |
3050 | * We're done when we see something other than an EFI. | 3042 | * We're done when we see something other than an EFI. |
3051 | * There should be no EFIs left in the AIL now. | 3043 | * There should be no EFIs left in the AIL now. |
3052 | */ | 3044 | */ |
3053 | if (lip->li_type != XFS_LI_EFI) { | 3045 | if (lip->li_type != XFS_LI_EFI) { |
3054 | #ifdef DEBUG | 3046 | #ifdef DEBUG |
3055 | for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur)) | 3047 | for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur)) |
3056 | ASSERT(lip->li_type != XFS_LI_EFI); | 3048 | ASSERT(lip->li_type != XFS_LI_EFI); |
3057 | #endif | 3049 | #endif |
3058 | break; | 3050 | break; |
3059 | } | 3051 | } |
3060 | 3052 | ||
3061 | /* | 3053 | /* |
3062 | * Skip EFIs that we've already processed. | 3054 | * Skip EFIs that we've already processed. |
3063 | */ | 3055 | */ |
3064 | efip = (xfs_efi_log_item_t *)lip; | 3056 | efip = (xfs_efi_log_item_t *)lip; |
3065 | if (efip->efi_flags & XFS_EFI_RECOVERED) { | 3057 | if (efip->efi_flags & XFS_EFI_RECOVERED) { |
3066 | lip = xfs_trans_ail_cursor_next(ailp, &cur); | 3058 | lip = xfs_trans_ail_cursor_next(ailp, &cur); |
3067 | continue; | 3059 | continue; |
3068 | } | 3060 | } |
3069 | 3061 | ||
3070 | spin_unlock(&ailp->xa_lock); | 3062 | spin_unlock(&ailp->xa_lock); |
3071 | error = xlog_recover_process_efi(log->l_mp, efip); | 3063 | error = xlog_recover_process_efi(log->l_mp, efip); |
3072 | spin_lock(&ailp->xa_lock); | 3064 | spin_lock(&ailp->xa_lock); |
3073 | if (error) | 3065 | if (error) |
3074 | goto out; | 3066 | goto out; |
3075 | lip = xfs_trans_ail_cursor_next(ailp, &cur); | 3067 | lip = xfs_trans_ail_cursor_next(ailp, &cur); |
3076 | } | 3068 | } |
3077 | out: | 3069 | out: |
3078 | xfs_trans_ail_cursor_done(ailp, &cur); | 3070 | xfs_trans_ail_cursor_done(ailp, &cur); |
3079 | spin_unlock(&ailp->xa_lock); | 3071 | spin_unlock(&ailp->xa_lock); |
3080 | return error; | 3072 | return error; |
3081 | } | 3073 | } |
3082 | 3074 | ||
3083 | /* | 3075 | /* |
3084 | * This routine performs a transaction to null out a bad inode pointer | 3076 | * This routine performs a transaction to null out a bad inode pointer |
3085 | * in an agi unlinked inode hash bucket. | 3077 | * in an agi unlinked inode hash bucket. |
3086 | */ | 3078 | */ |
3087 | STATIC void | 3079 | STATIC void |
3088 | xlog_recover_clear_agi_bucket( | 3080 | xlog_recover_clear_agi_bucket( |
3089 | xfs_mount_t *mp, | 3081 | xfs_mount_t *mp, |
3090 | xfs_agnumber_t agno, | 3082 | xfs_agnumber_t agno, |
3091 | int bucket) | 3083 | int bucket) |
3092 | { | 3084 | { |
3093 | xfs_trans_t *tp; | 3085 | xfs_trans_t *tp; |
3094 | xfs_agi_t *agi; | 3086 | xfs_agi_t *agi; |
3095 | xfs_buf_t *agibp; | 3087 | xfs_buf_t *agibp; |
3096 | int offset; | 3088 | int offset; |
3097 | int error; | 3089 | int error; |
3098 | 3090 | ||
3099 | tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); | 3091 | tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); |
3100 | error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), | 3092 | error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), |
3101 | 0, 0, 0); | 3093 | 0, 0, 0); |
3102 | if (error) | 3094 | if (error) |
3103 | goto out_abort; | 3095 | goto out_abort; |
3104 | 3096 | ||
3105 | error = xfs_read_agi(mp, tp, agno, &agibp); | 3097 | error = xfs_read_agi(mp, tp, agno, &agibp); |
3106 | if (error) | 3098 | if (error) |
3107 | goto out_abort; | 3099 | goto out_abort; |
3108 | 3100 | ||
3109 | agi = XFS_BUF_TO_AGI(agibp); | 3101 | agi = XFS_BUF_TO_AGI(agibp); |
3110 | agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); | 3102 | agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); |
3111 | offset = offsetof(xfs_agi_t, agi_unlinked) + | 3103 | offset = offsetof(xfs_agi_t, agi_unlinked) + |
3112 | (sizeof(xfs_agino_t) * bucket); | 3104 | (sizeof(xfs_agino_t) * bucket); |
3113 | xfs_trans_log_buf(tp, agibp, offset, | 3105 | xfs_trans_log_buf(tp, agibp, offset, |
3114 | (offset + sizeof(xfs_agino_t) - 1)); | 3106 | (offset + sizeof(xfs_agino_t) - 1)); |
3115 | 3107 | ||
3116 | error = xfs_trans_commit(tp, 0); | 3108 | error = xfs_trans_commit(tp, 0); |
3117 | if (error) | 3109 | if (error) |
3118 | goto out_error; | 3110 | goto out_error; |
3119 | return; | 3111 | return; |
3120 | 3112 | ||
3121 | out_abort: | 3113 | out_abort: |
3122 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); | 3114 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); |
3123 | out_error: | 3115 | out_error: |
3124 | xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: " | 3116 | xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: " |
3125 | "failed to clear agi %d. Continuing.", agno); | 3117 | "failed to clear agi %d. Continuing.", agno); |
3126 | return; | 3118 | return; |
3127 | } | 3119 | } |
3128 | 3120 | ||
3129 | STATIC xfs_agino_t | 3121 | STATIC xfs_agino_t |
3130 | xlog_recover_process_one_iunlink( | 3122 | xlog_recover_process_one_iunlink( |
3131 | struct xfs_mount *mp, | 3123 | struct xfs_mount *mp, |
3132 | xfs_agnumber_t agno, | 3124 | xfs_agnumber_t agno, |
3133 | xfs_agino_t agino, | 3125 | xfs_agino_t agino, |
3134 | int bucket) | 3126 | int bucket) |
3135 | { | 3127 | { |
3136 | struct xfs_buf *ibp; | 3128 | struct xfs_buf *ibp; |
3137 | struct xfs_dinode *dip; | 3129 | struct xfs_dinode *dip; |
3138 | struct xfs_inode *ip; | 3130 | struct xfs_inode *ip; |
3139 | xfs_ino_t ino; | 3131 | xfs_ino_t ino; |
3140 | int error; | 3132 | int error; |
3141 | 3133 | ||
3142 | ino = XFS_AGINO_TO_INO(mp, agno, agino); | 3134 | ino = XFS_AGINO_TO_INO(mp, agno, agino); |
3143 | error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0); | 3135 | error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0); |
3144 | if (error) | 3136 | if (error) |
3145 | goto fail; | 3137 | goto fail; |
3146 | 3138 | ||
3147 | /* | 3139 | /* |
3148 | * Get the on disk inode to find the next inode in the bucket. | 3140 | * Get the on disk inode to find the next inode in the bucket. |
3149 | */ | 3141 | */ |
3150 | error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XFS_BUF_LOCK); | 3142 | error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XFS_BUF_LOCK); |
3151 | if (error) | 3143 | if (error) |
3152 | goto fail_iput; | 3144 | goto fail_iput; |
3153 | 3145 | ||
3154 | ASSERT(ip->i_d.di_nlink == 0); | 3146 | ASSERT(ip->i_d.di_nlink == 0); |
3155 | ASSERT(ip->i_d.di_mode != 0); | 3147 | ASSERT(ip->i_d.di_mode != 0); |
3156 | 3148 | ||
3157 | /* setup for the next pass */ | 3149 | /* setup for the next pass */ |
3158 | agino = be32_to_cpu(dip->di_next_unlinked); | 3150 | agino = be32_to_cpu(dip->di_next_unlinked); |
3159 | xfs_buf_relse(ibp); | 3151 | xfs_buf_relse(ibp); |
3160 | 3152 | ||
3161 | /* | 3153 | /* |
3162 | * Prevent any DMAPI event from being sent when the reference on | 3154 | * Prevent any DMAPI event from being sent when the reference on |
3163 | * the inode is dropped. | 3155 | * the inode is dropped. |
3164 | */ | 3156 | */ |
3165 | ip->i_d.di_dmevmask = 0; | 3157 | ip->i_d.di_dmevmask = 0; |
3166 | 3158 | ||
3167 | IRELE(ip); | 3159 | IRELE(ip); |
3168 | return agino; | 3160 | return agino; |
3169 | 3161 | ||
3170 | fail_iput: | 3162 | fail_iput: |
3171 | IRELE(ip); | 3163 | IRELE(ip); |
3172 | fail: | 3164 | fail: |
3173 | /* | 3165 | /* |
3174 | * We can't read in the inode this bucket points to, or this inode | 3166 | * We can't read in the inode this bucket points to, or this inode |
3175 | * is messed up. Just ditch this bucket of inodes. We will lose | 3167 | * is messed up. Just ditch this bucket of inodes. We will lose |
3176 | * some inodes and space, but at least we won't hang. | 3168 | * some inodes and space, but at least we won't hang. |
3177 | * | 3169 | * |
3178 | * Call xlog_recover_clear_agi_bucket() to perform a transaction to | 3170 | * Call xlog_recover_clear_agi_bucket() to perform a transaction to |
3179 | * clear the inode pointer in the bucket. | 3171 | * clear the inode pointer in the bucket. |
3180 | */ | 3172 | */ |
3181 | xlog_recover_clear_agi_bucket(mp, agno, bucket); | 3173 | xlog_recover_clear_agi_bucket(mp, agno, bucket); |
3182 | return NULLAGINO; | 3174 | return NULLAGINO; |
3183 | } | 3175 | } |
3184 | 3176 | ||
3185 | /* | 3177 | /* |
3186 | * xlog_iunlink_recover | 3178 | * xlog_iunlink_recover |
3187 | * | 3179 | * |
3188 | * This is called during recovery to process any inodes which | 3180 | * This is called during recovery to process any inodes which |
3189 | * we unlinked but did not free when the system crashed. These | 3181 | * we unlinked but did not free when the system crashed. These |
3190 | * inodes will be on the lists in the AGI blocks. What we do | 3182 | * inodes will be on the lists in the AGI blocks. What we do |
3191 | * here is scan all the AGIs and fully truncate and free any | 3183 | * here is scan all the AGIs and fully truncate and free any |
3192 | * inodes found on the lists. Each inode is removed from the | 3184 | * inodes found on the lists. Each inode is removed from the |
3193 | * lists when it has been fully truncated and is freed. The | 3185 | * lists when it has been fully truncated and is freed. The |
3194 | * freeing of the inode and its removal from the list must be | 3186 | * freeing of the inode and its removal from the list must be |
3195 | * atomic. | 3187 | * atomic. |
3196 | */ | 3188 | */ |
3197 | void | 3189 | void |
3198 | xlog_recover_process_iunlinks( | 3190 | xlog_recover_process_iunlinks( |
3199 | xlog_t *log) | 3191 | xlog_t *log) |
3200 | { | 3192 | { |
3201 | xfs_mount_t *mp; | 3193 | xfs_mount_t *mp; |
3202 | xfs_agnumber_t agno; | 3194 | xfs_agnumber_t agno; |
3203 | xfs_agi_t *agi; | 3195 | xfs_agi_t *agi; |
3204 | xfs_buf_t *agibp; | 3196 | xfs_buf_t *agibp; |
3205 | xfs_agino_t agino; | 3197 | xfs_agino_t agino; |
3206 | int bucket; | 3198 | int bucket; |
3207 | int error; | 3199 | int error; |
3208 | uint mp_dmevmask; | 3200 | uint mp_dmevmask; |
3209 | 3201 | ||
3210 | mp = log->l_mp; | 3202 | mp = log->l_mp; |
3211 | 3203 | ||
3212 | /* | 3204 | /* |
3213 | * Prevent any DMAPI event from being sent while in this function. | 3205 | * Prevent any DMAPI event from being sent while in this function. |
3214 | */ | 3206 | */ |
3215 | mp_dmevmask = mp->m_dmevmask; | 3207 | mp_dmevmask = mp->m_dmevmask; |
3216 | mp->m_dmevmask = 0; | 3208 | mp->m_dmevmask = 0; |
3217 | 3209 | ||
3218 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { | 3210 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { |
3219 | /* | 3211 | /* |
3220 | * Find the agi for this ag. | 3212 | * Find the agi for this ag. |
3221 | */ | 3213 | */ |
3222 | error = xfs_read_agi(mp, NULL, agno, &agibp); | 3214 | error = xfs_read_agi(mp, NULL, agno, &agibp); |
3223 | if (error) { | 3215 | if (error) { |
3224 | /* | 3216 | /* |
3225 | * AGI is b0rked. Don't process it. | 3217 | * AGI is b0rked. Don't process it. |
3226 | * | 3218 | * |
3227 | * We should probably mark the filesystem as corrupt | 3219 | * We should probably mark the filesystem as corrupt |
3228 | * after we've recovered all the ag's we can.... | 3220 | * after we've recovered all the ag's we can.... |
3229 | */ | 3221 | */ |
3230 | continue; | 3222 | continue; |
3231 | } | 3223 | } |
3232 | agi = XFS_BUF_TO_AGI(agibp); | 3224 | agi = XFS_BUF_TO_AGI(agibp); |
3233 | 3225 | ||
3234 | for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { | 3226 | for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { |
3235 | agino = be32_to_cpu(agi->agi_unlinked[bucket]); | 3227 | agino = be32_to_cpu(agi->agi_unlinked[bucket]); |
3236 | while (agino != NULLAGINO) { | 3228 | while (agino != NULLAGINO) { |
3237 | /* | 3229 | /* |
3238 | * Release the agi buffer so that it can | 3230 | * Release the agi buffer so that it can |
3239 | * be acquired in the normal course of the | 3231 | * be acquired in the normal course of the |
3240 | * transaction to truncate and free the inode. | 3232 | * transaction to truncate and free the inode. |
3241 | */ | 3233 | */ |
3242 | xfs_buf_relse(agibp); | 3234 | xfs_buf_relse(agibp); |
3243 | 3235 | ||
3244 | agino = xlog_recover_process_one_iunlink(mp, | 3236 | agino = xlog_recover_process_one_iunlink(mp, |
3245 | agno, agino, bucket); | 3237 | agno, agino, bucket); |
3246 | 3238 | ||
3247 | /* | 3239 | /* |
3248 | * Reacquire the AGI buffer and continue around | 3240 | * Reacquire the AGI buffer and continue around |
3249 | * the loop. This should never fail as we know | 3241 | * the loop. This should never fail as we know |
3250 | * the buffer was good earlier on. | 3242 | * the buffer was good earlier on. |
3251 | */ | 3243 | */ |
3252 | error = xfs_read_agi(mp, NULL, agno, &agibp); | 3244 | error = xfs_read_agi(mp, NULL, agno, &agibp); |
3253 | ASSERT(error == 0); | 3245 | ASSERT(error == 0); |
3254 | agi = XFS_BUF_TO_AGI(agibp); | 3246 | agi = XFS_BUF_TO_AGI(agibp); |
3255 | } | 3247 | } |
3256 | } | 3248 | } |
3257 | 3249 | ||
3258 | /* | 3250 | /* |
3259 | * Release the buffer for the current agi so we can | 3251 | * Release the buffer for the current agi so we can |
3260 | * go on to the next one. | 3252 | * go on to the next one. |
3261 | */ | 3253 | */ |
3262 | xfs_buf_relse(agibp); | 3254 | xfs_buf_relse(agibp); |
3263 | } | 3255 | } |
3264 | 3256 | ||
3265 | mp->m_dmevmask = mp_dmevmask; | 3257 | mp->m_dmevmask = mp_dmevmask; |
3266 | } | 3258 | } |
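
Each AGI bucket processed above heads an on-disk singly linked list: the bucket holds the first agino, each inode's di_next_unlinked field names the next, and NULLAGINO terminates the chain, which is why the loop keeps releasing and reacquiring the AGI buffer as it frees each entry. A toy in-memory walk of the same list shape (structures simplified; only the NULLAGINO terminator value matches XFS):

    #include <stdint.h>
    #include <stdio.h>

    #define NULLAGINO 0xffffffffu       /* list terminator, as in XFS */

    struct dinode {                     /* stand-in for the on-disk inode */
            uint32_t di_next_unlinked;
    };

    /* walk one unlinked bucket; the inodes[] array plays the role of
     * reading each inode buffer with xfs_itobp() */
    static void process_bucket(const struct dinode *inodes, uint32_t head)
    {
            uint32_t agino = head;

            while (agino != NULLAGINO) {
                    printf("freeing unlinked inode %u\n", agino);
                    agino = inodes[agino].di_next_unlinked;     /* next in chain */
            }
    }

    int main(void)
    {
            struct dinode inodes[3] = {
                    { .di_next_unlinked = 2 },
                    { .di_next_unlinked = NULLAGINO },
                    { .di_next_unlinked = 1 },
            };

            process_bucket(inodes, 0);  /* visits 0, then 2, then 1 */
            return 0;
    }
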
3267 | 3259 | ||
3268 | 3260 | ||
3269 | #ifdef DEBUG | 3261 | #ifdef DEBUG |
3270 | STATIC void | 3262 | STATIC void |
3271 | xlog_pack_data_checksum( | 3263 | xlog_pack_data_checksum( |
3272 | xlog_t *log, | 3264 | xlog_t *log, |
3273 | xlog_in_core_t *iclog, | 3265 | xlog_in_core_t *iclog, |
3274 | int size) | 3266 | int size) |
3275 | { | 3267 | { |
3276 | int i; | 3268 | int i; |
3277 | __be32 *up; | 3269 | __be32 *up; |
3278 | uint chksum = 0; | 3270 | uint chksum = 0; |
3279 | 3271 | ||
3280 | up = (__be32 *)iclog->ic_datap; | 3272 | up = (__be32 *)iclog->ic_datap; |
3281 | /* divide length by 4 to get # words */ | 3273 | /* divide length by 4 to get # words */ |
3282 | for (i = 0; i < (size >> 2); i++) { | 3274 | for (i = 0; i < (size >> 2); i++) { |
3283 | chksum ^= be32_to_cpu(*up); | 3275 | chksum ^= be32_to_cpu(*up); |
3284 | up++; | 3276 | up++; |
3285 | } | 3277 | } |
3286 | iclog->ic_header.h_chksum = cpu_to_be32(chksum); | 3278 | iclog->ic_header.h_chksum = cpu_to_be32(chksum); |
3287 | } | 3279 | } |
3288 | #else | 3280 | #else |
3289 | #define xlog_pack_data_checksum(log, iclog, size) | 3281 | #define xlog_pack_data_checksum(log, iclog, size) |
3290 | #endif | 3282 | #endif |
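
The DEBUG-only checksum above is a word-wise XOR over the record payload, with each 32-bit word interpreted as big-endian before folding. A standalone equivalent, doing the be32 conversion by hand so it runs on any host:

    #include <stddef.h>
    #include <stdint.h>

    /* convert four big-endian bytes to host order, like be32_to_cpu() */
    static uint32_t be32_load(const uint8_t *p)
    {
            return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
                   ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
    }

    /* XOR-fold the buffer in 32-bit words; size is in bytes */
    static uint32_t xlog_style_checksum(const uint8_t *data, size_t size)
    {
            uint32_t chksum = 0;
            size_t i;

            for (i = 0; i < (size >> 2); i++)   /* size/4 words */
                    chksum ^= be32_load(data + 4 * i);
            return chksum;
    }

    int main(void)
    {
            uint8_t rec[8] = { 0, 0, 0, 1, 0, 0, 0, 3 };    /* words 1 and 3 */

            return xlog_style_checksum(rec, sizeof(rec)) == 2 ? 0 : 1;
    }
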
3291 | 3283 | ||
3292 | /* | 3284 | /* |
3293 | * Stamp cycle number in every block | 3285 | * Stamp cycle number in every block |
3294 | */ | 3286 | */ |
3295 | void | 3287 | void |
3296 | xlog_pack_data( | 3288 | xlog_pack_data( |
3297 | xlog_t *log, | 3289 | xlog_t *log, |
3298 | xlog_in_core_t *iclog, | 3290 | xlog_in_core_t *iclog, |
3299 | int roundoff) | 3291 | int roundoff) |
3300 | { | 3292 | { |
3301 | int i, j, k; | 3293 | int i, j, k; |
3302 | int size = iclog->ic_offset + roundoff; | 3294 | int size = iclog->ic_offset + roundoff; |
3303 | __be32 cycle_lsn; | 3295 | __be32 cycle_lsn; |
3304 | xfs_caddr_t dp; | 3296 | xfs_caddr_t dp; |
3305 | 3297 | ||
3306 | xlog_pack_data_checksum(log, iclog, size); | 3298 | xlog_pack_data_checksum(log, iclog, size); |
3307 | 3299 | ||
3308 | cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn); | 3300 | cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn); |
3309 | 3301 | ||
3310 | dp = iclog->ic_datap; | 3302 | dp = iclog->ic_datap; |
3311 | for (i = 0; i < BTOBB(size) && | 3303 | for (i = 0; i < BTOBB(size) && |
3312 | i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { | 3304 | i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { |
3313 | iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp; | 3305 | iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp; |
3314 | *(__be32 *)dp = cycle_lsn; | 3306 | *(__be32 *)dp = cycle_lsn; |
3315 | dp += BBSIZE; | 3307 | dp += BBSIZE; |
3316 | } | 3308 | } |
3317 | 3309 | ||
3318 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { | 3310 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { |
3319 | xlog_in_core_2_t *xhdr = iclog->ic_data; | 3311 | xlog_in_core_2_t *xhdr = iclog->ic_data; |
3320 | 3312 | ||
3321 | for ( ; i < BTOBB(size); i++) { | 3313 | for ( ; i < BTOBB(size); i++) { |
3322 | j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); | 3314 | j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); |
3323 | k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); | 3315 | k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); |
3324 | xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp; | 3316 | xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp; |
3325 | *(__be32 *)dp = cycle_lsn; | 3317 | *(__be32 *)dp = cycle_lsn; |
3326 | dp += BBSIZE; | 3318 | dp += BBSIZE; |
3327 | } | 3319 | } |
3328 | 3320 | ||
3329 | for (i = 1; i < log->l_iclog_heads; i++) { | 3321 | for (i = 1; i < log->l_iclog_heads; i++) { |
3330 | xhdr[i].hic_xheader.xh_cycle = cycle_lsn; | 3322 | xhdr[i].hic_xheader.xh_cycle = cycle_lsn; |
3331 | } | 3323 | } |
3332 | } | 3324 | } |
3333 | } | 3325 | } |
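
xlog_pack_data() performs a simple swap so that every on-disk basic block begins with the current cycle number: the first four bytes of each 512-byte block are saved into the header's cycle-data array and overwritten with the cycle, and xlog_unpack_data() below restores them during recovery. A minimal round-trip sketch of the v1 case (one header, no extended headers; the real code stores the cycle in disk endianness):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    #define BBSIZE 512                  /* basic block size, as in XFS */

    /* save the first word of each block, then stamp the cycle in its place */
    static void pack(uint32_t *saved, uint8_t *data, size_t nblocks,
                     uint32_t cycle)
    {
            size_t i;

            for (i = 0; i < nblocks; i++) {
                    memcpy(&saved[i], data + i * BBSIZE, 4);
                    memcpy(data + i * BBSIZE, &cycle, 4);
            }
    }

    /* restore the saved words, undoing pack() during recovery */
    static void unpack(const uint32_t *saved, uint8_t *data, size_t nblocks)
    {
            size_t i;

            for (i = 0; i < nblocks; i++)
                    memcpy(data + i * BBSIZE, &saved[i], 4);
    }

    int main(void)
    {
            static uint8_t data[2 * BBSIZE] = { 1, 2, 3, 4 };
            uint32_t saved[2];

            pack(saved, data, 2, 42);
            unpack(saved, data, 2);
            assert(data[0] == 1 && data[3] == 4);   /* round trip restored */
            return 0;
    }
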
3334 | 3326 | ||
3335 | #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) | 3327 | #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) |
3336 | STATIC void | 3328 | STATIC void |
3337 | xlog_unpack_data_checksum( | 3329 | xlog_unpack_data_checksum( |
3338 | xlog_rec_header_t *rhead, | 3330 | xlog_rec_header_t *rhead, |
3339 | xfs_caddr_t dp, | 3331 | xfs_caddr_t dp, |
3340 | xlog_t *log) | 3332 | xlog_t *log) |
3341 | { | 3333 | { |
3342 | __be32 *up = (__be32 *)dp; | 3334 | __be32 *up = (__be32 *)dp; |
3343 | uint chksum = 0; | 3335 | uint chksum = 0; |
3344 | int i; | 3336 | int i; |
3345 | 3337 | ||
3346 | /* divide length by 4 to get # words */ | 3338 | /* divide length by 4 to get # words */ |
3347 | for (i=0; i < be32_to_cpu(rhead->h_len) >> 2; i++) { | 3339 | for (i=0; i < be32_to_cpu(rhead->h_len) >> 2; i++) { |
3348 | chksum ^= be32_to_cpu(*up); | 3340 | chksum ^= be32_to_cpu(*up); |
3349 | up++; | 3341 | up++; |
3350 | } | 3342 | } |
3351 | if (chksum != be32_to_cpu(rhead->h_chksum)) { | 3343 | if (chksum != be32_to_cpu(rhead->h_chksum)) { |
3352 | if (rhead->h_chksum || | 3344 | if (rhead->h_chksum || |
3353 | ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) { | 3345 | ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) { |
3354 | cmn_err(CE_DEBUG, | 3346 | cmn_err(CE_DEBUG, |
3355 | "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)\n", | 3347 | "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)\n", |
3356 | be32_to_cpu(rhead->h_chksum), chksum); | 3348 | be32_to_cpu(rhead->h_chksum), chksum); |
3357 | cmn_err(CE_DEBUG, | 3349 | cmn_err(CE_DEBUG, |
3358 | "XFS: Disregard message if filesystem was created with non-DEBUG kernel"); | 3350 | "XFS: Disregard message if filesystem was created with non-DEBUG kernel"); |
3359 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { | 3351 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { |
3360 | cmn_err(CE_DEBUG, | 3352 | cmn_err(CE_DEBUG, |
3361 | "XFS: LogR this is a LogV2 filesystem\n"); | 3353 | "XFS: LogR this is a LogV2 filesystem\n"); |
3362 | } | 3354 | } |
3363 | log->l_flags |= XLOG_CHKSUM_MISMATCH; | 3355 | log->l_flags |= XLOG_CHKSUM_MISMATCH; |
3364 | } | 3356 | } |
3365 | } | 3357 | } |
3366 | } | 3358 | } |
3367 | #else | 3359 | #else |
3368 | #define xlog_unpack_data_checksum(rhead, dp, log) | 3360 | #define xlog_unpack_data_checksum(rhead, dp, log) |
3369 | #endif | 3361 | #endif |
3370 | 3362 | ||
3371 | STATIC void | 3363 | STATIC void |
3372 | xlog_unpack_data( | 3364 | xlog_unpack_data( |
3373 | xlog_rec_header_t *rhead, | 3365 | xlog_rec_header_t *rhead, |
3374 | xfs_caddr_t dp, | 3366 | xfs_caddr_t dp, |
3375 | xlog_t *log) | 3367 | xlog_t *log) |
3376 | { | 3368 | { |
3377 | int i, j, k; | 3369 | int i, j, k; |
3378 | 3370 | ||
3379 | for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && | 3371 | for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && |
3380 | i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { | 3372 | i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { |
3381 | *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i]; | 3373 | *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i]; |
3382 | dp += BBSIZE; | 3374 | dp += BBSIZE; |
3383 | } | 3375 | } |
3384 | 3376 | ||
3385 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { | 3377 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { |
3386 | xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead; | 3378 | xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead; |
3387 | for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) { | 3379 | for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) { |
3388 | j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); | 3380 | j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); |
3389 | k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); | 3381 | k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); |
3390 | *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k]; | 3382 | *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k]; |
3391 | dp += BBSIZE; | 3383 | dp += BBSIZE; |
3392 | } | 3384 | } |
3393 | } | 3385 | } |
3394 | 3386 | ||
3395 | xlog_unpack_data_checksum(rhead, dp, log); | 3387 | xlog_unpack_data_checksum(rhead, dp, log); |
3396 | } | 3388 | } |
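xlog_unpack_data is the exact inverse: before a record is replayed, the words saved in the header (and in the extended headers for v2 logs) are copied back over the cycle stamps. Continuing the toy sketch above (this reuses pack_cycle and BBSIZE from there), the round trip restores the buffer bit for bit:

    #include <assert.h>
    #include <string.h>

    /* Inverse of pack_cycle(): restore the saved data words. */
    static void unpack_cycle(uint8_t *data, int nblks,
                             const uint32_t *saved)
    {
            int i;

            for (i = 0; i < nblks; i++)
                    *(uint32_t *)(data + (size_t)i * BBSIZE) = saved[i];
    }

    int main(void)
    {
            static uint8_t buf[4 * BBSIZE], orig[4 * BBSIZE];
            uint32_t saved[4];

            memset(buf, 0xab, sizeof(buf));
            memcpy(orig, buf, sizeof(buf));
            pack_cycle(buf, 4, 7, saved);   /* stamp cycle 7 */
            unpack_cycle(buf, 4, saved);    /* undo it */
            assert(memcmp(buf, orig, sizeof(buf)) == 0);
            return 0;
    }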
3397 | 3389 | ||
3398 | STATIC int | 3390 | STATIC int |
3399 | xlog_valid_rec_header( | 3391 | xlog_valid_rec_header( |
3400 | xlog_t *log, | 3392 | xlog_t *log, |
3401 | xlog_rec_header_t *rhead, | 3393 | xlog_rec_header_t *rhead, |
3402 | xfs_daddr_t blkno) | 3394 | xfs_daddr_t blkno) |
3403 | { | 3395 | { |
3404 | int hlen; | 3396 | int hlen; |
3405 | 3397 | ||
3406 | if (unlikely(be32_to_cpu(rhead->h_magicno) != XLOG_HEADER_MAGIC_NUM)) { | 3398 | if (unlikely(be32_to_cpu(rhead->h_magicno) != XLOG_HEADER_MAGIC_NUM)) { |
3407 | XFS_ERROR_REPORT("xlog_valid_rec_header(1)", | 3399 | XFS_ERROR_REPORT("xlog_valid_rec_header(1)", |
3408 | XFS_ERRLEVEL_LOW, log->l_mp); | 3400 | XFS_ERRLEVEL_LOW, log->l_mp); |
3409 | return XFS_ERROR(EFSCORRUPTED); | 3401 | return XFS_ERROR(EFSCORRUPTED); |
3410 | } | 3402 | } |
3411 | if (unlikely( | 3403 | if (unlikely( |
3412 | (!rhead->h_version || | 3404 | (!rhead->h_version || |
3413 | (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { | 3405 | (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { |
3414 | xlog_warn("XFS: %s: unrecognised log version (%d).", | 3406 | xlog_warn("XFS: %s: unrecognised log version (%d).", |
3415 | __func__, be32_to_cpu(rhead->h_version)); | 3407 | __func__, be32_to_cpu(rhead->h_version)); |
3416 | return XFS_ERROR(EIO); | 3408 | return XFS_ERROR(EIO); |
3417 | } | 3409 | } |
3418 | 3410 | ||
3419 | /* LR body must have data or it wouldn't have been written */ | 3411 | /* LR body must have data or it wouldn't have been written */ |
3420 | hlen = be32_to_cpu(rhead->h_len); | 3412 | hlen = be32_to_cpu(rhead->h_len); |
3421 | if (unlikely( hlen <= 0 || hlen > INT_MAX )) { | 3413 | if (unlikely( hlen <= 0 || hlen > INT_MAX )) { |
3422 | XFS_ERROR_REPORT("xlog_valid_rec_header(2)", | 3414 | XFS_ERROR_REPORT("xlog_valid_rec_header(2)", |
3423 | XFS_ERRLEVEL_LOW, log->l_mp); | 3415 | XFS_ERRLEVEL_LOW, log->l_mp); |
3424 | return XFS_ERROR(EFSCORRUPTED); | 3416 | return XFS_ERROR(EFSCORRUPTED); |
3425 | } | 3417 | } |
3426 | if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) { | 3418 | if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) { |
3427 | XFS_ERROR_REPORT("xlog_valid_rec_header(3)", | 3419 | XFS_ERROR_REPORT("xlog_valid_rec_header(3)", |
3428 | XFS_ERRLEVEL_LOW, log->l_mp); | 3420 | XFS_ERRLEVEL_LOW, log->l_mp); |
3429 | return XFS_ERROR(EFSCORRUPTED); | 3421 | return XFS_ERROR(EFSCORRUPTED); |
3430 | } | 3422 | } |
3431 | return 0; | 3423 | return 0; |
3432 | } | 3424 | } |
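Those three rejections, a bad magic number, unknown version bits, and a nonsensical length or block number, are the whole of record-header validation; anything that passes is trusted until replay. A compressed restatement as a sketch, with stand-in constants (the real values live in the log header definitions; EUCLEAN stands in for XFS's EFSCORRUPTED, which maps to it on Linux):

    #include <errno.h>
    #include <limits.h>
    #include <stdint.h>

    #define HDR_MAGIC      0xfeedbabeU  /* stand-in magic number */
    #define VERSION_OKBITS 0x3U         /* stand-in version mask */

    static int valid_rec_header(uint32_t magic, uint32_t version,
                                int32_t len, int64_t blkno, int64_t log_size)
    {
            if (magic != HDR_MAGIC)
                    return EUCLEAN;     /* corrupted header */
            if (!version || (version & ~VERSION_OKBITS))
                    return EIO;         /* unrecognised log version */
            if (len <= 0)
                    return EUCLEAN;     /* LR body must have data */
            if (blkno > log_size || blkno > INT_MAX)
                    return EUCLEAN;     /* block number out of range */
            return 0;
    }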
3433 | 3425 | ||
3434 | /* | 3426 | /* |
3435 | * Read the log from tail to head and process the log records found. | 3427 | * Read the log from tail to head and process the log records found. |
3436 | * Handle the two cases where the tail and head are in the same cycle | 3428 | * Handle the two cases where the tail and head are in the same cycle |
3437 | * and where the active portion of the log wraps around the end of | 3429 | * and where the active portion of the log wraps around the end of |
3438 | * the physical log separately. The pass parameter is passed through | 3430 | * the physical log separately. The pass parameter is passed through |
3439 | * to the routines called to process the data and is not looked at | 3431 | * to the routines called to process the data and is not looked at |
3440 | * here. | 3432 | * here. |
3441 | */ | 3433 | */ |
3442 | STATIC int | 3434 | STATIC int |
3443 | xlog_do_recovery_pass( | 3435 | xlog_do_recovery_pass( |
3444 | xlog_t *log, | 3436 | xlog_t *log, |
3445 | xfs_daddr_t head_blk, | 3437 | xfs_daddr_t head_blk, |
3446 | xfs_daddr_t tail_blk, | 3438 | xfs_daddr_t tail_blk, |
3447 | int pass) | 3439 | int pass) |
3448 | { | 3440 | { |
3449 | xlog_rec_header_t *rhead; | 3441 | xlog_rec_header_t *rhead; |
3450 | xfs_daddr_t blk_no; | 3442 | xfs_daddr_t blk_no; |
3451 | xfs_caddr_t bufaddr, offset; | 3443 | xfs_caddr_t bufaddr, offset; |
3452 | xfs_buf_t *hbp, *dbp; | 3444 | xfs_buf_t *hbp, *dbp; |
3453 | int error = 0, h_size; | 3445 | int error = 0, h_size; |
3454 | int bblks, split_bblks; | 3446 | int bblks, split_bblks; |
3455 | int hblks, split_hblks, wrapped_hblks; | 3447 | int hblks, split_hblks, wrapped_hblks; |
3456 | xlog_recover_t *rhash[XLOG_RHASH_SIZE]; | 3448 | xlog_recover_t *rhash[XLOG_RHASH_SIZE]; |
3457 | 3449 | ||
3458 | ASSERT(head_blk != tail_blk); | 3450 | ASSERT(head_blk != tail_blk); |
3459 | 3451 | ||
3460 | /* | 3452 | /* |
3461 | * Read the header of the tail block and get the iclog buffer size from | 3453 | * Read the header of the tail block and get the iclog buffer size from |
3462 | * h_size. Use this to tell how many sectors make up the log header. | 3454 | * h_size. Use this to tell how many sectors make up the log header. |
3463 | */ | 3455 | */ |
3464 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { | 3456 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { |
3465 | /* | 3457 | /* |
3466 | * When using variable length iclogs, read first sector of | 3458 | * When using variable length iclogs, read first sector of |
3467 | * iclog header and extract the header size from it. Get a | 3459 | * iclog header and extract the header size from it. Get a |
3468 | * new hbp that is the correct size. | 3460 | * new hbp that is the correct size. |
3469 | */ | 3461 | */ |
3470 | hbp = xlog_get_bp(log, 1); | 3462 | hbp = xlog_get_bp(log, 1); |
3471 | if (!hbp) | 3463 | if (!hbp) |
3472 | return ENOMEM; | 3464 | return ENOMEM; |
3473 | if ((error = xlog_bread(log, tail_blk, 1, hbp))) | 3465 | if ((error = xlog_bread(log, tail_blk, 1, hbp))) |
3474 | goto bread_err1; | 3466 | goto bread_err1; |
3475 | offset = xlog_align(log, tail_blk, 1, hbp); | 3467 | offset = xlog_align(log, tail_blk, 1, hbp); |
3476 | rhead = (xlog_rec_header_t *)offset; | 3468 | rhead = (xlog_rec_header_t *)offset; |
3477 | error = xlog_valid_rec_header(log, rhead, tail_blk); | 3469 | error = xlog_valid_rec_header(log, rhead, tail_blk); |
3478 | if (error) | 3470 | if (error) |
3479 | goto bread_err1; | 3471 | goto bread_err1; |
3480 | h_size = be32_to_cpu(rhead->h_size); | 3472 | h_size = be32_to_cpu(rhead->h_size); |
3481 | if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) && | 3473 | if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) && |
3482 | (h_size > XLOG_HEADER_CYCLE_SIZE)) { | 3474 | (h_size > XLOG_HEADER_CYCLE_SIZE)) { |
3483 | hblks = h_size / XLOG_HEADER_CYCLE_SIZE; | 3475 | hblks = h_size / XLOG_HEADER_CYCLE_SIZE; |
3484 | if (h_size % XLOG_HEADER_CYCLE_SIZE) | 3476 | if (h_size % XLOG_HEADER_CYCLE_SIZE) |
3485 | hblks++; | 3477 | hblks++; |
3486 | xlog_put_bp(hbp); | 3478 | xlog_put_bp(hbp); |
3487 | hbp = xlog_get_bp(log, hblks); | 3479 | hbp = xlog_get_bp(log, hblks); |
3488 | } else { | 3480 | } else { |
3489 | hblks = 1; | 3481 | hblks = 1; |
3490 | } | 3482 | } |
3491 | } else { | 3483 | } else { |
3492 | ASSERT(log->l_sectbb_log == 0); | 3484 | ASSERT(log->l_sectbb_log == 0); |
3493 | hblks = 1; | 3485 | hblks = 1; |
3494 | hbp = xlog_get_bp(log, 1); | 3486 | hbp = xlog_get_bp(log, 1); |
3495 | h_size = XLOG_BIG_RECORD_BSIZE; | 3487 | h_size = XLOG_BIG_RECORD_BSIZE; |
3496 | } | 3488 | } |
3497 | 3489 | ||
3498 | if (!hbp) | 3490 | if (!hbp) |
3499 | return ENOMEM; | 3491 | return ENOMEM; |
3500 | dbp = xlog_get_bp(log, BTOBB(h_size)); | 3492 | dbp = xlog_get_bp(log, BTOBB(h_size)); |
3501 | if (!dbp) { | 3493 | if (!dbp) { |
3502 | xlog_put_bp(hbp); | 3494 | xlog_put_bp(hbp); |
3503 | return ENOMEM; | 3495 | return ENOMEM; |
3504 | } | 3496 | } |
3505 | 3497 | ||
3506 | memset(rhash, 0, sizeof(rhash)); | 3498 | memset(rhash, 0, sizeof(rhash)); |
3507 | if (tail_blk <= head_blk) { | 3499 | if (tail_blk <= head_blk) { |
3508 | for (blk_no = tail_blk; blk_no < head_blk; ) { | 3500 | for (blk_no = tail_blk; blk_no < head_blk; ) { |
3509 | if ((error = xlog_bread(log, blk_no, hblks, hbp))) | 3501 | if ((error = xlog_bread(log, blk_no, hblks, hbp))) |
3510 | goto bread_err2; | 3502 | goto bread_err2; |
3511 | offset = xlog_align(log, blk_no, hblks, hbp); | 3503 | offset = xlog_align(log, blk_no, hblks, hbp); |
3512 | rhead = (xlog_rec_header_t *)offset; | 3504 | rhead = (xlog_rec_header_t *)offset; |
3513 | error = xlog_valid_rec_header(log, rhead, blk_no); | 3505 | error = xlog_valid_rec_header(log, rhead, blk_no); |
3514 | if (error) | 3506 | if (error) |
3515 | goto bread_err2; | 3507 | goto bread_err2; |
3516 | 3508 | ||
3517 | /* blocks in data section */ | 3509 | /* blocks in data section */ |
3518 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); | 3510 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); |
3519 | error = xlog_bread(log, blk_no + hblks, bblks, dbp); | 3511 | error = xlog_bread(log, blk_no + hblks, bblks, dbp); |
3520 | if (error) | 3512 | if (error) |
3521 | goto bread_err2; | 3513 | goto bread_err2; |
3522 | offset = xlog_align(log, blk_no + hblks, bblks, dbp); | 3514 | offset = xlog_align(log, blk_no + hblks, bblks, dbp); |
3523 | xlog_unpack_data(rhead, offset, log); | 3515 | xlog_unpack_data(rhead, offset, log); |
3524 | if ((error = xlog_recover_process_data(log, | 3516 | if ((error = xlog_recover_process_data(log, |
3525 | rhash, rhead, offset, pass))) | 3517 | rhash, rhead, offset, pass))) |
3526 | goto bread_err2; | 3518 | goto bread_err2; |
3527 | blk_no += bblks + hblks; | 3519 | blk_no += bblks + hblks; |
3528 | } | 3520 | } |
3529 | } else { | 3521 | } else { |
3530 | /* | 3522 | /* |
3531 | * Perform recovery around the end of the physical log. | 3523 | * Perform recovery around the end of the physical log. |
3532 | * When the head is not on the same cycle number as the tail, | 3524 | * When the head is not on the same cycle number as the tail, |
3533 | * we can't do a sequential recovery as above. | 3525 | * we can't do a sequential recovery as above. |
3534 | */ | 3526 | */ |
3535 | blk_no = tail_blk; | 3527 | blk_no = tail_blk; |
3536 | while (blk_no < log->l_logBBsize) { | 3528 | while (blk_no < log->l_logBBsize) { |
3537 | /* | 3529 | /* |
3538 | * Check for header wrapping around physical end-of-log | 3530 | * Check for header wrapping around physical end-of-log |
3539 | */ | 3531 | */ |
3540 | offset = NULL; | 3532 | offset = NULL; |
3541 | split_hblks = 0; | 3533 | split_hblks = 0; |
3542 | wrapped_hblks = 0; | 3534 | wrapped_hblks = 0; |
3543 | if (blk_no + hblks <= log->l_logBBsize) { | 3535 | if (blk_no + hblks <= log->l_logBBsize) { |
3544 | /* Read header in one read */ | 3536 | /* Read header in one read */ |
3545 | error = xlog_bread(log, blk_no, hblks, hbp); | 3537 | error = xlog_bread(log, blk_no, hblks, hbp); |
3546 | if (error) | 3538 | if (error) |
3547 | goto bread_err2; | 3539 | goto bread_err2; |
3548 | offset = xlog_align(log, blk_no, hblks, hbp); | 3540 | offset = xlog_align(log, blk_no, hblks, hbp); |
3549 | } else { | 3541 | } else { |
3550 | /* This LR is split across physical log end */ | 3542 | /* This LR is split across physical log end */ |
3551 | if (blk_no != log->l_logBBsize) { | 3543 | if (blk_no != log->l_logBBsize) { |
3552 | /* some data before physical log end */ | 3544 | /* some data before physical log end */ |
3553 | ASSERT(blk_no <= INT_MAX); | 3545 | ASSERT(blk_no <= INT_MAX); |
3554 | split_hblks = log->l_logBBsize - (int)blk_no; | 3546 | split_hblks = log->l_logBBsize - (int)blk_no; |
3555 | ASSERT(split_hblks > 0); | 3547 | ASSERT(split_hblks > 0); |
3556 | if ((error = xlog_bread(log, blk_no, | 3548 | if ((error = xlog_bread(log, blk_no, |
3557 | split_hblks, hbp))) | 3549 | split_hblks, hbp))) |
3558 | goto bread_err2; | 3550 | goto bread_err2; |
3559 | offset = xlog_align(log, blk_no, | 3551 | offset = xlog_align(log, blk_no, |
3560 | split_hblks, hbp); | 3552 | split_hblks, hbp); |
3561 | } | 3553 | } |
3562 | /* | 3554 | /* |
3563 | * Note: this black magic still works with | 3555 | * Note: this black magic still works with |
3564 | * large sector sizes (non-512) only because: | 3556 | * large sector sizes (non-512) only because: |
3565 | * - we increased the buffer size originally | 3557 | * - we increased the buffer size originally |
3566 | * by 1 sector giving us enough extra space | 3558 | * by 1 sector giving us enough extra space |
3567 | * for the second read; | 3559 | * for the second read; |
3568 | * - the log start is guaranteed to be sector | 3560 | * - the log start is guaranteed to be sector |
3569 | * aligned; | 3561 | * aligned; |
3570 | * - we read the log end (LR header start) | 3562 | * - we read the log end (LR header start) |
3571 | * _first_, then the log start (LR header end) | 3563 | * _first_, then the log start (LR header end) |
3572 | * - order is important. | 3564 | * - order is important. |
3573 | */ | 3565 | */ |
3574 | wrapped_hblks = hblks - split_hblks; | 3566 | wrapped_hblks = hblks - split_hblks; |
3575 | bufaddr = XFS_BUF_PTR(hbp); | 3567 | bufaddr = XFS_BUF_PTR(hbp); |
3576 | error = XFS_BUF_SET_PTR(hbp, | 3568 | error = XFS_BUF_SET_PTR(hbp, |
3577 | bufaddr + BBTOB(split_hblks), | 3569 | bufaddr + BBTOB(split_hblks), |
3578 | BBTOB(hblks - split_hblks)); | 3570 | BBTOB(hblks - split_hblks)); |
3579 | if (!error) | 3571 | if (!error) |
3580 | error = xlog_bread(log, 0, | 3572 | error = xlog_bread(log, 0, |
3581 | wrapped_hblks, hbp); | 3573 | wrapped_hblks, hbp); |
3582 | if (!error) | 3574 | if (!error) |
3583 | error = XFS_BUF_SET_PTR(hbp, bufaddr, | 3575 | error = XFS_BUF_SET_PTR(hbp, bufaddr, |
3584 | BBTOB(hblks)); | 3576 | BBTOB(hblks)); |
3585 | if (error) | 3577 | if (error) |
3586 | goto bread_err2; | 3578 | goto bread_err2; |
3587 | if (!offset) | 3579 | if (!offset) |
3588 | offset = xlog_align(log, 0, | 3580 | offset = xlog_align(log, 0, |
3589 | wrapped_hblks, hbp); | 3581 | wrapped_hblks, hbp); |
3590 | } | 3582 | } |
3591 | rhead = (xlog_rec_header_t *)offset; | 3583 | rhead = (xlog_rec_header_t *)offset; |
3592 | error = xlog_valid_rec_header(log, rhead, | 3584 | error = xlog_valid_rec_header(log, rhead, |
3593 | split_hblks ? blk_no : 0); | 3585 | split_hblks ? blk_no : 0); |
3594 | if (error) | 3586 | if (error) |
3595 | goto bread_err2; | 3587 | goto bread_err2; |
3596 | 3588 | ||
3597 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); | 3589 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); |
3598 | blk_no += hblks; | 3590 | blk_no += hblks; |
3599 | 3591 | ||
3600 | /* Read in data for log record */ | 3592 | /* Read in data for log record */ |
3601 | if (blk_no + bblks <= log->l_logBBsize) { | 3593 | if (blk_no + bblks <= log->l_logBBsize) { |
3602 | error = xlog_bread(log, blk_no, bblks, dbp); | 3594 | error = xlog_bread(log, blk_no, bblks, dbp); |
3603 | if (error) | 3595 | if (error) |
3604 | goto bread_err2; | 3596 | goto bread_err2; |
3605 | offset = xlog_align(log, blk_no, bblks, dbp); | 3597 | offset = xlog_align(log, blk_no, bblks, dbp); |
3606 | } else { | 3598 | } else { |
3607 | /* This log record is split across the | 3599 | /* This log record is split across the |
3608 | * physical end of log */ | 3600 | * physical end of log */ |
3609 | offset = NULL; | 3601 | offset = NULL; |
3610 | split_bblks = 0; | 3602 | split_bblks = 0; |
3611 | if (blk_no != log->l_logBBsize) { | 3603 | if (blk_no != log->l_logBBsize) { |
3612 | /* some data is before the physical | 3604 | /* some data is before the physical |
3613 | * end of log */ | 3605 | * end of log */ |
3614 | ASSERT(!wrapped_hblks); | 3606 | ASSERT(!wrapped_hblks); |
3615 | ASSERT(blk_no <= INT_MAX); | 3607 | ASSERT(blk_no <= INT_MAX); |
3616 | split_bblks = | 3608 | split_bblks = |
3617 | log->l_logBBsize - (int)blk_no; | 3609 | log->l_logBBsize - (int)blk_no; |
3618 | ASSERT(split_bblks > 0); | 3610 | ASSERT(split_bblks > 0); |
3619 | if ((error = xlog_bread(log, blk_no, | 3611 | if ((error = xlog_bread(log, blk_no, |
3620 | split_bblks, dbp))) | 3612 | split_bblks, dbp))) |
3621 | goto bread_err2; | 3613 | goto bread_err2; |
3622 | offset = xlog_align(log, blk_no, | 3614 | offset = xlog_align(log, blk_no, |
3623 | split_bblks, dbp); | 3615 | split_bblks, dbp); |
3624 | } | 3616 | } |
3625 | /* | 3617 | /* |
3626 | * Note: this black magic still works with | 3618 | * Note: this black magic still works with |
3627 | * large sector sizes (non-512) only because: | 3619 | * large sector sizes (non-512) only because: |
3628 | * - we increased the buffer size originally | 3620 | * - we increased the buffer size originally |
3629 | * by 1 sector giving us enough extra space | 3621 | * by 1 sector giving us enough extra space |
3630 | * for the second read; | 3622 | * for the second read; |
3631 | * - the log start is guaranteed to be sector | 3623 | * - the log start is guaranteed to be sector |
3632 | * aligned; | 3624 | * aligned; |
3633 | * - we read the log end (LR header start) | 3625 | * - we read the log end (LR header start) |
3634 | * _first_, then the log start (LR header end) | 3626 | * _first_, then the log start (LR header end) |
3635 | * - order is important. | 3627 | * - order is important. |
3636 | */ | 3628 | */ |
3637 | bufaddr = XFS_BUF_PTR(dbp); | 3629 | bufaddr = XFS_BUF_PTR(dbp); |
3638 | error = XFS_BUF_SET_PTR(dbp, | 3630 | error = XFS_BUF_SET_PTR(dbp, |
3639 | bufaddr + BBTOB(split_bblks), | 3631 | bufaddr + BBTOB(split_bblks), |
3640 | BBTOB(bblks - split_bblks)); | 3632 | BBTOB(bblks - split_bblks)); |
3641 | if (!error) | 3633 | if (!error) |
3642 | error = xlog_bread(log, wrapped_hblks, | 3634 | error = xlog_bread(log, wrapped_hblks, |
3643 | bblks - split_bblks, | 3635 | bblks - split_bblks, |
3644 | dbp); | 3636 | dbp); |
3645 | if (!error) | 3637 | if (!error) |
3646 | error = XFS_BUF_SET_PTR(dbp, bufaddr, | 3638 | error = XFS_BUF_SET_PTR(dbp, bufaddr, |
3647 | h_size); | 3639 | h_size); |
3648 | if (error) | 3640 | if (error) |
3649 | goto bread_err2; | 3641 | goto bread_err2; |
3650 | if (!offset) | 3642 | if (!offset) |
3651 | offset = xlog_align(log, wrapped_hblks, | 3643 | offset = xlog_align(log, wrapped_hblks, |
3652 | bblks - split_bblks, dbp); | 3644 | bblks - split_bblks, dbp); |
3653 | } | 3645 | } |
3654 | xlog_unpack_data(rhead, offset, log); | 3646 | xlog_unpack_data(rhead, offset, log); |
3655 | if ((error = xlog_recover_process_data(log, rhash, | 3647 | if ((error = xlog_recover_process_data(log, rhash, |
3656 | rhead, offset, pass))) | 3648 | rhead, offset, pass))) |
3657 | goto bread_err2; | 3649 | goto bread_err2; |
3658 | blk_no += bblks; | 3650 | blk_no += bblks; |
3659 | } | 3651 | } |
3660 | 3652 | ||
3661 | ASSERT(blk_no >= log->l_logBBsize); | 3653 | ASSERT(blk_no >= log->l_logBBsize); |
3662 | blk_no -= log->l_logBBsize; | 3654 | blk_no -= log->l_logBBsize; |
3663 | 3655 | ||
3664 | /* read first part of physical log */ | 3656 | /* read first part of physical log */ |
3665 | while (blk_no < head_blk) { | 3657 | while (blk_no < head_blk) { |
3666 | if ((error = xlog_bread(log, blk_no, hblks, hbp))) | 3658 | if ((error = xlog_bread(log, blk_no, hblks, hbp))) |
3667 | goto bread_err2; | 3659 | goto bread_err2; |
3668 | offset = xlog_align(log, blk_no, hblks, hbp); | 3660 | offset = xlog_align(log, blk_no, hblks, hbp); |
3669 | rhead = (xlog_rec_header_t *)offset; | 3661 | rhead = (xlog_rec_header_t *)offset; |
3670 | error = xlog_valid_rec_header(log, rhead, blk_no); | 3662 | error = xlog_valid_rec_header(log, rhead, blk_no); |
3671 | if (error) | 3663 | if (error) |
3672 | goto bread_err2; | 3664 | goto bread_err2; |
3673 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); | 3665 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); |
3674 | if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp))) | 3666 | if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp))) |
3675 | goto bread_err2; | 3667 | goto bread_err2; |
3676 | offset = xlog_align(log, blk_no+hblks, bblks, dbp); | 3668 | offset = xlog_align(log, blk_no+hblks, bblks, dbp); |
3677 | xlog_unpack_data(rhead, offset, log); | 3669 | xlog_unpack_data(rhead, offset, log); |
3678 | if ((error = xlog_recover_process_data(log, rhash, | 3670 | if ((error = xlog_recover_process_data(log, rhash, |
3679 | rhead, offset, pass))) | 3671 | rhead, offset, pass))) |
3680 | goto bread_err2; | 3672 | goto bread_err2; |
3681 | blk_no += bblks + hblks; | 3673 | blk_no += bblks + hblks; |
3682 | } | 3674 | } |
3683 | } | 3675 | } |
3684 | 3676 | ||
3685 | bread_err2: | 3677 | bread_err2: |
3686 | xlog_put_bp(dbp); | 3678 | xlog_put_bp(dbp); |
3687 | bread_err1: | 3679 | bread_err1: |
3688 | xlog_put_bp(hbp); | 3680 | xlog_put_bp(hbp); |
3689 | return error; | 3681 | return error; |
3690 | } | 3682 | } |
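The delicate part of this routine is the wrap case: a record whose header or body straddles the physical end of the log is read in two pieces, the tail-of-log piece first and the wrapped start-of-log piece second, into one buffer that was sized with a spare sector for exactly this purpose (the "black magic" comments above). The split arithmetic is easy to check with made-up numbers:

    #include <assert.h>

    /* With a 1000-block log, a 4-block header starting at block 998
     * is read as 2 blocks at the end (split_hblks) plus 2 blocks
     * from block 0 (wrapped_hblks), mirroring the code above. */
    int main(void)
    {
            int logBBsize = 1000;   /* physical log size, basic blocks */
            int blk_no = 998;       /* record header start */
            int hblks = 4;          /* header length, basic blocks */
            int split_hblks = logBBsize - blk_no;
            int wrapped_hblks = hblks - split_hblks;

            assert(split_hblks == 2 && wrapped_hblks == 2);
            return 0;
    }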
3691 | 3683 | ||
3692 | /* | 3684 | /* |
3693 | * Do the recovery of the log. We actually do this in two phases. | 3685 | * Do the recovery of the log. We actually do this in two phases. |
3694 | * The two passes are necessary in order to implement the function | 3686 | * The two passes are necessary in order to implement the function |
3695 | * of cancelling a record written into the log. The first pass | 3687 | * of cancelling a record written into the log. The first pass |
3696 | * determines those things which have been cancelled, and the | 3688 | * determines those things which have been cancelled, and the |
3697 | * second pass replays log items normally except for those which | 3689 | * second pass replays log items normally except for those which |
3698 | * have been cancelled. The handling of the replay and cancellations | 3690 | * have been cancelled. The handling of the replay and cancellations |
3699 | * takes place in the log item type specific routines. | 3691 | * takes place in the log item type specific routines. |
3700 | * | 3692 | * |
3701 | * The table of items which have cancel records in the log is allocated | 3693 | * The table of items which have cancel records in the log is allocated |
3702 | * and freed at this level, since only here do we know when all of | 3694 | * and freed at this level, since only here do we know when all of |
3703 | * the log recovery has been completed. | 3695 | * the log recovery has been completed. |
3704 | */ | 3696 | */ |
3705 | STATIC int | 3697 | STATIC int |
3706 | xlog_do_log_recovery( | 3698 | xlog_do_log_recovery( |
3707 | xlog_t *log, | 3699 | xlog_t *log, |
3708 | xfs_daddr_t head_blk, | 3700 | xfs_daddr_t head_blk, |
3709 | xfs_daddr_t tail_blk) | 3701 | xfs_daddr_t tail_blk) |
3710 | { | 3702 | { |
3711 | int error; | 3703 | int error; |
3712 | 3704 | ||
3713 | ASSERT(head_blk != tail_blk); | 3705 | ASSERT(head_blk != tail_blk); |
3714 | 3706 | ||
3715 | /* | 3707 | /* |
3716 | * First do a pass to find all of the cancelled buf log items. | 3708 | * First do a pass to find all of the cancelled buf log items. |
3717 | * Store them in the buf_cancel_table for use in the second pass. | 3709 | * Store them in the buf_cancel_table for use in the second pass. |
3718 | */ | 3710 | */ |
3719 | log->l_buf_cancel_table = | 3711 | log->l_buf_cancel_table = |
3720 | (xfs_buf_cancel_t **)kmem_zalloc(XLOG_BC_TABLE_SIZE * | 3712 | (xfs_buf_cancel_t **)kmem_zalloc(XLOG_BC_TABLE_SIZE * |
3721 | sizeof(xfs_buf_cancel_t*), | 3713 | sizeof(xfs_buf_cancel_t*), |
3722 | KM_SLEEP); | 3714 | KM_SLEEP); |
3723 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, | 3715 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, |
3724 | XLOG_RECOVER_PASS1); | 3716 | XLOG_RECOVER_PASS1); |
3725 | if (error != 0) { | 3717 | if (error != 0) { |
3726 | kmem_free(log->l_buf_cancel_table); | 3718 | kmem_free(log->l_buf_cancel_table); |
3727 | log->l_buf_cancel_table = NULL; | 3719 | log->l_buf_cancel_table = NULL; |
3728 | return error; | 3720 | return error; |
3729 | } | 3721 | } |
3730 | /* | 3722 | /* |
3731 | * Then do a second pass to actually recover the items in the log. | 3723 | * Then do a second pass to actually recover the items in the log. |
3732 | * When it is complete free the table of buf cancel items. | 3724 | * When it is complete free the table of buf cancel items. |
3733 | */ | 3725 | */ |
3734 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, | 3726 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, |
3735 | XLOG_RECOVER_PASS2); | 3727 | XLOG_RECOVER_PASS2); |
3736 | #ifdef DEBUG | 3728 | #ifdef DEBUG |
3737 | if (!error) { | 3729 | if (!error) { |
3738 | int i; | 3730 | int i; |
3739 | 3731 | ||
3740 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) | 3732 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) |
3741 | ASSERT(log->l_buf_cancel_table[i] == NULL); | 3733 | ASSERT(log->l_buf_cancel_table[i] == NULL); |
3742 | } | 3734 | } |
3743 | #endif /* DEBUG */ | 3735 | #endif /* DEBUG */ |
3744 | 3736 | ||
3745 | kmem_free(log->l_buf_cancel_table); | 3737 | kmem_free(log->l_buf_cancel_table); |
3746 | log->l_buf_cancel_table = NULL; | 3738 | log->l_buf_cancel_table = NULL; |
3747 | 3739 | ||
3748 | return error; | 3740 | return error; |
3749 | } | 3741 | } |
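The reason for the two passes, as the comment above says, is cancellation: a cancelled buffer must not be replayed, but the cancel record can appear anywhere in the log relative to the writes it cancels, so every cancellation has to be known before any replay starts. A deliberately simplified toy model of that shape (flat arrays instead of the kernel's hashed buf_cancel_table):

    #include <stdio.h>
    #include <string.h>

    #define NITEMS 5

    int main(void)
    {
            int items[NITEMS]     = { 10, 11, 12, 13, 14 };
            int is_cancel[NITEMS] = { 0, 1, 0, 1, 0 };
            int cancelled[NITEMS];
            int i;

            memset(cancelled, 0, sizeof(cancelled));

            /* pass 1: record which items carry cancel records */
            for (i = 0; i < NITEMS; i++)
                    if (is_cancel[i])
                            cancelled[i] = 1;

            /* pass 2: replay everything that was not cancelled */
            for (i = 0; i < NITEMS; i++)
                    if (!cancelled[i])
                            printf("replaying item %d\n", items[i]);
            return 0;
    }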
3750 | 3742 | ||
3751 | /* | 3743 | /* |
3752 | * Do the actual recovery | 3744 | * Do the actual recovery |
3753 | */ | 3745 | */ |
3754 | STATIC int | 3746 | STATIC int |
3755 | xlog_do_recover( | 3747 | xlog_do_recover( |
3756 | xlog_t *log, | 3748 | xlog_t *log, |
3757 | xfs_daddr_t head_blk, | 3749 | xfs_daddr_t head_blk, |
3758 | xfs_daddr_t tail_blk) | 3750 | xfs_daddr_t tail_blk) |
3759 | { | 3751 | { |
3760 | int error; | 3752 | int error; |
3761 | xfs_buf_t *bp; | 3753 | xfs_buf_t *bp; |
3762 | xfs_sb_t *sbp; | 3754 | xfs_sb_t *sbp; |
3763 | 3755 | ||
3764 | /* | 3756 | /* |
3765 | * First replay the images in the log. | 3757 | * First replay the images in the log. |
3766 | */ | 3758 | */ |
3767 | error = xlog_do_log_recovery(log, head_blk, tail_blk); | 3759 | error = xlog_do_log_recovery(log, head_blk, tail_blk); |
3768 | if (error) { | 3760 | if (error) { |
3769 | return error; | 3761 | return error; |
3770 | } | 3762 | } |
3771 | 3763 | ||
3772 | XFS_bflush(log->l_mp->m_ddev_targp); | 3764 | XFS_bflush(log->l_mp->m_ddev_targp); |
3773 | 3765 | ||
3774 | /* | 3766 | /* |
3775 | * If IO errors happened during recovery, bail out. | 3767 | * If IO errors happened during recovery, bail out. |
3776 | */ | 3768 | */ |
3777 | if (XFS_FORCED_SHUTDOWN(log->l_mp)) { | 3769 | if (XFS_FORCED_SHUTDOWN(log->l_mp)) { |
3778 | return (EIO); | 3770 | return (EIO); |
3779 | } | 3771 | } |
3780 | 3772 | ||
3781 | /* | 3773 | /* |
3782 | * We now update the tail_lsn since much of the recovery has completed | 3774 | * We now update the tail_lsn since much of the recovery has completed |
3783 | * and there may be space available to use. If there were no extent frees | 3775 | * and there may be space available to use. If there were no extent frees |
3784 | * or iunlinks, we can free up the entire log and set the tail_lsn to | 3776 | * or iunlinks, we can free up the entire log and set the tail_lsn to |
3785 | * be the last_sync_lsn. This was set in xlog_find_tail to be the | 3777 | * be the last_sync_lsn. This was set in xlog_find_tail to be the |
3786 | * lsn of the last known good LR on disk. If there are extent frees | 3778 | * lsn of the last known good LR on disk. If there are extent frees |
3787 | * or iunlinks they will have some entries in the AIL; so we look at | 3779 | * or iunlinks they will have some entries in the AIL; so we look at |
3788 | * the AIL to determine how to set the tail_lsn. | 3780 | * the AIL to determine how to set the tail_lsn. |
3789 | */ | 3781 | */ |
3790 | xlog_assign_tail_lsn(log->l_mp); | 3782 | xlog_assign_tail_lsn(log->l_mp); |
3791 | 3783 | ||
3792 | /* | 3784 | /* |
3793 | * Now that we've finished replaying all buffer and inode | 3785 | * Now that we've finished replaying all buffer and inode |
3794 | * updates, re-read in the superblock. | 3786 | * updates, re-read in the superblock. |
3795 | */ | 3787 | */ |
3796 | bp = xfs_getsb(log->l_mp, 0); | 3788 | bp = xfs_getsb(log->l_mp, 0); |
3797 | XFS_BUF_UNDONE(bp); | 3789 | XFS_BUF_UNDONE(bp); |
3798 | ASSERT(!(XFS_BUF_ISWRITE(bp))); | 3790 | ASSERT(!(XFS_BUF_ISWRITE(bp))); |
3799 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); | 3791 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); |
3800 | XFS_BUF_READ(bp); | 3792 | XFS_BUF_READ(bp); |
3801 | XFS_BUF_UNASYNC(bp); | 3793 | XFS_BUF_UNASYNC(bp); |
3802 | xfsbdstrat(log->l_mp, bp); | 3794 | xfsbdstrat(log->l_mp, bp); |
3803 | error = xfs_iowait(bp); | 3795 | error = xfs_iowait(bp); |
3804 | if (error) { | 3796 | if (error) { |
3805 | xfs_ioerror_alert("xlog_do_recover", | 3797 | xfs_ioerror_alert("xlog_do_recover", |
3806 | log->l_mp, bp, XFS_BUF_ADDR(bp)); | 3798 | log->l_mp, bp, XFS_BUF_ADDR(bp)); |
3807 | ASSERT(0); | 3799 | ASSERT(0); |
3808 | xfs_buf_relse(bp); | 3800 | xfs_buf_relse(bp); |
3809 | return error; | 3801 | return error; |
3810 | } | 3802 | } |
3811 | 3803 | ||
3812 | /* Convert superblock from on-disk format */ | 3804 | /* Convert superblock from on-disk format */ |
3813 | sbp = &log->l_mp->m_sb; | 3805 | sbp = &log->l_mp->m_sb; |
3814 | xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); | 3806 | xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); |
3815 | ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); | 3807 | ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); |
3816 | ASSERT(xfs_sb_good_version(sbp)); | 3808 | ASSERT(xfs_sb_good_version(sbp)); |
3817 | xfs_buf_relse(bp); | 3809 | xfs_buf_relse(bp); |
3818 | 3810 | ||
3819 | /* We've re-read the superblock so re-initialize per-cpu counters */ | 3811 | /* We've re-read the superblock so re-initialize per-cpu counters */ |
3820 | xfs_icsb_reinit_counters(log->l_mp); | 3812 | xfs_icsb_reinit_counters(log->l_mp); |
3821 | 3813 | ||
3822 | xlog_recover_check_summary(log); | 3814 | xlog_recover_check_summary(log); |
3823 | 3815 | ||
3824 | /* Normal transactions can now occur */ | 3816 | /* Normal transactions can now occur */ |
3825 | log->l_flags &= ~XLOG_ACTIVE_RECOVERY; | 3817 | log->l_flags &= ~XLOG_ACTIVE_RECOVERY; |
3826 | return 0; | 3818 | return 0; |
3827 | } | 3819 | } |
3828 | 3820 | ||
3829 | /* | 3821 | /* |
3830 | * Perform recovery and re-initialize some log variables in xlog_find_tail. | 3822 | * Perform recovery and re-initialize some log variables in xlog_find_tail. |
3831 | * | 3823 | * |
3832 | * Return error or zero. | 3824 | * Return error or zero. |
3833 | */ | 3825 | */ |
3834 | int | 3826 | int |
3835 | xlog_recover( | 3827 | xlog_recover( |
3836 | xlog_t *log) | 3828 | xlog_t *log) |
3837 | { | 3829 | { |
3838 | xfs_daddr_t head_blk, tail_blk; | 3830 | xfs_daddr_t head_blk, tail_blk; |
3839 | int error; | 3831 | int error; |
3840 | 3832 | ||
3841 | /* find the tail of the log */ | 3833 | /* find the tail of the log */ |
3842 | if ((error = xlog_find_tail(log, &head_blk, &tail_blk))) | 3834 | if ((error = xlog_find_tail(log, &head_blk, &tail_blk))) |
3843 | return error; | 3835 | return error; |
3844 | 3836 | ||
3845 | if (tail_blk != head_blk) { | 3837 | if (tail_blk != head_blk) { |
3846 | /* There used to be a comment here: | 3838 | /* There used to be a comment here: |
3847 | * | 3839 | * |
3848 | * disallow recovery on read-only mounts. note -- mount | 3840 | * disallow recovery on read-only mounts. note -- mount |
3849 | * checks for ENOSPC and turns it into an intelligent | 3841 | * checks for ENOSPC and turns it into an intelligent |
3850 | * error message. | 3842 | * error message. |
3851 | * ...but this is no longer true. Now, unless you specify | 3843 | * ...but this is no longer true. Now, unless you specify |
3852 | * NORECOVERY (in which case this function would never be | 3844 | * NORECOVERY (in which case this function would never be |
3853 | * called), we just go ahead and recover. We do this all | 3845 | * called), we just go ahead and recover. We do this all |
3854 | * under the vfs layer, so we can get away with it unless | 3846 | * under the vfs layer, so we can get away with it unless |
3855 | * the device itself is read-only, in which case we fail. | 3847 | * the device itself is read-only, in which case we fail. |
3856 | */ | 3848 | */ |
3857 | if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) { | 3849 | if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) { |
3858 | return error; | 3850 | return error; |
3859 | } | 3851 | } |
3860 | 3852 | ||
3861 | cmn_err(CE_NOTE, | 3853 | cmn_err(CE_NOTE, |
3862 | "Starting XFS recovery on filesystem: %s (logdev: %s)", | 3854 | "Starting XFS recovery on filesystem: %s (logdev: %s)", |
3863 | log->l_mp->m_fsname, log->l_mp->m_logname ? | 3855 | log->l_mp->m_fsname, log->l_mp->m_logname ? |
3864 | log->l_mp->m_logname : "internal"); | 3856 | log->l_mp->m_logname : "internal"); |
3865 | 3857 | ||
3866 | error = xlog_do_recover(log, head_blk, tail_blk); | 3858 | error = xlog_do_recover(log, head_blk, tail_blk); |
3867 | log->l_flags |= XLOG_RECOVERY_NEEDED; | 3859 | log->l_flags |= XLOG_RECOVERY_NEEDED; |
3868 | } | 3860 | } |
3869 | return error; | 3861 | return error; |
3870 | } | 3862 | } |
3871 | 3863 | ||
3872 | /* | 3864 | /* |
3873 | * In the first part of recovery we replay inodes and buffers and build | 3865 | * In the first part of recovery we replay inodes and buffers and build |
3874 | * up the list of extent free items which need to be processed. Here | 3866 | * up the list of extent free items which need to be processed. Here |
3875 | * we process the extent free items and clean up the on disk unlinked | 3867 | * we process the extent free items and clean up the on disk unlinked |
3876 | * inode lists. This is separated from the first part of recovery so | 3868 | * inode lists. This is separated from the first part of recovery so |
3877 | * that the root and real-time bitmap inodes can be read in from disk in | 3869 | * that the root and real-time bitmap inodes can be read in from disk in |
3878 | * between the two stages. This is necessary so that we can free space | 3870 | * between the two stages. This is necessary so that we can free space |
3879 | * in the real-time portion of the file system. | 3871 | * in the real-time portion of the file system. |
3880 | */ | 3872 | */ |
3881 | int | 3873 | int |
3882 | xlog_recover_finish( | 3874 | xlog_recover_finish( |
3883 | xlog_t *log) | 3875 | xlog_t *log) |
3884 | { | 3876 | { |
3885 | /* | 3877 | /* |
3886 | * Now we're ready to do the transactions needed for the | 3878 | * Now we're ready to do the transactions needed for the |
3887 | * rest of recovery. Start with completing all the extent | 3879 | * rest of recovery. Start with completing all the extent |
3888 | * free intent records and then process the unlinked inode | 3880 | * free intent records and then process the unlinked inode |
3889 | * lists. At this point, we essentially run in normal mode | 3881 | * lists. At this point, we essentially run in normal mode |
3890 | * except that we're still performing recovery actions | 3882 | * except that we're still performing recovery actions |
3891 | * rather than accepting new requests. | 3883 | * rather than accepting new requests. |
3892 | */ | 3884 | */ |
3893 | if (log->l_flags & XLOG_RECOVERY_NEEDED) { | 3885 | if (log->l_flags & XLOG_RECOVERY_NEEDED) { |
3894 | int error; | 3886 | int error; |
3895 | error = xlog_recover_process_efis(log); | 3887 | error = xlog_recover_process_efis(log); |
3896 | if (error) { | 3888 | if (error) { |
3897 | cmn_err(CE_ALERT, | 3889 | cmn_err(CE_ALERT, |
3898 | "Failed to recover EFIs on filesystem: %s", | 3890 | "Failed to recover EFIs on filesystem: %s", |
3899 | log->l_mp->m_fsname); | 3891 | log->l_mp->m_fsname); |
3900 | return error; | 3892 | return error; |
3901 | } | 3893 | } |
3902 | /* | 3894 | /* |
3903 | * Sync the log to get all the EFIs out of the AIL. | 3895 | * Sync the log to get all the EFIs out of the AIL. |
3904 | * This isn't absolutely necessary, but it helps in | 3896 | * This isn't absolutely necessary, but it helps in |
3905 | * case the unlink transactions would have problems | 3897 | * case the unlink transactions would have problems |
3906 | * pushing the EFIs out of the way. | 3898 | * pushing the EFIs out of the way. |
3907 | */ | 3899 | */ |
3908 | xfs_log_force(log->l_mp, (xfs_lsn_t)0, | 3900 | xfs_log_force(log->l_mp, (xfs_lsn_t)0, |
3909 | (XFS_LOG_FORCE | XFS_LOG_SYNC)); | 3901 | (XFS_LOG_FORCE | XFS_LOG_SYNC)); |
3910 | 3902 | ||
3911 | xlog_recover_process_iunlinks(log); | 3903 | xlog_recover_process_iunlinks(log); |
3912 | 3904 | ||
3913 | xlog_recover_check_summary(log); | 3905 | xlog_recover_check_summary(log); |
3914 | 3906 | ||
3915 | cmn_err(CE_NOTE, | 3907 | cmn_err(CE_NOTE, |
3916 | "Ending XFS recovery on filesystem: %s (logdev: %s)", | 3908 | "Ending XFS recovery on filesystem: %s (logdev: %s)", |
3917 | log->l_mp->m_fsname, log->l_mp->m_logname ? | 3909 | log->l_mp->m_fsname, log->l_mp->m_logname ? |
3918 | log->l_mp->m_logname : "internal"); | 3910 | log->l_mp->m_logname : "internal"); |
3919 | log->l_flags &= ~XLOG_RECOVERY_NEEDED; | 3911 | log->l_flags &= ~XLOG_RECOVERY_NEEDED; |
3920 | } else { | 3912 | } else { |
3921 | cmn_err(CE_DEBUG, | 3913 | cmn_err(CE_DEBUG, |
3922 | "!Ending clean XFS mount for filesystem: %s\n", | 3914 | "!Ending clean XFS mount for filesystem: %s\n", |
3923 | log->l_mp->m_fsname); | 3915 | log->l_mp->m_fsname); |
3924 | } | 3916 | } |
3925 | return 0; | 3917 | return 0; |
3926 | } | 3918 | } |
3927 | 3919 | ||
3928 | 3920 | ||
3929 | #if defined(DEBUG) | 3921 | #if defined(DEBUG) |
3930 | /* | 3922 | /* |
3931 | * Read all of the agf and agi counters and check that they | 3923 | * Read all of the agf and agi counters and check that they |
3932 | * are consistent with the superblock counters. | 3924 | * are consistent with the superblock counters. |
3933 | */ | 3925 | */ |
3934 | void | 3926 | void |
3935 | xlog_recover_check_summary( | 3927 | xlog_recover_check_summary( |
3936 | xlog_t *log) | 3928 | xlog_t *log) |
3937 | { | 3929 | { |
3938 | xfs_mount_t *mp; | 3930 | xfs_mount_t *mp; |
3939 | xfs_agf_t *agfp; | 3931 | xfs_agf_t *agfp; |
3940 | xfs_buf_t *agfbp; | 3932 | xfs_buf_t *agfbp; |
3941 | xfs_buf_t *agibp; | 3933 | xfs_buf_t *agibp; |
3942 | xfs_buf_t *sbbp; | 3934 | xfs_buf_t *sbbp; |
3943 | #ifdef XFS_LOUD_RECOVERY | 3935 | #ifdef XFS_LOUD_RECOVERY |
3944 | xfs_sb_t *sbp; | 3936 | xfs_sb_t *sbp; |
3945 | #endif | 3937 | #endif |
3946 | xfs_agnumber_t agno; | 3938 | xfs_agnumber_t agno; |
3947 | __uint64_t freeblks; | 3939 | __uint64_t freeblks; |
3948 | __uint64_t itotal; | 3940 | __uint64_t itotal; |
3949 | __uint64_t ifree; | 3941 | __uint64_t ifree; |
3950 | int error; | 3942 | int error; |
3951 | 3943 | ||
3952 | mp = log->l_mp; | 3944 | mp = log->l_mp; |
3953 | 3945 | ||
3954 | freeblks = 0LL; | 3946 | freeblks = 0LL; |
3955 | itotal = 0LL; | 3947 | itotal = 0LL; |
3956 | ifree = 0LL; | 3948 | ifree = 0LL; |
3957 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { | 3949 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { |
3958 | error = xfs_read_agf(mp, NULL, agno, 0, &agfbp); | 3950 | error = xfs_read_agf(mp, NULL, agno, 0, &agfbp); |
3959 | if (error) { | 3951 | if (error) { |
3960 | xfs_fs_cmn_err(CE_ALERT, mp, | 3952 | xfs_fs_cmn_err(CE_ALERT, mp, |
3961 | "xlog_recover_check_summary(agf)" | 3953 | "xlog_recover_check_summary(agf)" |
3962 | ": agf read failed agno %d error %d", | 3954 | ": agf read failed agno %d error %d", |
3963 | agno, error); | 3955 | agno, error); |
3964 | } else { | 3956 | } else { |
3965 | agfp = XFS_BUF_TO_AGF(agfbp); | 3957 | agfp = XFS_BUF_TO_AGF(agfbp); |
3966 | freeblks += be32_to_cpu(agfp->agf_freeblks) + | 3958 | freeblks += be32_to_cpu(agfp->agf_freeblks) + |
3967 | be32_to_cpu(agfp->agf_flcount); | 3959 | be32_to_cpu(agfp->agf_flcount); |
3968 | xfs_buf_relse(agfbp); | 3960 | xfs_buf_relse(agfbp); |
3969 | } | 3961 | } |
3970 | 3962 | ||
3971 | error = xfs_read_agi(mp, NULL, agno, &agibp); | 3963 | error = xfs_read_agi(mp, NULL, agno, &agibp); |
3972 | if (!error) { | 3964 | if (!error) { |
3973 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); | 3965 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); |
3974 | 3966 | ||
3975 | itotal += be32_to_cpu(agi->agi_count); | 3967 | itotal += be32_to_cpu(agi->agi_count); |
3976 | ifree += be32_to_cpu(agi->agi_freecount); | 3968 | ifree += be32_to_cpu(agi->agi_freecount); |
3977 | xfs_buf_relse(agibp); | 3969 | xfs_buf_relse(agibp); |
3978 | } | 3970 | } |
3979 | } | 3971 | } |
3980 | 3972 | ||
3981 | sbbp = xfs_getsb(mp, 0); | 3973 | sbbp = xfs_getsb(mp, 0); |
3982 | #ifdef XFS_LOUD_RECOVERY | 3974 | #ifdef XFS_LOUD_RECOVERY |
3983 | sbp = &mp->m_sb; | 3975 | sbp = &mp->m_sb; |
3984 | xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(sbbp)); | 3976 | xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(sbbp)); |
3985 | cmn_err(CE_NOTE, | 3977 | cmn_err(CE_NOTE, |
3986 | "xlog_recover_check_summary: sb_icount %Lu itotal %Lu", | 3978 | "xlog_recover_check_summary: sb_icount %Lu itotal %Lu", |
3987 | sbp->sb_icount, itotal); | 3979 | sbp->sb_icount, itotal); |
3988 | cmn_err(CE_NOTE, | 3980 | cmn_err(CE_NOTE, |
3989 | "xlog_recover_check_summary: sb_ifree %Lu ifree %Lu", | 3981 | "xlog_recover_check_summary: sb_ifree %Lu ifree %Lu", |
3990 | sbp->sb_ifree, ifree); | 3982 | sbp->sb_ifree, ifree); |
3991 | cmn_err(CE_NOTE, | 3983 | cmn_err(CE_NOTE, |
3992 | "xlog_recover_check_summary: sb_fdblocks %Lu freeblks %Lu", | 3984 | "xlog_recover_check_summary: sb_fdblocks %Lu freeblks %Lu", |
3993 | sbp->sb_fdblocks, freeblks); | 3985 | sbp->sb_fdblocks, freeblks); |
3994 | #if 0 | 3986 | #if 0 |
3995 | /* | 3987 | /* |
3996 | * This is turned off until I account for the allocation | 3988 | * This is turned off until I account for the allocation |
3997 | * btree blocks which live in free space. | 3989 | * btree blocks which live in free space. |
3998 | */ | 3990 | */ |
3999 | ASSERT(sbp->sb_icount == itotal); | 3991 | ASSERT(sbp->sb_icount == itotal); |
4000 | ASSERT(sbp->sb_ifree == ifree); | 3992 | ASSERT(sbp->sb_ifree == ifree); |
4001 | ASSERT(sbp->sb_fdblocks == freeblks); | 3993 | ASSERT(sbp->sb_fdblocks == freeblks); |
4002 | #endif | 3994 | #endif |
4003 | #endif | 3995 | #endif |
4004 | xfs_buf_relse(sbbp); | 3996 | xfs_buf_relse(sbbp); |
4005 | } | 3997 | } |
4006 | #endif /* DEBUG */ | 3998 | #endif /* DEBUG */ |
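The consistency rule being checked is simple to state: filesystem-wide free space is the sum over all allocation groups of agf_freeblks plus the free-list count agf_flcount, and the inode totals are the sums of agi_count and agi_freecount. A miniature of the accumulation with made-up per-AG numbers:

    #include <stdint.h>
    #include <stdio.h>

    struct ag { uint32_t freeblks, flcount, icount, ifreecount; };

    int main(void)
    {
            struct ag ags[2] = { { 100, 4, 32, 8 }, { 200, 4, 64, 16 } };
            uint64_t freeblks = 0, itotal = 0, ifree = 0;
            int i;

            for (i = 0; i < 2; i++) {
                    freeblks += ags[i].freeblks + ags[i].flcount;
                    itotal   += ags[i].icount;
                    ifree    += ags[i].ifreecount;
            }
            /* expect freeblks 308, itotal 96, ifree 24 */
            printf("freeblks %llu itotal %llu ifree %llu\n",
                   (unsigned long long)freeblks,
                   (unsigned long long)itotal,
                   (unsigned long long)ifree);
            return 0;
    }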
4007 | 3999 |
fs/xfs/xfs_rw.c
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. | 2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
24 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 25 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 26 | #include "xfs_ag.h" |
27 | #include "xfs_dir2.h" | 27 | #include "xfs_dir2.h" |
28 | #include "xfs_dmapi.h" | 28 | #include "xfs_dmapi.h" |
29 | #include "xfs_mount.h" | 29 | #include "xfs_mount.h" |
30 | #include "xfs_bmap_btree.h" | 30 | #include "xfs_bmap_btree.h" |
31 | #include "xfs_alloc_btree.h" | 31 | #include "xfs_alloc_btree.h" |
32 | #include "xfs_ialloc_btree.h" | 32 | #include "xfs_ialloc_btree.h" |
33 | #include "xfs_dir2_sf.h" | 33 | #include "xfs_dir2_sf.h" |
34 | #include "xfs_attr_sf.h" | 34 | #include "xfs_attr_sf.h" |
35 | #include "xfs_dinode.h" | 35 | #include "xfs_dinode.h" |
36 | #include "xfs_inode.h" | 36 | #include "xfs_inode.h" |
37 | #include "xfs_inode_item.h" | 37 | #include "xfs_inode_item.h" |
38 | #include "xfs_itable.h" | 38 | #include "xfs_itable.h" |
39 | #include "xfs_btree.h" | 39 | #include "xfs_btree.h" |
40 | #include "xfs_alloc.h" | 40 | #include "xfs_alloc.h" |
41 | #include "xfs_ialloc.h" | 41 | #include "xfs_ialloc.h" |
42 | #include "xfs_attr.h" | 42 | #include "xfs_attr.h" |
43 | #include "xfs_bmap.h" | 43 | #include "xfs_bmap.h" |
44 | #include "xfs_acl.h" | 44 | #include "xfs_acl.h" |
45 | #include "xfs_error.h" | 45 | #include "xfs_error.h" |
46 | #include "xfs_buf_item.h" | 46 | #include "xfs_buf_item.h" |
47 | #include "xfs_rw.h" | 47 | #include "xfs_rw.h" |
48 | 48 | ||
49 | /* | 49 | /* |
50 | * This is a subroutine for xfs_write() and other writers (xfs_ioctl) | 50 | * This is a subroutine for xfs_write() and other writers (xfs_ioctl) |
51 | * which clears the setuid and setgid bits when a file is written. | 51 | * which clears the setuid and setgid bits when a file is written. |
52 | */ | 52 | */ |
53 | int | 53 | int |
54 | xfs_write_clear_setuid( | 54 | xfs_write_clear_setuid( |
55 | xfs_inode_t *ip) | 55 | xfs_inode_t *ip) |
56 | { | 56 | { |
57 | xfs_mount_t *mp; | 57 | xfs_mount_t *mp; |
58 | xfs_trans_t *tp; | 58 | xfs_trans_t *tp; |
59 | int error; | 59 | int error; |
60 | 60 | ||
61 | mp = ip->i_mount; | 61 | mp = ip->i_mount; |
62 | tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); | 62 | tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); |
63 | if ((error = xfs_trans_reserve(tp, 0, | 63 | if ((error = xfs_trans_reserve(tp, 0, |
64 | XFS_WRITEID_LOG_RES(mp), | 64 | XFS_WRITEID_LOG_RES(mp), |
65 | 0, 0, 0))) { | 65 | 0, 0, 0))) { |
66 | xfs_trans_cancel(tp, 0); | 66 | xfs_trans_cancel(tp, 0); |
67 | return error; | 67 | return error; |
68 | } | 68 | } |
69 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 69 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
70 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | 70 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
71 | xfs_trans_ihold(tp, ip); | 71 | xfs_trans_ihold(tp, ip); |
72 | ip->i_d.di_mode &= ~S_ISUID; | 72 | ip->i_d.di_mode &= ~S_ISUID; |
73 | 73 | ||
74 | /* | 74 | /* |
75 | * Note that we don't have to worry about mandatory | 75 | * Note that we don't have to worry about mandatory |
76 | * file locking being disabled here because we only | 76 | * file locking being disabled here because we only |
77 | * clear the S_ISGID bit if the Group execute bit is | 77 | * clear the S_ISGID bit if the Group execute bit is |
78 | * on, but if it was on then mandatory locking wouldn't | 78 | * on, but if it was on then mandatory locking wouldn't |
79 | * have been enabled. | 79 | * have been enabled. |
80 | */ | 80 | */ |
81 | if (ip->i_d.di_mode & S_IXGRP) { | 81 | if (ip->i_d.di_mode & S_IXGRP) { |
82 | ip->i_d.di_mode &= ~S_ISGID; | 82 | ip->i_d.di_mode &= ~S_ISGID; |
83 | } | 83 | } |
84 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 84 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
85 | xfs_trans_set_sync(tp); | 85 | xfs_trans_set_sync(tp); |
86 | error = xfs_trans_commit(tp, 0); | 86 | error = xfs_trans_commit(tp, 0); |
87 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 87 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
88 | return error; | 88 | return error; |
89 | } | 89 | } |
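The mode manipulation above encodes the conventional rule for writes to setid files: S_ISUID is always dropped, while S_ISGID is dropped only when group execute is set, because S_ISGID without group execute denotes mandatory locking rather than setgid and must be left alone, as the comment in the function explains. The same logic as a self-checking sketch:

    #include <assert.h>
    #include <sys/stat.h>

    /* Drop S_ISUID unconditionally; drop S_ISGID only when the
     * group-execute bit is on, as in xfs_write_clear_setuid above. */
    static mode_t clear_setid_bits(mode_t mode)
    {
            mode &= ~S_ISUID;
            if (mode & S_IXGRP)
                    mode &= ~S_ISGID;
            return mode;
    }

    int main(void)
    {
            /* 06775: group execute on, both setid bits cleared */
            assert(clear_setid_bits(06775) == 0775);
            /* 06765: group execute off, setgid is preserved */
            assert(clear_setid_bits(06765) == 02765);
            return 0;
    }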
90 | 90 | ||
91 | /* | 91 | /* |
92 | * Handle logging requirements of various synchronous types of write. | 92 | * Handle logging requirements of various synchronous types of write. |
93 | */ | 93 | */ |
94 | int | 94 | int |
95 | xfs_write_sync_logforce( | 95 | xfs_write_sync_logforce( |
96 | xfs_mount_t *mp, | 96 | xfs_mount_t *mp, |
97 | xfs_inode_t *ip) | 97 | xfs_inode_t *ip) |
98 | { | 98 | { |
99 | int error = 0; | 99 | int error = 0; |
100 | 100 | ||
101 | /* | 101 | /* |
102 | * If we're treating this as O_DSYNC and we have not updated the | 102 | * If we're treating this as O_DSYNC and we have not updated the |
103 | * size, force the log. | 103 | * size, force the log. |
104 | */ | 104 | */ |
105 | if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && | 105 | if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && |
106 | !(ip->i_update_size)) { | 106 | !(ip->i_update_size)) { |
107 | xfs_inode_log_item_t *iip = ip->i_itemp; | 107 | xfs_inode_log_item_t *iip = ip->i_itemp; |
108 | 108 | ||
109 | /* | 109 | /* |
110 | * If an allocation transaction occurred | 110 | * If an allocation transaction occurred |
111 | * without extending the size, then we have to force | 111 | * without extending the size, then we have to force |
112 | * the log up the proper point to ensure that the | 112 | * the log up the proper point to ensure that the |
113 | * allocation is permanent. We can't count on | 113 | * allocation is permanent. We can't count on |
114 | * the fact that buffered writes lock out direct I/O | 114 | * the fact that buffered writes lock out direct I/O |
115 | * writes - the direct I/O write could have extended | 115 | * writes - the direct I/O write could have extended |
116 | * the size nontransactionally, then finished before | 116 | * the size nontransactionally, then finished before |
117 | * we started. xfs_write_file will think that the file | 117 | * we started. xfs_write_file will think that the file |
118 | * didn't grow but the update isn't safe unless the | 118 | * didn't grow but the update isn't safe unless the |
119 | * size change is logged. | 119 | * size change is logged. |
120 | * | 120 | * |
121 | * Force the log if we've committed a transaction | 121 | * Force the log if we've committed a transaction |
122 | * against the inode or if someone else has and | 122 | * against the inode or if someone else has and |
123 | * the commit record hasn't gone to disk (e.g. | 123 | * the commit record hasn't gone to disk (e.g. |
124 | * the inode is pinned). This guarantees that | 124 | * the inode is pinned). This guarantees that |
125 | * all changes affecting the inode are permanent | 125 | * all changes affecting the inode are permanent |
126 | * when we return. | 126 | * when we return. |
127 | */ | 127 | */ |
128 | if (iip && iip->ili_last_lsn) { | 128 | if (iip && iip->ili_last_lsn) { |
129 | error = _xfs_log_force(mp, iip->ili_last_lsn, | 129 | error = _xfs_log_force(mp, iip->ili_last_lsn, |
130 | XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); | 130 | XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); |
131 | } else if (xfs_ipincount(ip) > 0) { | 131 | } else if (xfs_ipincount(ip) > 0) { |
132 | error = _xfs_log_force(mp, (xfs_lsn_t)0, | 132 | error = _xfs_log_force(mp, (xfs_lsn_t)0, |
133 | XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); | 133 | XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); |
134 | } | 134 | } |
135 | 135 | ||
136 | } else { | 136 | } else { |
137 | xfs_trans_t *tp; | 137 | xfs_trans_t *tp; |
138 | 138 | ||
139 | /* | 139 | /* |
140 | * O_SYNC or O_DSYNC _with_ a size update are handled | 140 | * O_SYNC or O_DSYNC _with_ a size update are handled |
141 | * the same way. | 141 | * the same way. |
142 | * | 142 | * |
143 | * If the write was synchronous then we need to make | 143 | * If the write was synchronous then we need to make |
144 | * sure that the inode modification time is permanent. | 144 | * sure that the inode modification time is permanent. |
145 | * We'll have updated the timestamp above, so here | 145 | * We'll have updated the timestamp above, so here |
146 | * we use a synchronous transaction to log the inode. | 146 | * we use a synchronous transaction to log the inode. |
147 | * It's not fast, but it's necessary. | 147 | * It's not fast, but it's necessary. |
148 | * | 148 | * |
149 | * If this a dsync write and the size got changed | 149 | * If this a dsync write and the size got changed |
150 | * non-transactionally, then we need to ensure that | 150 | * non-transactionally, then we need to ensure that |
151 | * the size change gets logged in a synchronous | 151 | * the size change gets logged in a synchronous |
152 | * transaction. | 152 | * transaction. |
153 | */ | 153 | */ |
154 | tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); | 154 | tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); |
155 | if ((error = xfs_trans_reserve(tp, 0, | 155 | if ((error = xfs_trans_reserve(tp, 0, |
156 | XFS_SWRITE_LOG_RES(mp), | 156 | XFS_SWRITE_LOG_RES(mp), |
157 | 0, 0, 0))) { | 157 | 0, 0, 0))) { |
158 | /* Transaction reserve failed */ | 158 | /* Transaction reserve failed */ |
159 | xfs_trans_cancel(tp, 0); | 159 | xfs_trans_cancel(tp, 0); |
160 | } else { | 160 | } else { |
161 | /* Transaction reserve successful */ | 161 | /* Transaction reserve successful */ |
162 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 162 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
163 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | 163 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
164 | xfs_trans_ihold(tp, ip); | 164 | xfs_trans_ihold(tp, ip); |
165 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 165 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
166 | xfs_trans_set_sync(tp); | 166 | xfs_trans_set_sync(tp); |
167 | error = xfs_trans_commit(tp, 0); | 167 | error = xfs_trans_commit(tp, 0); |
168 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 168 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
169 | } | 169 | } |
170 | } | 170 | } |
171 | 171 | ||
172 | return error; | 172 | return error; |
173 | } | 173 | } |
174 | 174 | ||
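The branch above boils down to one rule: push the log at least as far as the inode's last commit LSN, or push everything (LSN 0) while the inode is still pinned by an uncommitted change. A condensed restatement of that decision, equivalent to the code shown:

	xfs_lsn_t	lsn = (iip && iip->ili_last_lsn) ? iip->ili_last_lsn : 0;

	/* Force synchronously so the commit record is on disk when we return. */
	if (lsn || xfs_ipincount(ip) > 0)
		error = _xfs_log_force(mp, lsn,
				XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);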
175 | /* | 175 | /* |
176 | * Force a shutdown of the filesystem instantly while keeping | 176 | * Force a shutdown of the filesystem instantly while keeping |
177 | * the filesystem consistent. We don't do an unmount here; just shutdown | 177 | * the filesystem consistent. We don't do an unmount here; just shutdown |
178 | * the shop, make sure that absolutely nothing persistent happens to | 178 | * the shop, make sure that absolutely nothing persistent happens to |
179 | * this filesystem after this point. | 179 | * this filesystem after this point. |
180 | */ | 180 | */ |
181 | void | 181 | void |
182 | xfs_do_force_shutdown( | 182 | xfs_do_force_shutdown( |
183 | xfs_mount_t *mp, | 183 | xfs_mount_t *mp, |
184 | int flags, | 184 | int flags, |
185 | char *fname, | 185 | char *fname, |
186 | int lnnum) | 186 | int lnnum) |
187 | { | 187 | { |
188 | int logerror; | 188 | int logerror; |
189 | 189 | ||
190 | logerror = flags & SHUTDOWN_LOG_IO_ERROR; | 190 | logerror = flags & SHUTDOWN_LOG_IO_ERROR; |
191 | 191 | ||
192 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | 192 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { |
193 | cmn_err(CE_NOTE, "xfs_force_shutdown(%s,0x%x) called from " | 193 | cmn_err(CE_NOTE, "xfs_force_shutdown(%s,0x%x) called from " |
194 | "line %d of file %s. Return address = 0x%p", | 194 | "line %d of file %s. Return address = 0x%p", |
195 | mp->m_fsname, flags, lnnum, fname, __return_address); | 195 | mp->m_fsname, flags, lnnum, fname, __return_address); |
196 | } | 196 | } |
197 | /* | 197 | /* |
198 | * No need to duplicate efforts. | 198 | * No need to duplicate efforts. |
199 | */ | 199 | */ |
200 | if (XFS_FORCED_SHUTDOWN(mp) && !logerror) | 200 | if (XFS_FORCED_SHUTDOWN(mp) && !logerror) |
201 | return; | 201 | return; |
202 | 202 | ||
203 | /* | 203 | /* |
204 | * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't | 204 | * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't |
205 | * queue up anybody new on the log reservations, and wakes up | 205 | * queue up anybody new on the log reservations, and wakes up |
206 | * everybody who's sleeping on log reservations to tell them | 206 | * everybody who's sleeping on log reservations to tell them |
207 | * the bad news. | 207 | * the bad news. |
208 | */ | 208 | */ |
209 | if (xfs_log_force_umount(mp, logerror)) | 209 | if (xfs_log_force_umount(mp, logerror)) |
210 | return; | 210 | return; |
211 | 211 | ||
212 | if (flags & SHUTDOWN_CORRUPT_INCORE) { | 212 | if (flags & SHUTDOWN_CORRUPT_INCORE) { |
213 | xfs_cmn_err(XFS_PTAG_SHUTDOWN_CORRUPT, CE_ALERT, mp, | 213 | xfs_cmn_err(XFS_PTAG_SHUTDOWN_CORRUPT, CE_ALERT, mp, |
214 | "Corruption of in-memory data detected. Shutting down filesystem: %s", | 214 | "Corruption of in-memory data detected. Shutting down filesystem: %s", |
215 | mp->m_fsname); | 215 | mp->m_fsname); |
216 | if (XFS_ERRLEVEL_HIGH <= xfs_error_level) { | 216 | if (XFS_ERRLEVEL_HIGH <= xfs_error_level) { |
217 | xfs_stack_trace(); | 217 | xfs_stack_trace(); |
218 | } | 218 | } |
219 | } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | 219 | } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { |
220 | if (logerror) { | 220 | if (logerror) { |
221 | xfs_cmn_err(XFS_PTAG_SHUTDOWN_LOGERROR, CE_ALERT, mp, | 221 | xfs_cmn_err(XFS_PTAG_SHUTDOWN_LOGERROR, CE_ALERT, mp, |
222 | "Log I/O Error Detected. Shutting down filesystem: %s", | 222 | "Log I/O Error Detected. Shutting down filesystem: %s", |
223 | mp->m_fsname); | 223 | mp->m_fsname); |
224 | } else if (flags & SHUTDOWN_DEVICE_REQ) { | 224 | } else if (flags & SHUTDOWN_DEVICE_REQ) { |
225 | xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, | 225 | xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, |
226 | "All device paths lost. Shutting down filesystem: %s", | 226 | "All device paths lost. Shutting down filesystem: %s", |
227 | mp->m_fsname); | 227 | mp->m_fsname); |
228 | } else if (!(flags & SHUTDOWN_REMOTE_REQ)) { | 228 | } else if (!(flags & SHUTDOWN_REMOTE_REQ)) { |
229 | xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, | 229 | xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, |
230 | "I/O Error Detected. Shutting down filesystem: %s", | 230 | "I/O Error Detected. Shutting down filesystem: %s", |
231 | mp->m_fsname); | 231 | mp->m_fsname); |
232 | } | 232 | } |
233 | } | 233 | } |
234 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | 234 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { |
235 | cmn_err(CE_ALERT, "Please umount the filesystem, " | 235 | cmn_err(CE_ALERT, "Please umount the filesystem, " |
236 | "and rectify the problem(s)"); | 236 | "and rectify the problem(s)"); |
237 | } | 237 | } |
238 | } | 238 | } |
239 | 239 | ||
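Callers rarely invoke xfs_do_force_shutdown() directly; they go through a wrapper macro that supplies the file and line for the diagnostic above. Roughly (the real definition lives in xfs_mount.h):

	#define xfs_force_shutdown(m, f)	\
		xfs_do_force_shutdown((m), (f), __FILE__, __LINE__)

That is the form xfs_bwrite() uses at the bottom of this file to shut the filesystem down after a failed metadata write.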
240 | 240 | ||
241 | /* | 241 | /* |
242 | * Called when we want to stop a buffer from getting written or read. | 242 | * Called when we want to stop a buffer from getting written or read. |
243 | * We attach the EIO error, muck with its flags, and call biodone | 243 | * We attach the EIO error, muck with its flags, and call biodone |
244 | * so that the proper iodone callbacks get called. | 244 | * so that the proper iodone callbacks get called. |
245 | */ | 245 | */ |
246 | int | 246 | int |
247 | xfs_bioerror( | 247 | xfs_bioerror( |
248 | xfs_buf_t *bp) | 248 | xfs_buf_t *bp) |
249 | { | 249 | { |
250 | 250 | ||
251 | #ifdef XFSERRORDEBUG | 251 | #ifdef XFSERRORDEBUG |
252 | ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone); | 252 | ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone); |
253 | #endif | 253 | #endif |
254 | 254 | ||
255 | /* | 255 | /* |
256 | * No need to wait until the buffer is unpinned. | 256 | * No need to wait until the buffer is unpinned. |
257 | * We aren't flushing it. | 257 | * We aren't flushing it. |
258 | */ | 258 | */ |
259 | xfs_buftrace("XFS IOERROR", bp); | 259 | xfs_buftrace("XFS IOERROR", bp); |
260 | XFS_BUF_ERROR(bp, EIO); | 260 | XFS_BUF_ERROR(bp, EIO); |
261 | /* | 261 | /* |
262 | * We're calling biodone, so delete the B_DONE flag. Either way | 262 | * We're calling biodone, so delete the B_DONE flag. Either way
263 | * we have to call the iodone callback, and calling biodone | 263 | * we have to call the iodone callback, and calling biodone |
264 | * probably is the best way since it takes care of | 264 | * probably is the best way since it takes care of |
265 | * GRIO as well. | 265 | * GRIO as well. |
266 | */ | 266 | */ |
267 | XFS_BUF_UNREAD(bp); | 267 | XFS_BUF_UNREAD(bp); |
268 | XFS_BUF_UNDELAYWRITE(bp); | 268 | XFS_BUF_UNDELAYWRITE(bp); |
269 | XFS_BUF_UNDONE(bp); | 269 | XFS_BUF_UNDONE(bp); |
270 | XFS_BUF_STALE(bp); | 270 | XFS_BUF_STALE(bp); |
271 | 271 | ||
272 | XFS_BUF_CLR_BDSTRAT_FUNC(bp); | 272 | XFS_BUF_CLR_BDSTRAT_FUNC(bp); |
273 | xfs_biodone(bp); | 273 | xfs_biodone(bp); |
274 | 274 | ||
275 | return (EIO); | 275 | return (EIO); |
276 | } | 276 | } |
277 | 277 | ||
278 | /* | 278 | /* |
279 | * Same as xfs_bioerror, except that we are releasing the buffer | 279 | * Same as xfs_bioerror, except that we are releasing the buffer |
280 | * here ourselves, and avoiding the biodone call. | 280 | * here ourselves, and avoiding the biodone call. |
281 | * This is meant for userdata errors; metadata bufs come with | 281 | * This is meant for userdata errors; metadata bufs come with |
282 | * iodone functions attached, so that we can track down errors. | 282 | * iodone functions attached, so that we can track down errors. |
283 | */ | 283 | */ |
284 | int | 284 | int |
285 | xfs_bioerror_relse( | 285 | xfs_bioerror_relse( |
286 | xfs_buf_t *bp) | 286 | xfs_buf_t *bp) |
287 | { | 287 | { |
288 | int64_t fl; | 288 | int64_t fl; |
289 | 289 | ||
290 | ASSERT(XFS_BUF_IODONE_FUNC(bp) != xfs_buf_iodone_callbacks); | 290 | ASSERT(XFS_BUF_IODONE_FUNC(bp) != xfs_buf_iodone_callbacks); |
291 | ASSERT(XFS_BUF_IODONE_FUNC(bp) != xlog_iodone); | 291 | ASSERT(XFS_BUF_IODONE_FUNC(bp) != xlog_iodone); |
292 | 292 | ||
293 | xfs_buftrace("XFS IOERRELSE", bp); | 293 | xfs_buftrace("XFS IOERRELSE", bp); |
294 | fl = XFS_BUF_BFLAGS(bp); | 294 | fl = XFS_BUF_BFLAGS(bp); |
295 | /* | 295 | /* |
296 | * No need to wait until the buffer is unpinned. | 296 | * No need to wait until the buffer is unpinned. |
297 | * We aren't flushing it. | 297 | * We aren't flushing it. |
298 | * | 298 | * |
299 | * chunkhold expects B_DONE to be set, whether | 299 | * chunkhold expects B_DONE to be set, whether |
300 | * we actually finish the I/O or not. We don't want to | 300 | * we actually finish the I/O or not. We don't want to |
301 | * change that interface. | 301 | * change that interface. |
302 | */ | 302 | */ |
303 | XFS_BUF_UNREAD(bp); | 303 | XFS_BUF_UNREAD(bp); |
304 | XFS_BUF_UNDELAYWRITE(bp); | 304 | XFS_BUF_UNDELAYWRITE(bp); |
305 | XFS_BUF_DONE(bp); | 305 | XFS_BUF_DONE(bp); |
306 | XFS_BUF_STALE(bp); | 306 | XFS_BUF_STALE(bp); |
307 | XFS_BUF_CLR_IODONE_FUNC(bp); | 307 | XFS_BUF_CLR_IODONE_FUNC(bp); |
308 | XFS_BUF_CLR_BDSTRAT_FUNC(bp); | 308 | XFS_BUF_CLR_BDSTRAT_FUNC(bp); |
309 | if (!(fl & XFS_B_ASYNC)) { | 309 | if (!(fl & XFS_B_ASYNC)) { |
310 | /* | 310 | /* |
311 | * Mark b_error and B_ERROR _both_. | 311 | * Mark b_error and B_ERROR _both_. |
312 | * Lots of chunkcache code assumes that. | 312 | * Lots of chunkcache code assumes that.
313 | * There's no reason to mark error for | 313 | * There's no reason to mark error for |
314 | * ASYNC buffers. | 314 | * ASYNC buffers. |
315 | */ | 315 | */ |
316 | XFS_BUF_ERROR(bp, EIO); | 316 | XFS_BUF_ERROR(bp, EIO); |
317 | XFS_BUF_FINISH_IOWAIT(bp); | 317 | XFS_BUF_FINISH_IOWAIT(bp); |
318 | } else { | 318 | } else { |
319 | xfs_buf_relse(bp); | 319 | xfs_buf_relse(bp); |
320 | } | 320 | } |
321 | return (EIO); | 321 | return (EIO); |
322 | } | 322 | } |
323 | 323 | ||
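Between them, xfs_bioerror() and xfs_bioerror_relse() let a buffer strategy routine fail I/O cleanly once the filesystem has shut down: run the iodone callbacks when the buffer has them (or is a read), otherwise just stale the buffer and release it. A minimal sketch of that dispatch, using a hypothetical helper name (the real logic sits in xfs_bdstrat_cb()):

	STATIC int
	xfs_shutdown_bioerror(	/* hypothetical helper */
		xfs_buf_t	*bp)
	{
		/*
		 * Delayed metadata writes with no iodone callback carry no
		 * transaction state, so just release them; everything else
		 * needs its iodone callbacks run via biodone.
		 */
		if (!XFS_BUF_IODONE_FUNC(bp) && !XFS_BUF_ISREAD(bp))
			return xfs_bioerror_relse(bp);
		return xfs_bioerror(bp);
	}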
324 | /* | 324 | /* |
325 | * Prints out an ALERT message about an I/O error. | 325 | * Prints out an ALERT message about an I/O error.
326 | */ | 326 | */ |
327 | void | 327 | void |
328 | xfs_ioerror_alert( | 328 | xfs_ioerror_alert( |
329 | char *func, | 329 | char *func, |
330 | struct xfs_mount *mp, | 330 | struct xfs_mount *mp, |
331 | xfs_buf_t *bp, | 331 | xfs_buf_t *bp, |
332 | xfs_daddr_t blkno) | 332 | xfs_daddr_t blkno) |
333 | { | 333 | { |
334 | cmn_err(CE_ALERT, | 334 | cmn_err(CE_ALERT, |
335 | "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx" | 335 | "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx" |
336 | " (\"%s\") error %d buf count %zd", | 336 | " (\"%s\") error %d buf count %zd", |
337 | (!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname, | 337 | (!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname, |
338 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), | 338 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), |
339 | (__uint64_t)blkno, func, | 339 | (__uint64_t)blkno, func, |
340 | XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp)); | 340 | XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp)); |
341 | } | 341 | } |
342 | 342 | ||
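For illustration only, a failed write could be reported against the buffer's own disk address like this (the function-name string is just a tag that ends up in the message):

	xfs_ioerror_alert("xfs_bwrite", mp, bp, XFS_BUF_ADDR(bp));

xfs_read_buf() below shows the same call in context.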
343 | /* | 343 | /* |
344 | * This isn't an absolute requirement, but it is | 344 | * This isn't an absolute requirement, but it is |
345 | * just a good idea to call xfs_read_buf instead of | 345 | * just a good idea to call xfs_read_buf instead of |
346 | * directly doing a read_buf call. For one, we shouldn't | 346 | * directly doing a read_buf call. For one, we shouldn't |
347 | * be doing this disk read if we are in SHUTDOWN state anyway, | 347 | * be doing this disk read if we are in SHUTDOWN state anyway, |
348 | * so this stops that from happening. Secondly, this does all | 348 | * so this stops that from happening. Secondly, this does all |
349 | * the error checking stuff and the brelse if appropriate for | 349 | * the error checking stuff and the brelse if appropriate for |
350 | * the caller, so the code can be a little leaner. | 350 | * the caller, so the code can be a little leaner. |
351 | */ | 351 | */ |
352 | 352 | ||
353 | int | 353 | int |
354 | xfs_read_buf( | 354 | xfs_read_buf( |
355 | struct xfs_mount *mp, | 355 | struct xfs_mount *mp, |
356 | xfs_buftarg_t *target, | 356 | xfs_buftarg_t *target, |
357 | xfs_daddr_t blkno, | 357 | xfs_daddr_t blkno, |
358 | int len, | 358 | int len, |
359 | uint flags, | 359 | uint flags, |
360 | xfs_buf_t **bpp) | 360 | xfs_buf_t **bpp) |
361 | { | 361 | { |
362 | xfs_buf_t *bp; | 362 | xfs_buf_t *bp; |
363 | int error; | 363 | int error; |
364 | 364 | ||
365 | if (flags) | 365 | if (flags) |
366 | bp = xfs_buf_read_flags(target, blkno, len, flags); | 366 | bp = xfs_buf_read_flags(target, blkno, len, flags); |
367 | else | 367 | else |
368 | bp = xfs_buf_read(target, blkno, len, flags); | 368 | bp = xfs_buf_read(target, blkno, len, flags); |
369 | if (!bp) | 369 | if (!bp) |
370 | return XFS_ERROR(EIO); | 370 | return XFS_ERROR(EIO); |
371 | error = XFS_BUF_GETERROR(bp); | 371 | error = XFS_BUF_GETERROR(bp); |
372 | if (bp && !error && !XFS_FORCED_SHUTDOWN(mp)) { | 372 | if (bp && !error && !XFS_FORCED_SHUTDOWN(mp)) { |
373 | *bpp = bp; | 373 | *bpp = bp; |
374 | } else { | 374 | } else { |
375 | *bpp = NULL; | 375 | *bpp = NULL; |
376 | if (error) { | 376 | if (error) { |
377 | xfs_ioerror_alert("xfs_read_buf", mp, bp, XFS_BUF_ADDR(bp)); | 377 | xfs_ioerror_alert("xfs_read_buf", mp, bp, XFS_BUF_ADDR(bp)); |
378 | } else { | 378 | } else { |
379 | error = XFS_ERROR(EIO); | 379 | error = XFS_ERROR(EIO); |
380 | } | 380 | } |
381 | if (bp) { | 381 | if (bp) { |
382 | XFS_BUF_UNDONE(bp); | 382 | XFS_BUF_UNDONE(bp); |
383 | XFS_BUF_UNDELAYWRITE(bp); | 383 | XFS_BUF_UNDELAYWRITE(bp); |
384 | XFS_BUF_STALE(bp); | 384 | XFS_BUF_STALE(bp); |
385 | /* | 385 | /* |
386 | * brelse clears B_ERROR and b_error | 386 | * brelse clears B_ERROR and b_error |
387 | */ | 387 | */ |
388 | xfs_buf_relse(bp); | 388 | xfs_buf_relse(bp); |
389 | } | 389 | } |
390 | } | 390 | } |
391 | return (error); | 391 | return (error); |
392 | } | 392 | } |
393 | 393 | ||
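A caller-side sketch with a made-up block number; len counts basic blocks, as in the rest of the buffer interfaces:

	xfs_buf_t	*bp;
	int		error;

	error = xfs_read_buf(mp, mp->m_ddev_targp, blkno, 1, 0, &bp);
	if (error)
		return error;
	/* ... inspect the data via XFS_BUF_PTR(bp) ... */
	xfs_buf_relse(bp);

On success the buffer comes back held, so the caller must release it; on any failure *bpp is NULL and the buffer has already been staled and released.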
394 | /* | 394 | /* |
395 | * Wrapper around bwrite() so that we can trap | 395 | * Wrapper around bwrite() so that we can trap |
396 | * write errors, and act accordingly. | 396 | * write errors, and act accordingly. |
397 | */ | 397 | */ |
398 | int | 398 | int |
399 | xfs_bwrite( | 399 | xfs_bwrite( |
400 | struct xfs_mount *mp, | 400 | struct xfs_mount *mp, |
401 | struct xfs_buf *bp) | 401 | struct xfs_buf *bp) |
402 | { | 402 | { |
403 | int error; | 403 | int error; |
404 | 404 | ||
405 | /* | 405 | /* |
406 | * XXXsup how does this work for quotas. | 406 | * XXXsup how does this work for quotas. |
407 | */ | 407 | */ |
408 | XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb); | 408 | XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb); |
409 | XFS_BUF_SET_FSPRIVATE3(bp, mp); | 409 | bp->b_mount = mp; |
410 | XFS_BUF_WRITE(bp); | 410 | XFS_BUF_WRITE(bp); |
411 | 411 | ||
412 | if ((error = XFS_bwrite(bp))) { | 412 | if ((error = XFS_bwrite(bp))) { |
413 | ASSERT(mp); | 413 | ASSERT(mp); |
414 | /* | 414 | /* |
415 | * Cannot put a buftrace here since if the buffer is not | 415 | * Cannot put a buftrace here since if the buffer is not |
416 | * B_HOLD then we will brelse() the buffer before returning | 416 | * B_HOLD then we will brelse() the buffer before returning |
417 | * from bwrite and we could be tracing a buffer that has | 417 | * from bwrite and we could be tracing a buffer that has |
418 | * been reused. | 418 | * been reused. |
419 | */ | 419 | */ |
420 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); | 420 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); |
421 | } | 421 | } |
422 | return (error); | 422 | return (error); |
423 | } | 423 | } |
424 | 424 |
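The b_mount assignment above is the one functional change in this hunk: the buffer now carries its mount pointer directly, so the strategy callback installed here can test for a forced shutdown without the old XFS_BUF_FSPRIVATE3() accessor. A sketch of how that callback would use it (not the exact body of xfs_bdstrat_cb()):

	int
	xfs_bdstrat_cb(
		struct xfs_buf	*bp)
	{
		if (XFS_FORCED_SHUTDOWN(bp->b_mount))
			return xfs_bioerror(bp);	/* or _relse(), see above */

		xfs_buf_iorequest(bp);
		return 0;
	}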